Compare commits
4 Commits
1b3468b72d
...
013-k8s-ma
| Author | SHA1 | Date | |
|---|---|---|---|
| bf27c97deb | |||
| ce279e6121 | |||
| b14508e4cf | |||
| 602648ef56 |
@@ -27,3 +27,7 @@ LOGIN_COOLDOWN_SECONDS=900
|
||||
# Comma-separated IPs/CIDRs of trusted upstream proxies (e.g. nginx ingress pod CIDR).
|
||||
# Leave empty when not behind a reverse proxy.
|
||||
LOGIN_TRUSTED_PROXY_IPS=
|
||||
|
||||
# API documentation endpoints (Swagger UI, ReDoc, OpenAPI schema)
|
||||
# Set to false in production to avoid exposing the API surface publicly.
|
||||
API_DOCS_ENABLED=true
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"feature_directory": "specs/011-ui-prod-dockerfile"
|
||||
"feature_directory": "specs/013-k8s-manifests"
|
||||
}
|
||||
|
||||
4
.yamllint.yml
Normal file
4
.yamllint.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
extends: relaxed
|
||||
rules:
|
||||
line-length:
|
||||
max: 120
|
||||
@@ -1,5 +1,5 @@
|
||||
<!-- SPECKIT START -->
|
||||
For additional context about technologies to be used, project structure,
|
||||
shell commands, and other important information, read the current plan at
|
||||
`specs/011-ui-prod-dockerfile/plan.md`.
|
||||
`specs/013-k8s-manifests/plan.md`.
|
||||
<!-- SPECKIT END -->
|
||||
|
||||
8
Makefile
8
Makefile
@@ -1,9 +1,10 @@
|
||||
.PHONY: test-unit test-integration build-prod verify-prod build-ui-prod verify-ui-prod
|
||||
.PHONY: test-unit test-integration build-prod verify-prod build-ui-prod verify-ui-prod validate-k8s
|
||||
|
||||
test-unit:
|
||||
cd api && python -m pytest tests/unit/ -v
|
||||
|
||||
test-integration:
|
||||
docker compose -f docker-compose.test.yml build api-test
|
||||
docker compose -f docker-compose.test.yml run --rm api-test
|
||||
|
||||
build-prod:
|
||||
@@ -17,3 +18,8 @@ build-ui-prod:
|
||||
|
||||
verify-ui-prod:
|
||||
bash ui/tests/build/verify_production_image.sh
|
||||
|
||||
# Lint and dry-run every manifest under k8s/ (including sub-directories).
# - yamllint is fully offline; -c makes it honour the repo's .yamllint.yml
#   (a -d preset would replace that file and drop its line-length rule).
# - kubectl's client-side dry run needs a kubeconfig. --recursive is required
#   because the manifests live in k8s/api, k8s/minio, k8s/ui and k8s/vault;
#   without it only the files directly inside k8s/ are processed.
#   NOTE(review): the vault/ manifests use custom resource kinds, so the target
#   cluster presumably needs those CRDs installed for the dry run to pass.
validate-k8s:
	yamllint -c .yamllint.yml k8s/
	kubectl apply --dry-run=client --recursive -f k8s/
|
||||
|
||||
@@ -12,6 +12,3 @@ dist/
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
tests/
|
||||
alembic/
|
||||
alembic.ini
|
||||
|
||||
@@ -35,6 +35,8 @@ RUN groupadd --system --gid 1001 appgroup \
|
||||
|
||||
COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv
|
||||
COPY --chown=appuser:appgroup app/ ./app/
|
||||
COPY --chown=appuser:appgroup alembic/ ./alembic/
|
||||
COPY --chown=appuser:appgroup alembic.ini .
|
||||
|
||||
USER appuser
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from pydantic import field_validator
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
@@ -22,6 +23,19 @@ class Settings(BaseSettings):
|
||||
login_window_seconds: int = 300
|
||||
login_cooldown_seconds: int = 900
|
||||
login_trusted_proxy_ips: str = ""
|
||||
api_docs_enabled: bool = True
|
||||
|
||||
@field_validator("api_docs_enabled", mode="before")
@classmethod
def coerce_docs_enabled(cls, v):
    """Coerce the raw ``api_docs_enabled`` value to a bool, falling back to True.

    Runs before pydantic's own field validation. Real booleans pass through
    unchanged; any other value goes through pydantic's standard bool coercion.
    A value that coercion rejects does not abort settings loading: the flag
    falls back to ``True`` (docs enabled) and a warning is logged so the
    fallback is visible to whoever operates the service.

    Args:
        v: Raw value supplied for the field (typically a string from the
            environment).

    Returns:
        The coerced boolean, or ``True`` when the value cannot be coerced.
    """
    if isinstance(v, bool):
        return v

    # Function-scope imports keep this block self-contained; both modules are
    # always importable here, so they sit outside the try on purpose.
    import logging

    from pydantic import TypeAdapter, ValidationError

    try:
        return TypeAdapter(bool).validate_python(v)
    except ValidationError:
        # Failing open is the documented fallback, but it must not be silent:
        # a typo in this flag would otherwise expose the docs unnoticed.
        logging.getLogger(__name__).warning(
            "Invalid API_DOCS_ENABLED value %r; falling back to enabled", v
        )
        return True
|
||||
|
||||
|
||||
@lru_cache
|
||||
|
||||
@@ -33,7 +33,16 @@ async def lifespan(application: FastAPI):
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
app = FastAPI(title="Reactbin API", version="1.0.0", lifespan=lifespan)
|
||||
_settings = get_settings()
|
||||
|
||||
app = FastAPI(
|
||||
title="Reactbin API",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
docs_url="/docs" if _settings.api_docs_enabled else None,
|
||||
redoc_url="/redoc" if _settings.api_docs_enabled else None,
|
||||
openapi_url="/openapi.json" if _settings.api_docs_enabled else None,
|
||||
)
|
||||
|
||||
# Defaults so app.state is populated even when lifespan doesn't run (e.g. tests)
|
||||
app.state.login_rate_limiter = LoginRateLimiter()
|
||||
|
||||
48
api/tests/integration/test_docs_gate.py
Normal file
48
api/tests/integration/test_docs_gate.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import importlib
|
||||
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
# Minimal set of environment variables the tests export before (re)loading the app.
_BASE_ENV = {
    "DATABASE_URL": "postgresql+asyncpg://u:p@localhost/db",
    "JWT_SECRET_KEY": "test-secret",
    "OWNER_USERNAME": "admin",
    "OWNER_PASSWORD": "password",
    "S3_ENDPOINT_URL": "http://localhost:9000",
    "S3_BUCKET_NAME": "test-bucket",
    "S3_ACCESS_KEY_ID": "key",
    "S3_SECRET_ACCESS_KEY": "secret",
}


def _set_env(monkeypatch, extra=None):
    """Export the base test environment, plus optional overrides, via ``monkeypatch``.

    Args:
        monkeypatch: Object exposing ``setenv(name, value)``.
        extra: Optional mapping whose entries override or extend ``_BASE_ENV``.
    """
    merged = dict(_BASE_ENV)
    if extra:
        merged.update(extra)
    for name, value in merged.items():
        monkeypatch.setenv(name, value)
|
||||
|
||||
|
||||
def test_docs_hidden_when_flag_disabled(monkeypatch):
    """With API_DOCS_ENABLED=false the three docs routes return 404; health still returns 200."""
    _set_env(monkeypatch, {"API_DOCS_ENABLED": "false"})
    # Drop any cached settings so the reload below reads the patched environment.
    get_settings.cache_clear()
    try:
        import app.main as m

        # The app object is built when app.main is executed, so re-execute the
        # module to rebuild it under the patched environment.
        importlib.reload(m)
        client = TestClient(m.app, raise_server_exceptions=False)
        assert client.get("/docs").status_code == 404
        assert client.get("/redoc").status_code == 404
        assert client.get("/openapi.json").status_code == 404
        assert client.get("/api/v1/health").status_code == 200
    finally:
        # Always clear the cache, even when an assertion fails, so settings
        # built from this test's environment never leak into later tests.
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_docs_visible_when_flag_enabled(monkeypatch):
    """With API_DOCS_ENABLED=true the three docs routes return 200."""
    _set_env(monkeypatch, {"API_DOCS_ENABLED": "true"})
    # Drop any cached settings so the reload below reads the patched environment.
    get_settings.cache_clear()
    try:
        import app.main as m

        # The app object is built when app.main is executed, so re-execute the
        # module to rebuild it under the patched environment.
        importlib.reload(m)
        client = TestClient(m.app, raise_server_exceptions=False)
        assert client.get("/docs").status_code == 200
        assert client.get("/redoc").status_code == 200
        assert client.get("/openapi.json").status_code == 200
    finally:
        # Always clear the cache, even when an assertion fails, so settings
        # built from this test's environment never leak into later tests.
        get_settings.cache_clear()
|
||||
@@ -59,3 +59,39 @@ def test_settings_jwt_expiry_override(monkeypatch):
|
||||
|
||||
s = config_module.Settings()
|
||||
assert s.jwt_expiry_seconds == 3600
|
||||
|
||||
|
||||
def test_api_docs_enabled_default(monkeypatch):
    """Settings built from the base test env (no API_DOCS_ENABLED override passed) enable docs."""
    import importlib

    _apply_env(monkeypatch)

    # Imported only after the environment is patched, then reloaded so the
    # module is evaluated against that environment.
    import app.config as cfg

    importlib.reload(cfg)

    settings = cfg.Settings()
    assert settings.api_docs_enabled is True
|
||||
|
||||
|
||||
def test_api_docs_enabled_false(monkeypatch):
    """API_DOCS_ENABLED=false is parsed into ``api_docs_enabled is False``."""
    import importlib

    _apply_env(monkeypatch, {"API_DOCS_ENABLED": "false"})

    # Imported only after the environment is patched, then reloaded so the
    # module is evaluated against that environment.
    import app.config as cfg

    importlib.reload(cfg)

    settings = cfg.Settings()
    assert settings.api_docs_enabled is False
|
||||
|
||||
|
||||
def test_api_docs_invalid_value_defaults_to_enabled(monkeypatch):
    """An unparseable API_DOCS_ENABLED value does not fail settings construction; docs stay enabled."""
    import importlib

    _apply_env(monkeypatch, {"API_DOCS_ENABLED": "not-a-bool"})

    # Imported only after the environment is patched, then reloaded so the
    # module is evaluated against that environment.
    import app.config as cfg

    importlib.reload(cfg)

    settings = cfg.Settings()
    assert settings.api_docs_enabled is True
|
||||
|
||||
53
k8s/api/deployment.yaml
Normal file
53
k8s/api/deployment.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
# Replace 'latest' with the real image tag before applying
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: api
|
||||
namespace: reactbin
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: api
|
||||
spec:
|
||||
initContainers:
|
||||
- name: migrate
|
||||
image: reactbin-api:latest
|
||||
command: ["alembic", "upgrade", "head"]
|
||||
workingDir: /app
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
containers:
|
||||
- name: api
|
||||
image: reactbin-api:latest
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
env:
|
||||
- name: API_DOCS_ENABLED
|
||||
value: "false"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /api/v1/health
|
||||
port: 8000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /api/v1/health
|
||||
port: 8000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
13
k8s/api/service.yaml
Normal file
13
k8s/api/service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: api
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: api
|
||||
ports:
|
||||
- name: http
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
34
k8s/ingress.yaml
Normal file
34
k8s/ingress.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Replace <your-domain> with the real domain before applying
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: reactbin
|
||||
namespace: reactbin
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
spec:
|
||||
ingressClassName: nginx
|
||||
tls:
|
||||
- hosts:
|
||||
- <your-domain>
|
||||
secretName: reactbin-tls
|
||||
rules:
|
||||
- host: <your-domain>
|
||||
http:
|
||||
paths:
|
||||
# /api/ is listed before / for readability only — with pathType: Prefix the
# longest matching path wins, so declaration order does not decide routing
|
||||
- path: /api/
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api
|
||||
port:
|
||||
number: 8000
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: ui
|
||||
port:
|
||||
number: 8080
|
||||
24
k8s/minio/init-job.yaml
Normal file
24
k8s/minio/init-job.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: minio-init
|
||||
namespace: reactbin
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: mc
|
||||
image: minio/mc:latest
|
||||
# mc runs as root by default; FR-013 exception documented in spec
|
||||
securityContext:
|
||||
runAsNonRoot: false
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
mc alias set local http://minio:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD"
|
||||
mc mb --ignore-existing local/reactbin
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: minio-credentials
|
||||
16
k8s/minio/service.yaml
Normal file
16
k8s/minio/service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: minio
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: minio
|
||||
ports:
|
||||
- name: api
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
- name: console
|
||||
port: 9001
|
||||
targetPort: 9001
|
||||
58
k8s/minio/statefulset.yaml
Normal file
58
k8s/minio/statefulset.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
# Replace 'latest' with the real image tag before applying
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: minio
|
||||
namespace: reactbin
|
||||
spec:
|
||||
serviceName: minio
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: minio
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: minio
|
||||
spec:
|
||||
containers:
|
||||
- name: minio
|
||||
image: minio/minio:latest
|
||||
args:
|
||||
- server
|
||||
- /data
|
||||
- --console-address
|
||||
- ":9001"
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
- containerPort: 9001
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: minio-credentials
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/live
|
||||
port: 9000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/ready
|
||||
port: 9000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
4
k8s/namespace.yaml
Normal file
4
k8s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: reactbin
|
||||
30
k8s/ui/deployment.yaml
Normal file
30
k8s/ui/deployment.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Replace 'latest' with the real image tag before applying
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: ui
|
||||
namespace: reactbin
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ui
|
||||
spec:
|
||||
containers:
|
||||
- name: ui
|
||||
image: reactbin-ui:latest
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 101 # nginxinc/nginx-unprivileged default UID
|
||||
13
k8s/ui/service.yaml
Normal file
13
k8s/ui/service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ui
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: ui
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
18
k8s/vault/api-secret.yaml
Normal file
18
k8s/vault/api-secret.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: api-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-auth
|
||||
mount: secret
|
||||
type: kv-v2
|
||||
# Required Vault keys at this path:
|
||||
# DATABASE_URL, JWT_SECRET_KEY, OWNER_USERNAME, OWNER_PASSWORD,
|
||||
# S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY,
|
||||
# API_BASE_URL
|
||||
path: reactbin/api/config
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: api-env
|
||||
create: true
|
||||
16
k8s/vault/minio-secret.yaml
Normal file
16
k8s/vault/minio-secret.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: minio-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-auth
|
||||
mount: secret
|
||||
type: kv-v2
|
||||
# Required Vault keys at this path:
|
||||
# MINIO_ROOT_USER, MINIO_ROOT_PASSWORD
|
||||
path: reactbin/minio/credentials
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: minio-credentials
|
||||
create: true
|
||||
16
k8s/vault/vault-auth.yaml
Normal file
16
k8s/vault/vault-auth.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultAuth
|
||||
metadata:
|
||||
name: reactbin-auth
|
||||
namespace: reactbin
|
||||
spec:
|
||||
method: kubernetes
|
||||
mount: kubernetes
|
||||
kubernetes:
|
||||
# The operator must create this role in Vault and bind it to the
|
||||
# default service account in the reactbin namespace with read access
|
||||
# to both reactbin/api/config and reactbin/minio/credentials.
|
||||
role: reactbin
|
||||
serviceAccount: default
|
||||
audiences:
|
||||
- https://kubernetes.default.svc
|
||||
34
specs/012-api-docs-gate/checklists/requirements.md
Normal file
34
specs/012-api-docs-gate/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: API Documentation Visibility Gate
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [X] No implementation details (languages, frameworks, APIs)
|
||||
- [X] Focused on user value and business needs
|
||||
- [X] Written for non-technical stakeholders
|
||||
- [X] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [X] No [NEEDS CLARIFICATION] markers remain
|
||||
- [X] Requirements are testable and unambiguous
|
||||
- [X] Success criteria are measurable
|
||||
- [X] Success criteria are technology-agnostic (no implementation details)
|
||||
- [X] All acceptance scenarios are defined
|
||||
- [X] Edge cases are identified
|
||||
- [X] Scope is clearly bounded
|
||||
- [X] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [X] All functional requirements have clear acceptance criteria
|
||||
- [X] User scenarios cover primary flows
|
||||
- [X] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [X] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- All items pass. Spec is ready for `/speckit-plan`.
|
||||
40
specs/012-api-docs-gate/contracts/docs-endpoints.md
Normal file
40
specs/012-api-docs-gate/contracts/docs-endpoints.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Contract: API Documentation Endpoints
|
||||
|
||||
These three endpoints exist in FastAPI by default. This feature makes their availability conditional on a runtime configuration flag.
|
||||
|
||||
## Affected Endpoints
|
||||
|
||||
| Endpoint | Default path | Purpose |
|
||||
|----------|-------------|---------|
|
||||
| Swagger UI | `GET /docs` | Interactive browser-based API documentation |
|
||||
| ReDoc UI | `GET /redoc` | Alternative read-only API documentation |
|
||||
| OpenAPI schema | `GET /openapi.json` | Raw JSON schema of the entire API surface |
|
||||
|
||||
## Behaviour by Flag State
|
||||
|
||||
### `API_DOCS_ENABLED=true` (default)
|
||||
|
||||
All three endpoints respond exactly as they did before this feature. No change.
|
||||
|
||||
| Endpoint | Response |
|
||||
|----------|----------|
|
||||
| `GET /docs` | `200 OK` — Swagger UI HTML |
|
||||
| `GET /redoc` | `200 OK` — ReDoc UI HTML |
|
||||
| `GET /openapi.json` | `200 OK` — OpenAPI schema JSON |
|
||||
|
||||
### `API_DOCS_ENABLED=false`
|
||||
|
||||
All three endpoints are unregistered. Requests fall through to the framework's default 404 handler.
|
||||
|
||||
| Endpoint | Response |
|
||||
|----------|----------|
|
||||
| `GET /docs` | `404 Not Found` |
|
||||
| `GET /redoc` | `404 Not Found` |
|
||||
| `GET /openapi.json` | `404 Not Found` |
|
||||
|
||||
## Invariants
|
||||
|
||||
- All other endpoints are unaffected in both flag states.
|
||||
- The `GET /api/v1/health` endpoint always returns `200 OK` regardless of the flag.
|
||||
- Request/response validation is unaffected — it does not depend on the served schema. Only the HTTP routes that serve the schema and the docs UIs are removed; the schema can still be generated in-process (e.g. via `app.openapi()`).
|
||||
- The flag is read once at application startup. A running process does not respond to live changes; a restart is required.
|
||||
138
specs/012-api-docs-gate/plan.md
Normal file
138
specs/012-api-docs-gate/plan.md
Normal file
@@ -0,0 +1,138 @@
|
||||
# Implementation Plan: API Documentation Visibility Gate
|
||||
|
||||
**Branch**: `012-api-docs-gate` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/012-api-docs-gate/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Add `API_DOCS_ENABLED` (boolean, default `true`) to `app/config.py`. When `false`, pass `docs_url=None`, `redoc_url=None`, `openapi_url=None` to the `FastAPI()` constructor in `app/main.py`, making all three documentation routes return 404. A field validator provides graceful fallback for invalid flag values. Two new integration tests verify both flag states; the existing unit test suite is extended with three settings tests (default, explicit `false`, and invalid value).
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12
|
||||
**Primary Dependencies**: FastAPI (constructor params), pydantic-settings (field validator)
|
||||
**Storage**: None
|
||||
**Testing**: pytest unit (`api/tests/unit/test_config.py`), pytest + ASGI test client (`api/tests/integration/test_docs_gate.py`)
|
||||
**Target Platform**: API container (same as existing)
|
||||
**Project Type**: Web service configuration change
|
||||
**Performance Goals**: No measurable impact — one boolean read at startup
|
||||
**Constraints**: Default must be `true` (backwards compatible); invalid env var value must not crash startup; no other routes affected
|
||||
**Scale/Scope**: Three files changed (`config.py`, `main.py`, `.env.example`); one new test file; one existing test file extended
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Requirement | Status |
|
||||
|-----------|-------------|--------|
|
||||
| §5.1 TDD | Failing tests written before implementation | ✅ Tasks order tests first |
|
||||
| §5.2 Integration tests | New integration tests follow existing pattern | ✅ |
|
||||
| §5.3 Tests next to code | `api/tests/unit/` and `api/tests/integration/` | ✅ |
|
||||
| §5.4 CI before done | All tests pass before task marked done | ✅ |
|
||||
| §7.2 Env config | Flag via environment variable, not hardcoded | ✅ |
|
||||
| §7.3 Linting | `ruff` passes on all changed files | ✅ Enforced in polish task |
|
||||
| §2.6 No speculative abstraction | One boolean field, no plugin system | ✅ |
|
||||
|
||||
**No violations. All gates pass.**
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/012-api-docs-gate/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← 6 decisions
|
||||
├── contracts/
|
||||
│ └── docs-endpoints.md ← behaviour contract for 3 affected endpoints
|
||||
├── quickstart.md ← 4 test scenarios
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
api/
|
||||
├── app/
|
||||
│ ├── config.py ← MODIFIED: add api_docs_enabled field + validator
|
||||
│ └── main.py ← MODIFIED: conditional docs_url/redoc_url/openapi_url
|
||||
├── tests/
|
||||
│ ├── unit/
|
||||
│ │ └── test_config.py ← MODIFIED: 3 new tests for api_docs_enabled
|
||||
│ └── integration/
|
||||
│ └── test_docs_gate.py ← NEW: 2 integration tests (disabled + enabled)
|
||||
|
||||
.env.example ← MODIFIED: document API_DOCS_ENABLED
|
||||
```
|
||||
|
||||
## Implementation Design
|
||||
|
||||
### `app/config.py` — new field with graceful fallback validator
|
||||
|
||||
```python
|
||||
from pydantic import field_validator
|
||||
|
||||
class Settings(BaseSettings):
|
||||
# ... existing fields ...
|
||||
api_docs_enabled: bool = True
|
||||
|
||||
@field_validator('api_docs_enabled', mode='before')
|
||||
@classmethod
|
||||
def coerce_docs_enabled(cls, v):
|
||||
if isinstance(v, bool):
|
||||
return v
|
||||
try:
|
||||
from pydantic import TypeAdapter
|
||||
return TypeAdapter(bool).validate_python(v)
|
||||
except Exception:
|
||||
return True # FR-007: invalid value → safe default (enabled)
|
||||
```
|
||||
|
||||
### `app/main.py` — conditional docs URLs
|
||||
|
||||
```python
|
||||
_settings = get_settings()
|
||||
|
||||
app = FastAPI(
|
||||
title="Reactbin API",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
docs_url="/docs" if _settings.api_docs_enabled else None,
|
||||
redoc_url="/redoc" if _settings.api_docs_enabled else None,
|
||||
openapi_url="/openapi.json" if _settings.api_docs_enabled else None,
|
||||
)
|
||||
```
|
||||
|
||||
### Integration test pattern
|
||||
|
||||
The `app` object is constructed at module import time. Tests reload the module with the env var pre-set:
|
||||
|
||||
```python
|
||||
def test_docs_disabled(monkeypatch, _base_env):
|
||||
monkeypatch.setenv("API_DOCS_ENABLED", "false")
|
||||
from app.config import get_settings
|
||||
get_settings.cache_clear()
|
||||
import importlib, app.main as m
|
||||
importlib.reload(m)
|
||||
client = TestClient(m.app)
|
||||
assert client.get("/docs").status_code == 404
|
||||
assert client.get("/redoc").status_code == 404
|
||||
assert client.get("/openapi.json").status_code == 404
|
||||
assert client.get("/api/v1/health").status_code == 200
|
||||
```
|
||||
|
||||
`get_settings.cache_clear()` is required before the reload so the new env var is picked up.
|
||||
|
||||
### `.env.example` addition
|
||||
|
||||
```bash
|
||||
# API documentation endpoints (Swagger UI, ReDoc, OpenAPI schema)
|
||||
# Set to false in production to avoid exposing the API surface publicly.
|
||||
API_DOCS_ENABLED=true
|
||||
```
|
||||
|
||||
## Dependencies & Risks
|
||||
|
||||
| Item | Risk | Mitigation |
|
||||
|------|------|------------|
|
||||
| `@lru_cache` on `get_settings()` | Tests may pick up cached settings across reloads | Always call `get_settings.cache_clear()` before reloading `app.main` in tests |
|
||||
| Module-level `get_settings()` in `main.py` | Import fails if required settings are absent (pre-existing behaviour) | Not a new risk; same as today |
|
||||
| `openapi_url=None` | Disables HTTP route but not internal schema generation | Intentional; request validation is unaffected |
|
||||
42
specs/012-api-docs-gate/quickstart.md
Normal file
42
specs/012-api-docs-gate/quickstart.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Quickstart: API Documentation Visibility Gate
|
||||
|
||||
## Verify docs are disabled
|
||||
|
||||
```bash
|
||||
# Start API with docs disabled
|
||||
API_DOCS_ENABLED=false uvicorn app.main:app --reload
|
||||
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/docs # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/redoc # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/openapi.json # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/health # → 200
|
||||
```
|
||||
|
||||
## Verify docs are enabled (default)
|
||||
|
||||
```bash
|
||||
# Start API without the flag (or with it set to true)
|
||||
uvicorn app.main:app --reload
|
||||
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/docs # → 200
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/redoc # → 200
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/openapi.json # → 200
|
||||
```
|
||||
|
||||
## Integration test scenarios
|
||||
|
||||
### Scenario 1: flag disabled — all three docs endpoints return 404
|
||||
|
||||
Start a test client with `API_DOCS_ENABLED=false` injected into settings. Assert each of the three endpoint paths returns 404. Assert `/api/v1/health` returns 200.
|
||||
|
||||
### Scenario 2: flag enabled (default) — docs endpoints return 200
|
||||
|
||||
Start a test client without the flag (or with `API_DOCS_ENABLED=true`). Assert each of the three endpoint paths returns 200.
|
||||
|
||||
### Scenario 3: invalid flag value — app starts, docs enabled
|
||||
|
||||
Set `API_DOCS_ENABLED=not-a-bool`. The app must start without error. Docs must be accessible (safe fallback to enabled).
|
||||
|
||||
### Scenario 4: flag absent — docs enabled (backwards compatibility)
|
||||
|
||||
Start the app with no `API_DOCS_ENABLED` variable set. Assert docs endpoints return 200 — identical to pre-feature behaviour.
|
||||
36
specs/012-api-docs-gate/research.md
Normal file
36
specs/012-api-docs-gate/research.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Research: API Documentation Visibility Gate
|
||||
|
||||
## Decision 1: Env var name
|
||||
|
||||
**Decision**: `API_DOCS_ENABLED` (boolean, default `true`)
|
||||
**Rationale**: Consistent with the existing `API_BASE_URL` naming convention in the project. The positive-phrasing default (`true` = enabled) preserves backwards compatibility — existing deployments that don't set the variable get the same behaviour as today.
|
||||
**Alternatives considered**: `HIDE_API_DOCS=false` (negative phrasing) — inverted booleans are error-prone and confusing in `.env` files; `DOCS_ENABLED` — too generic, could collide with other tools in a multi-service env file.
|
||||
|
||||
## Decision 2: FastAPI docs suppression mechanism
|
||||
|
||||
**Decision**: Pass `docs_url=None`, `redoc_url=None`, `openapi_url=None` to the `FastAPI()` constructor when the flag is disabled.
|
||||
**Rationale**: This is the official FastAPI-supported mechanism. Setting these to `None` causes FastAPI to register no routes for those paths — requests to them fall through to the default 404 handler. Request validation does not depend on the served schema and is unaffected; the schema can still be built in-process (e.g. via `app.openapi()`), but no HTTP route exposes it.
|
||||
**Alternatives considered**: Route-level middleware that intercepts and returns 404 — more complex, not the canonical approach; removing routers at runtime — impossible, routers are registered at import time.
|
||||
|
||||
## Decision 3: Settings read at module level
|
||||
|
||||
**Decision**: Read `get_settings()` once at module import time in `main.py` to configure the `FastAPI()` constructor.
|
||||
**Rationale**: `FastAPI()` is instantiated at module level; the docs URL parameters must be known at that point. `get_settings()` is already `@lru_cache` so calling it at module level is cheap and consistent with calling it again inside `lifespan`. Tests that need to change the flag must reload the module or override `get_settings`.
|
||||
**Alternatives considered**: Lazy initialisation of `app` inside a factory function — would require restructuring `main.py` and all imports; not worth the complexity for this change.
|
||||
|
||||
## Decision 4: Graceful fallback for invalid flag values (FR-007)
|
||||
|
||||
**Decision**: Add a `@field_validator('api_docs_enabled', mode='before')` in `Settings` that wraps Pydantic's bool coercion in a try/except and returns `True` whenever that coercion fails.
|
||||
**Rationale**: Pydantic v2 raises `ValidationError` for unrecognised boolean strings (e.g., `API_DOCS_ENABLED=maybe`). FR-007 requires the app to start rather than fail. The validator intercepts the invalid value before Pydantic's own coercion and returns the safe default.
|
||||
**Alternatives considered**: Using `Optional[bool] = True` without a validator — Pydantic would still raise on invalid input; using `str` field with manual parsing — duplicates Pydantic's boolean parsing logic unnecessarily.
|
||||
|
||||
## Decision 5: Integration test approach
|
||||
|
||||
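**Decision**: Test both enabled and disabled states by setting `API_DOCS_ENABLED` with `monkeypatch`, clearing the `get_settings` cache, and reloading `app.main` so that `FastAPI()` is re-constructed with the new value. Overriding `get_settings` through `app.dependency_overrides` cannot change the docs routes on its own, because they are fixed when the module-level app is constructed; constructing a local `FastAPI` app instance with the appropriate `docs_url`/`redoc_url`/`openapi_url` values remains a fallback for testing the routing behaviour in isolation.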
|
||||
**Rationale**: The `app` in `app.main` is created at import time. Since the unit tests already use `monkeypatch` + `importlib.reload` for config changes, the integration tests for docs visibility can follow the same pattern — reload `app.main` with the env var set before importing `app`. Alternatively, test the URL routing behaviour directly by constructing a minimal test app.
|
||||
**Alternatives considered**: Patching `app.docs_url` after import — FastAPI does not re-register routes when these attributes are changed post-construction; no effect on routing.
|
||||
|
||||
## Decision 6: Production documentation
|
||||
|
||||
**Decision**: Update `.env.example` to include `API_DOCS_ENABLED=true` with a comment recommending `false` for production. No changes to `api/Dockerfile.prod` (env vars are supplied by the deployment environment, not the image).
|
||||
**Rationale**: The Dockerfile intentionally contains no runtime secrets or config. The `.env.example` is the canonical documentation for operators. A comment is sufficient; the production Dockerfile.prod already has no docs-related config.
|
||||
80
specs/012-api-docs-gate/spec.md
Normal file
80
specs/012-api-docs-gate/spec.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Feature Specification: API Documentation Visibility Gate
|
||||
|
||||
**Feature Branch**: `012-api-docs-gate`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Add an environment variable flag to disable the FastAPI Swagger and ReDoc documentation endpoints (and the raw OpenAPI schema) in production. When disabled, all three endpoints return 404. When enabled (the default), behaviour is unchanged. The flag should be off by default in production and on by default in development."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 - Documentation Hidden in Production (Priority: P1)
|
||||
|
||||
An operator deploys the API to a production environment and wants to ensure that the interactive documentation UI and the raw API schema are not publicly reachable. Setting a configuration flag causes all three documentation endpoints to return "not found", as if they do not exist.
|
||||
|
||||
**Why this priority**: Exposing the full API schema and interactive console to anonymous users in production reveals the attack surface of the application. Hiding it is a low-effort, high-value hardening step.
|
||||
|
||||
**Independent Test**: Start the API with the flag set to disabled. Request each of the three documentation endpoints. All three must return 404.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API is started with documentation disabled, **When** a client requests the interactive documentation UI, **Then** the response is 404 Not Found.
|
||||
2. **Given** the API is started with documentation disabled, **When** a client requests the alternative documentation UI, **Then** the response is 404 Not Found.
|
||||
3. **Given** the API is started with documentation disabled, **When** a client requests the raw OpenAPI schema endpoint, **Then** the response is 404 Not Found.
|
||||
4. **Given** the API is started with documentation disabled, **When** a client requests any other API endpoint (e.g., the health check), **Then** the response is unaffected — normal behaviour continues.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 - Documentation Available in Development (Priority: P2)
|
||||
|
||||
A developer runs the API locally without setting the flag. The documentation endpoints remain fully accessible — no change in behaviour from before this feature.
|
||||
|
||||
**Why this priority**: Developer productivity depends on the interactive docs being available during local development. The default must not break existing workflows.
|
||||
|
||||
**Independent Test**: Start the API without the flag set (or with it explicitly enabled). Request each of the three documentation endpoints. All three must respond successfully with their normal content.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API is started without the flag set, **When** a client requests any documentation endpoint, **Then** the response is the same as it was before this feature was introduced.
|
||||
2. **Given** the API is started with the flag explicitly set to enabled, **When** a client requests any documentation endpoint, **Then** the response is the same as it was before this feature was introduced.
|
||||
3. **Given** the flag is changed from enabled to disabled (or vice versa), **When** the API is restarted, **Then** the new state takes effect immediately with no other changes required.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens if the flag is set to an unrecognised value (e.g., a typo)?
|
||||
- What happens if the flag is absent entirely — is the default enabled or disabled?
|
||||
- Does disabling documentation affect any other behaviour (e.g., internal schema generation used for validation)?
|
||||
- If a monitoring tool scrapes the schema endpoint for API drift detection, does disabling break it?
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The system MUST support a configuration flag that controls whether the API documentation endpoints are reachable.
|
||||
- **FR-002**: When the flag is set to disabled, all three documentation endpoints (interactive UI, alternative UI, and raw schema) MUST return 404 Not Found.
|
||||
- **FR-003**: When the flag is set to enabled, the behaviour of all three documentation endpoints MUST be identical to the behaviour before this feature was introduced.
|
||||
- **FR-004**: The flag MUST default to **enabled** when not explicitly set (preserving backwards compatibility for existing deployments).
|
||||
- **FR-005**: Disabling documentation MUST NOT affect any other API endpoint, including the health check, authentication, and all resource endpoints.
|
||||
- **FR-006**: The flag MUST be configurable via an environment variable without requiring a code change or rebuild.
|
||||
- **FR-007**: An unrecognised or missing flag value MUST fall back to the enabled default rather than causing a startup failure.
|
||||
- **FR-008**: The existing `.env.example` file MUST be updated to document the flag and its default value.
|
||||
- **FR-009**: The production environment configuration MUST set the flag to disabled by default.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: With the flag disabled, all three documentation endpoints return 404, confirmed by automated test.
|
||||
- **SC-002**: With the flag enabled (or absent), all three documentation endpoints respond successfully, confirmed by automated test.
|
||||
- **SC-003**: All existing tests continue to pass — zero regressions introduced.
|
||||
- **SC-004**: The flag takes effect on restart with no other intervention required.
|
||||
- **SC-005**: The `.env.example` file documents the flag so any developer setting up the project discovers it without reading source code.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- There are exactly three documentation-related endpoints to gate: the primary interactive UI, the alternative documentation UI, and the raw OpenAPI schema JSON. No other endpoints are affected.
|
||||
- The flag is read once at application startup; a running process does not need to respond to live changes.
|
||||
- Internal schema generation (used by the framework for request validation) is not affected by hiding the documentation endpoints — only the public-facing HTTP routes are removed.
|
||||
- The production Dockerfile (`api/Dockerfile.prod`) does not hardcode the flag; it is supplied via the deployment environment (docker-compose, Kubernetes secret, etc.).
|
||||
- "Off by default in production" means the recommended value for production is disabled, documented in `.env.example` and in the production docker-compose or deployment config; it does not mean the application auto-detects its environment.
|
||||
100
specs/012-api-docs-gate/tasks.md
Normal file
100
specs/012-api-docs-gate/tasks.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Tasks: API Documentation Visibility Gate
|
||||
|
||||
**Input**: Design documents from `specs/012-api-docs-gate/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/docs-endpoints.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). Failing tests are written before implementation code in each phase.
|
||||
|
||||
**Organization**: No setup or foundational phases — this feature modifies four existing files (`api/app/config.py`, `api/app/main.py`, `api/tests/unit/test_config.py`, `.env.example`) and adds one new test file. Phase 3 (US1) covers the disable path; Phase 4 (US2) verifies the enable/default path using the same implementation; Phase 5 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — Documentation Hidden in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: When `API_DOCS_ENABLED=false`, all three documentation endpoints (`/docs`, `/redoc`, `/openapi.json`) return 404. All other endpoints are unaffected.
|
||||
|
||||
**Independent Test**: `make test-unit` passes the new settings tests; `make test-integration` passes the new `test_docs_hidden_when_flag_disabled` integration test.
|
||||
|
||||
### Tests for User Story 1 (TDD — write first, confirm failure before T003)
|
||||
|
||||
- [X] T001 [US1] Add three failing unit tests to `api/tests/unit/test_config.py` using the existing `_apply_env`/`_BASE_ENV` pattern:
|
||||
1. `test_api_docs_enabled_default` — call `Settings()` with `_BASE_ENV` only (no `API_DOCS_ENABLED`); assert `s.api_docs_enabled is True`
|
||||
2. `test_api_docs_enabled_false` — call `Settings()` with `_BASE_ENV` + `{"API_DOCS_ENABLED": "false"}`; assert `s.api_docs_enabled is False`
|
||||
3. `test_api_docs_invalid_value_defaults_to_enabled` — call `Settings()` with `_BASE_ENV` + `{"API_DOCS_ENABLED": "not-a-bool"}`; assert `s.api_docs_enabled is True` (graceful fallback, FR-007)
|
||||
All three tests fail before T003 because `api_docs_enabled` does not yet exist on `Settings`.
|
||||
|
||||
- [X] T002 [US1] Create `api/tests/integration/test_docs_gate.py` with two failing integration tests; the file MUST set up a minimal app client using `from starlette.testclient import TestClient` and the `importlib.reload` + `get_settings.cache_clear()` pattern shown in plan.md:
|
||||
1. `test_docs_hidden_when_flag_disabled(monkeypatch)` — set `API_DOCS_ENABLED=false` via monkeypatch + all required env vars (`DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`); call `get_settings.cache_clear()`; `importlib.reload(app.main)`; create `TestClient(app.main.app)`; assert `/docs` → 404, `/redoc` → 404, `/openapi.json` → 404, `/api/v1/health` → 200; after test, call `get_settings.cache_clear()` again as cleanup
|
||||
2. `test_docs_visible_when_flag_enabled(monkeypatch)` — same setup but with `API_DOCS_ENABLED=true` (or omit it); assert `/docs` → 200, `/redoc` → 200, `/openapi.json` → 200
|
||||
Both tests fail before T003/T004 because `api_docs_enabled` does not exist on `Settings`.
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [X] T003 [US1] Add `api_docs_enabled: bool = True` field and a `coerce_docs_enabled` field validator to the `Settings` class in `api/app/config.py`: the validator MUST use `mode='before'`, be a `@classmethod`, and wrap Pydantic bool coercion in a try/except that returns `True` on any exception (implements FR-007); import `field_validator` from `pydantic` at the top of the file; the field goes after the existing `login_trusted_proxy_ips` field.
|
||||
|
||||
- [X] T004 [US1] Update `api/app/main.py`: before the `app = FastAPI(...)` call, add `_settings = get_settings()`; add `docs_url="/docs" if _settings.api_docs_enabled else None`, `redoc_url="/redoc" if _settings.api_docs_enabled else None`, and `openapi_url="/openapi.json" if _settings.api_docs_enabled else None` as keyword arguments to the `FastAPI()` constructor; the existing module-level defaults for `app.state` (after the `app = FastAPI(...)` line) are unchanged.
|
||||
|
||||
- [X] T005 [US1] Verify TDD green for US1: run `cd api && python -m pytest tests/unit/ -v -k "docs"` and confirm all three new unit tests pass; then run `cd api && python -m pytest tests/unit/ -v` to confirm no regressions in the full 102-test unit suite.
|
||||
|
||||
**Checkpoint**: US1 is complete. With `API_DOCS_ENABLED=false` the three docs endpoints return 404; all other endpoints are unaffected.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Documentation Available in Development (Priority: P2)
|
||||
|
||||
**Goal**: Without the flag set (or with it set to `true`), docs endpoints behave identically to before this feature. Default is backwards compatible.
|
||||
|
||||
**Independent Test**: `make test-integration` — the `test_docs_visible_when_flag_enabled` test written in T002 passes, confirming the enabled/default path.
|
||||
|
||||
- [X] T006 [US2] Verify TDD green for US2: run `make test-integration` from `/workspace` and confirm all integration tests pass, including `test_docs_gate.py::test_docs_visible_when_flag_enabled` and the full existing suite (102 tests + 2 new = 104 total).
|
||||
|
||||
**Checkpoint**: Both user stories verified. Flag disabled → 404; flag enabled or absent → unchanged behaviour.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T007 Add documentation for `API_DOCS_ENABLED` to `/workspace/.env.example`: insert a new section after the `LOGIN_TRUSTED_PROXY_IPS` block with a comment and `API_DOCS_ENABLED=true`; the comment MUST note that this should be set to `false` in production to avoid publicly exposing the API schema
|
||||
|
||||
- [X] T008 Run `ruff check api/app/config.py api/app/main.py api/tests/integration/test_docs_gate.py` from `/workspace` (the paths are relative to the repository root) and fix any lint violations; then run `ruff check api/` to confirm the full API directory is clean
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
- T001 and T002 can run in parallel (different files, both TDD-red before implementation)
|
||||
- T003 must complete before T004 (main.py reads from config.py)
|
||||
- T005 after T003 and T004
|
||||
- T006 after T005
|
||||
- T007 and T008 can run in parallel (different files, after all tests pass)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 (write failing tests — TDD red)
|
||||
Step 2: T003 (implement config.py — turns T001 green)
|
||||
Step 3: T004 (implement main.py — turns T002 green)
|
||||
Step 4: T005 (verify unit tests green)
|
||||
Step 5: T006 (verify integration tests green — regression gate)
|
||||
Step 6: T007 ∥ T008 (polish — .env.example + ruff)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 + US2 — one implementation covers both)
|
||||
|
||||
1. Write failing tests (T001, T002)
|
||||
2. Add `api_docs_enabled` to `config.py` (T003)
|
||||
3. Update `FastAPI()` constructor in `main.py` (T004)
|
||||
4. Verify all tests green (T005, T006)
|
||||
5. Polish (T007, T008)
|
||||
|
||||
US1 and US2 share the same implementation — the flag controls both paths. There is no separate implementation for US2; the default value of `true` is the entire implementation of US2.
|
||||
35
specs/013-k8s-manifests/checklists/requirements.md
Normal file
35
specs/013-k8s-manifests/checklists/requirements.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Specification Quality Checklist: Kubernetes Production Manifests
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [x] No implementation details (languages, frameworks, APIs)
|
||||
- [x] Focused on user value and business needs
|
||||
- [x] Written for non-technical stakeholders
|
||||
- [x] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [x] No [NEEDS CLARIFICATION] markers remain
|
||||
- [x] Requirements are testable and unambiguous
|
||||
- [x] Success criteria are measurable
|
||||
- [x] Success criteria are technology-agnostic (no implementation details)
|
||||
- [x] All acceptance scenarios are defined
|
||||
- [x] Edge cases are identified
|
||||
- [x] Scope is clearly bounded
|
||||
- [x] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [x] All functional requirements have clear acceptance criteria
|
||||
- [x] User scenarios cover primary flows
|
||||
- [x] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [x] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- FR-014 (migration files in production image) is a prerequisite code change to `Dockerfile.prod`, not a manifest. Included in scope as it is required for the init container to function.
|
||||
- Image tag placeholder strategy is documented in Assumptions; the specifics of tag substitution (kustomize, sed, etc.) are left to planning.
|
||||
59
specs/013-k8s-manifests/contracts/operator-deploy.md
Normal file
59
specs/013-k8s-manifests/contracts/operator-deploy.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Contract: Operator Deployment Interface
|
||||
|
||||
The manifests in `k8s/` define the operator's deployment interface — the inputs required before applying and the observable outputs after applying.
|
||||
|
||||
## Pre-deployment Prerequisites (Operator-supplied)
|
||||
|
||||
| Prerequisite | Details |
|
||||
|---|---|
|
||||
| Vault KV v2 secret at `reactbin/api/config` | Must contain keys: `DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, `API_BASE_URL` |
|
||||
| Vault KV v2 secret at `reactbin/minio/credentials` | Must contain keys: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` |
|
||||
| Vault Kubernetes auth role | A role in the Vault Kubernetes auth mount bound to the `default` service account in the `reactbin` namespace with read access to both paths above |
|
||||
| `VaultConnection` resource | Named `default` in the operator's VSO namespace pointing to the Vault server address |
|
||||
| External PostgreSQL database | A dedicated database and user created; `DATABASE_URL` in Vault reflects the credentials |
|
||||
| DNS | The production domain resolves to the cluster ingress IP |
|
||||
| `ClusterIssuer` | A cert-manager `ClusterIssuer` named `letsencrypt-prod` exists in the cluster |
|
||||
| Image tags | The operator substitutes the `latest` placeholder in `k8s/api/deployment.yaml` and `k8s/ui/deployment.yaml` with the real image tag before applying |
|
||||
|
||||
## Apply Command
|
||||
|
||||
```bash
|
||||
# Substitute image tags
|
||||
sed -i 's|reactbin-api:latest|reactbin-api:<tag>|g' k8s/api/deployment.yaml
|
||||
sed -i 's|reactbin-ui:latest|reactbin-ui:<tag>|g' k8s/ui/deployment.yaml
|
||||
|
||||
# Apply all manifests
|
||||
kubectl apply -f k8s/namespace.yaml
kubectl apply -R -f k8s/
|
||||
```
|
||||
|
||||
Applying is idempotent — safe to re-run on every deployment.
|
||||
|
||||
## Observable Outputs (Post-apply)
|
||||
|
||||
| Resource | Expected State |
|
||||
|---|---|
|
||||
| `Namespace/reactbin` | Active |
|
||||
| `Deployment/api` in `reactbin` | 1/1 Ready (init container completes first) |
|
||||
| `Deployment/ui` in `reactbin` | 1/1 Ready |
|
||||
| `StatefulSet/minio` in `reactbin` | 1/1 Ready |
|
||||
| `Job/minio-init-bucket` in `reactbin` | Completed |
|
||||
| `Secret/api-env` in `reactbin` | Created by VSO, populated with all API env keys |
|
||||
| `Secret/minio-credentials` in `reactbin` | Created by VSO, populated with MinIO root keys |
|
||||
| `Certificate/reactbin-tls` in `reactbin` | Issued (may take up to 2 minutes on first apply) |
|
||||
| `Ingress/reactbin` in `reactbin` | Address populated with cluster ingress IP |
|
||||
|
||||
## Verification Commands
|
||||
|
||||
```bash
|
||||
# All pods running
|
||||
kubectl get pods -n reactbin
|
||||
|
||||
# API health
|
||||
curl -sf https://<domain>/api/v1/health
|
||||
|
||||
# UI reachable
|
||||
curl -sf https://<domain>/
|
||||
|
||||
# Docs correctly gated (should return 404)
|
||||
curl -o /dev/null -w "%{http_code}" https://<domain>/docs
|
||||
```
|
||||
238
specs/013-k8s-manifests/plan.md
Normal file
238
specs/013-k8s-manifests/plan.md
Normal file
@@ -0,0 +1,238 @@
|
||||
# Implementation Plan: Kubernetes Production Manifests
|
||||
|
||||
**Branch**: `013-k8s-manifests` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/013-k8s-manifests/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Write Kubernetes manifests deploying Reactbin to k3s: a `Namespace`, API `Deployment` (with Alembic init container) + `Service`, UI `Deployment` + `Service`, a shared `Ingress` with Let's Encrypt TLS, a MinIO `StatefulSet` + `Service` + bucket-init `Job`, and the VSO resources (`VaultAuth`, `VaultStaticSecret` × 2) that sync secrets from Vault; the `VaultConnection` they rely on is an operator-managed prerequisite and is not part of `k8s/`. A small update to `api/Dockerfile.prod` includes Alembic migration files in the production image so the init container can run them.
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: YAML (Kubernetes manifests); Python 3.12 (Dockerfile.prod touch)
|
||||
**Primary Dependencies**: Kubernetes 1.29+ API, nginx Ingress controller, cert-manager (ClusterIssuer `letsencrypt-prod`), Vault Secrets Operator (`secrets.hashicorp.com/v1beta1`), MinIO
|
||||
**Storage**: MinIO StatefulSet with ReadWriteOnce PVC (cluster default storage class); external PostgreSQL (operator-provisioned)
|
||||
**Testing**: `kubectl apply --dry-run=client` for schema validation; `yamllint` for formatting
|
||||
**Target Platform**: k3s cluster (Kubernetes 1.29+, Linux)
|
||||
**Performance Goals**: No measurable impact — manifests are declarative config, not runtime code
|
||||
**Constraints**: All secrets must come from Vault (no plaintext in manifests); all containers run non-root; MinIO is ClusterIP-only (no external Ingress)
|
||||
**Scale/Scope**: 12 YAML files across `k8s/`; one Dockerfile.prod change; one Makefile target
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Requirement | Status |
|
||||
|-----------|-------------|--------|
|
||||
| §5.1 TDD | Failing tests before implementation | ✅ Dry-run validation script written before manifests |
|
||||
| §5.4 CI before done | All tests pass before task marked done | ✅ kubectl dry-run + yamllint gate |
|
||||
| §7.2 Env config | No hardcoded secrets or hostnames | ✅ All secrets via VSO; domain is operator-substituted placeholder |
|
||||
| §7.3 Linting | `ruff` / linting passes | ✅ `yamllint` on all manifests |
|
||||
| §2.6 No speculative abstraction | No Kustomize overlays or Helm chart | ✅ Plain YAML, single environment |
|
||||
| §8 Scope boundaries | No multi-user, no OIDC, no OR/NOT tags | ✅ Not affected |
|
||||
|
||||
**No violations. All gates pass.**
|
||||
|
||||
*Post-design re-check*: The Dockerfile.prod change (FR-014) adds `alembic/` to the runtime stage only — no builder-stage change, no new dependencies, no behaviour change to the running API. Constitution unchanged.
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/013-k8s-manifests/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← 8 decisions
|
||||
├── contracts/
|
||||
│ └── operator-deploy.md ← prerequisites + verification commands
|
||||
├── quickstart.md ← deploy + verify + scenario walkthroughs
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
k8s/ ← NEW directory
|
||||
├── namespace.yaml ← Namespace: reactbin
|
||||
├── api/
|
||||
│ ├── deployment.yaml ← Deployment: api (with alembic init container)
|
||||
│ └── service.yaml ← Service: api (ClusterIP, port 8000)
|
||||
├── ui/
|
||||
│ ├── deployment.yaml ← Deployment: ui
|
||||
│ └── service.yaml ← Service: ui (ClusterIP, port 8080)
|
||||
├── ingress.yaml ← Ingress: /api/ → api, / → ui, TLS via cert-manager
|
||||
├── minio/
|
||||
│ ├── statefulset.yaml ← StatefulSet: minio (volumeClaimTemplates)
|
||||
│ ├── service.yaml ← Service: minio (ClusterIP, port 9000)
|
||||
│ └── init-job.yaml ← Job: minio-init-bucket (mc mb --ignore-existing)
|
||||
└── vault/
|
||||
├── vault-auth.yaml ← VaultAuth: kubernetes method, reactbin role, default SA
|
||||
├── api-secret.yaml ← VaultStaticSecret → K8s Secret: api-env
|
||||
└── minio-secret.yaml ← VaultStaticSecret → K8s Secret: minio-credentials
|
||||
|
||||
api/Dockerfile.prod ← MODIFIED: add alembic/ and alembic.ini to runtime stage
|
||||
Makefile ← MODIFIED: add dry-run validation target
|
||||
```
|
||||
|
||||
## Implementation Design
|
||||
|
||||
### `api/Dockerfile.prod` — runtime stage addition
|
||||
|
||||
```dockerfile
|
||||
# In the runtime stage, after copying app/:
|
||||
COPY --chown=appuser:appgroup alembic/ ./alembic/
|
||||
COPY --chown=appuser:appgroup alembic.ini .
|
||||
```
|
||||
|
||||
No builder-stage change. No new base image. The init container uses the same image and `workingDir: /app`.
|
||||
|
||||
### `k8s/namespace.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: reactbin
|
||||
```
|
||||
|
||||
### `k8s/vault/vault-auth.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultAuth
|
||||
metadata:
|
||||
name: reactbin-auth
|
||||
namespace: reactbin
|
||||
spec:
|
||||
method: kubernetes
|
||||
mount: kubernetes
|
||||
kubernetes:
|
||||
role: reactbin
|
||||
serviceAccount: default
|
||||
audiences:
|
||||
- https://kubernetes.default.svc
|
||||
```
|
||||
|
||||
Note: `VaultConnection` is not included in the `k8s/` tree — it lives in the VSO operator's namespace and is operator-managed infrastructure, not application manifests.
|
||||
|
||||
### `k8s/vault/api-secret.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: api-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-auth
|
||||
mount: secret
|
||||
type: kv-v2
|
||||
path: reactbin/api/config
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: api-env
|
||||
create: true
|
||||
```
|
||||
|
||||
The API Deployment then uses `envFrom: [{secretRef: {name: api-env}}]`.
|
||||
|
||||
### `k8s/vault/minio-secret.yaml`
|
||||
|
||||
Same pattern, path `reactbin/minio/credentials`, destination `minio-credentials`.
|
||||
|
||||
### `k8s/api/deployment.yaml` — init container
|
||||
|
||||
```yaml
|
||||
initContainers:
|
||||
- name: alembic-migrate
|
||||
image: reactbin-api:latest # same tag as main container
|
||||
command: ["alembic", "upgrade", "head"]
|
||||
workingDir: /app
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
containers:
|
||||
- name: api
|
||||
image: reactbin-api:latest
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
env:
|
||||
- name: API_DOCS_ENABLED
|
||||
value: "false"
|
||||
livenessProbe:
|
||||
httpGet: {path: /api/v1/health, port: 8000}
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet: {path: /api/v1/health, port: 8000}
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
```
|
||||
|
||||
### `k8s/ingress.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: reactbin
|
||||
namespace: reactbin
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
spec:
|
||||
ingressClassName: nginx
|
||||
tls:
|
||||
- hosts: [<your-domain>]
|
||||
secretName: reactbin-tls
|
||||
rules:
|
||||
- host: <your-domain>
|
||||
http:
|
||||
paths:
|
||||
- path: /api/
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service: {name: api, port: {number: 8000}}
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service: {name: ui, port: {number: 8080}}
|
||||
```
|
||||
|
||||
`/api/` must be listed before `/`.
|
||||
|
||||
### `k8s/minio/statefulset.yaml` — StatefulSet (not Deployment)
|
||||
|
||||
StatefulSet gives stable pod name `minio-0` and automatic PVC reattachment via `volumeClaimTemplates`. ReadWriteOnce, default storage class.
|
||||
|
||||
Health probes (HTTP, port 9000): `GET /minio/health/live` (liveness), `GET /minio/health/ready` (readiness).
|
||||
|
||||
### `k8s/minio/init-job.yaml`
|
||||
|
||||
```yaml
|
||||
command: ["sh", "-c", "mc alias set local http://minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD && mc mb --ignore-existing local/reactbin"]
|
||||
```
|
||||
|
||||
`restartPolicy: OnFailure`. `--ignore-existing` makes the job idempotent.
|
||||
|
||||
### Makefile addition
|
||||
|
||||
```makefile
|
||||
validate-k8s:
|
||||
yamllint k8s/
|
||||
kubectl apply --dry-run=client -R -f k8s/
|
||||
```
|
||||
|
||||
## Dependencies & Risks
|
||||
|
||||
| Item | Risk | Mitigation |
|
||||
|------|------|------------|
|
||||
| `VaultConnection` not in `k8s/` | Operator may not have it pre-created | Documented as prerequisite in contracts/operator-deploy.md |
|
||||
| `letsencrypt-prod` ClusterIssuer name | May differ in operator's cluster | Documented as prerequisite; easy to sed-replace |
|
||||
| Image tag placeholder `latest` | Operator forgets to substitute | `validate-k8s` dry-run still succeeds with the placeholder, so quickstart.md and the task descriptions warn explicitly |
|
||||
| MinIO PVC storage class | Default may be unsuitable (e.g., ephemeral) | Noted in Assumptions; operator can patch `storageClassName` |
|
||||
| `<your-domain>` placeholder in Ingress | `kubectl apply --dry-run=client` validates everything except host value | Noted in quickstart; hostname must be substituted before applying |
|
||||
92
specs/013-k8s-manifests/quickstart.md
Normal file
92
specs/013-k8s-manifests/quickstart.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# Quickstart: Kubernetes Production Deployment
|
||||
|
||||
## Before You Apply
|
||||
|
||||
1. Store API secrets in Vault at `reactbin/api/config` (KV v2):
|
||||
```
|
||||
DATABASE_URL = postgresql+asyncpg://reactbin:<pw>@<host>:5432/reactbin
|
||||
JWT_SECRET_KEY = <long-random-string>
|
||||
OWNER_USERNAME = <your-username>
|
||||
OWNER_PASSWORD = <your-password>
|
||||
S3_ENDPOINT_URL = http://minio.reactbin.svc.cluster.local:9000
|
||||
S3_BUCKET_NAME = reactbin
|
||||
S3_ACCESS_KEY_ID = <same as MINIO_ROOT_USER>
|
||||
S3_SECRET_ACCESS_KEY = <same as MINIO_ROOT_PASSWORD>
|
||||
API_BASE_URL = https://<your-domain>
|
||||
API_DOCS_ENABLED = false
|
||||
```
|
||||
|
||||
2. Store MinIO credentials in Vault at `reactbin/minio/credentials` (KV v2):
|
||||
```
|
||||
MINIO_ROOT_USER = <choose a strong username>
|
||||
MINIO_ROOT_PASSWORD = <choose a strong password>
|
||||
```
|
||||
|
||||
3. Create a Vault Kubernetes auth role bound to the `default` service account in the `reactbin` namespace with read access to both paths above.
|
||||
|
||||
4. Confirm DNS resolves to the cluster ingress IP and the `letsencrypt-prod` ClusterIssuer exists.
|
||||
|
||||
## Deploy
|
||||
|
||||
```bash
|
||||
# Substitute the real image tags
|
||||
sed -i 's|reactbin-api:latest|reactbin-api:v1.0.0|g' k8s/api/deployment.yaml
|
||||
sed -i 's|reactbin-ui:latest|reactbin-ui:v1.0.0|g' k8s/ui/deployment.yaml
|
||||
|
||||
# Apply everything
|
||||
kubectl apply -f k8s/namespace.yaml
kubectl apply -R -f k8s/
|
||||
```
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
# Watch pods come up (init container runs first on the API pod)
|
||||
kubectl get pods -n reactbin -w
|
||||
|
||||
# API health
|
||||
curl -sf https://<your-domain>/api/v1/health && echo "API OK"
|
||||
|
||||
# UI reachable
|
||||
curl -sf -o /dev/null -w "%{http_code}\n" https://<your-domain>/
|
||||
|
||||
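# Docs correctly gated (NOTE: the Ingress routes only /api/ to the API, so /docs and /redoc
# below are answered by the UI service — confirm the UI returns 404 for unknown paths, or
# check the API directly, e.g. via `kubectl port-forward -n reactbin svc/api 8000:8000`)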
|
||||
curl -o /dev/null -w "%{http_code}\n" https://<your-domain>/docs # → 404
|
||||
curl -o /dev/null -w "%{http_code}\n" https://<your-domain>/redoc # → 404
|
||||
|
||||
# Check migration init container ran
|
||||
kubectl logs -n reactbin -l app=api -c alembic-migrate
|
||||
```
|
||||
|
||||
## Scenario: Migration fails on deploy
|
||||
|
||||
```bash
|
||||
# Pod will be stuck in Init state
|
||||
kubectl get pods -n reactbin
|
||||
# NAME READY STATUS RESTARTS
|
||||
# api-xxx-yyy 0/1 Init:CrashLoopBackOff 2
|
||||
|
||||
# See why
|
||||
kubectl logs -n reactbin <pod-name> -c alembic-migrate
|
||||
|
||||
# Fix the issue (e.g. correct DATABASE_URL in Vault, wait for VSO to resync)
|
||||
# Then restart the deployment to force a fresh rollout
|
||||
kubectl rollout restart deployment/api -n reactbin
|
||||
```
|
||||
|
||||
## Scenario: Update to a new image version
|
||||
|
||||
```bash
|
||||
kubectl set image deployment/api api=reactbin-api:v1.1.0 alembic-migrate=reactbin-api:v1.1.0 -n reactbin
|
||||
kubectl set image deployment/ui ui=reactbin-ui:v1.1.0 -n reactbin
|
||||
# Kubernetes rolls out new pods; init container runs migrations before traffic switches
|
||||
```
|
||||
|
||||
## Scenario: Restore after MinIO pod restart
|
||||
|
||||
MinIO uses a PersistentVolumeClaim. Pod restarts do not affect stored data. Verify:
|
||||
|
||||
```bash
|
||||
kubectl delete pod -n reactbin minio-0
|
||||
kubectl get pods -n reactbin -w # minio-0 restarts, PVC reattaches
|
||||
# Previously uploaded images should still be accessible via the API
|
||||
```
|
||||
63
specs/013-k8s-manifests/research.md
Normal file
63
specs/013-k8s-manifests/research.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Research: Kubernetes Production Manifests
|
||||
|
||||
## Decision 1: VSO CRD chain (VaultConnection → VaultAuth → VaultStaticSecret)
|
||||
|
||||
**Decision**: Use three CRDs — `VaultConnection`, `VaultAuth`, and `VaultStaticSecret` — all under `apiVersion: secrets.hashicorp.com/v1beta1`.
|
||||
**Rationale**: This is the required VSO resource chain. `VaultConnection` points to the Vault server address. `VaultAuth` declares the Kubernetes auth method (role, service account, mount path). `VaultStaticSecret` references a `VaultAuth` via `vaultAuthRef` and declares the Vault KV path and the destination K8s Secret name. VSO syncs all Vault keys to the K8s Secret 1:1 by default — no explicit key mapping needed.
|
||||
**Alternatives considered**: `VaultAuthGlobal` for cross-namespace sharing — not needed; all resources are in the same `reactbin` namespace.
|
||||
|
||||
Key fields:
|
||||
- `VaultStaticSecret.spec.type`: `kv-v2` (standard for modern Vault)
|
||||
- `VaultStaticSecret.spec.refreshAfter`: `1h` (Go duration string)
|
||||
- `VaultStaticSecret.spec.destination.create: true` — VSO creates the K8s Secret if absent
|
||||
- `VaultAuth.spec.kubernetes.role` — a Vault role the operator must pre-create and bind to the `reactbin` namespace service account
|
||||
|
||||
## Decision 2: MinIO as StatefulSet (not Deployment)
|
||||
|
||||
**Decision**: Run MinIO as a `StatefulSet` with `volumeClaimTemplates`.
|
||||
**Rationale**: StatefulSet gives the pod a stable name (`minio-0`) and automatically reattaches its PVC on pod recreation. A Deployment would require a manually-created PVC and is prone to PVC binding issues on reschedule. The marginal complexity of a StatefulSet over a Deployment is acceptable. `ReadWriteOnce` PVC is correct for single-replica MinIO.
|
||||
**Alternatives considered**: Deployment with explicit PVC — works but PVC lifecycle is decoupled from the pod, creating operational risk.
|
||||
|
||||
MinIO health probes:
|
||||
- Liveness: `GET /minio/health/live` on port 9000
|
||||
- Readiness: `GET /minio/health/ready` on port 9000
|
||||
|
||||
MinIO env vars: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` (injected from a K8s Secret synced by VSO).
|
||||
|
||||
## Decision 3: Bucket initialisation via Kubernetes Job with `minio/mc`
|
||||
|
||||
**Decision**: A one-off `Job` using `minio/mc:latest` runs `mc mb --ignore-existing` to create the bucket idempotently.
|
||||
**Rationale**: This is the standard in-cluster pattern. `--ignore-existing` makes the job safe to re-apply (exits 0 if bucket already exists). `restartPolicy: OnFailure` retries transient failures (e.g. MinIO not yet ready).
|
||||
**Alternatives considered**: Init container on the API pod — tightly couples bucket creation to API startup; a Job is cleaner and independently rerunnable.
|
||||
|
||||
## Decision 4: Ingress — single resource, `/api/` path before `/`
|
||||
|
||||
**Decision**: One `Ingress` resource with `ingressClassName: nginx`, two path entries in a single rule: `/api/` (Prefix) → API Service, `/` (Prefix) → UI Service; `/api/` must be listed first.
|
||||
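**Rationale**: With `Prefix` path matching, Kubernetes gives precedence to the longest matching path, and ingress-nginx orders paths by descending length, so `/api/` takes priority over `/`. Listing `/api/` first keeps that precedence obvious to readers and avoids relying on controller-specific ordering. No path rewriting annotation is needed — the API already handles full `/api/v1/...` paths.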
|
||||
**TLS**: cert-manager annotation `cert-manager.io/cluster-issuer: letsencrypt-prod` triggers automatic certificate provisioning into a K8s Secret named in `spec.tls[].secretName`. HTTP→HTTPS redirect is on by default when TLS is configured (`nginx.ingress.kubernetes.io/ssl-redirect: "true"` is explicit but redundant).
|
||||
**Alternatives considered**: Two separate Ingress resources (one per service) — works but harder to reason about routing order; single Ingress is canonical.
|
||||
|
||||
## Decision 5: Alembic init container — same image, workdir `/app`
|
||||
|
||||
**Decision**: The API Deployment includes an init container with the same image as the main container, `command: ["alembic", "upgrade", "head"]`, and `workingDir: /app`. It shares the API's env secret via `envFrom` so it can read `DATABASE_URL`.
|
||||
**Rationale**: Alembic needs `DATABASE_URL` to connect and `alembic.ini` + `alembic/` to find migrations. Both are available in the production image once `Dockerfile.prod` is updated. Using the same image guarantees the migration files match the running version.
|
||||
**Dockerfile.prod update required**: Add `COPY --chown=appuser:appgroup alembic/ ./alembic/` and `COPY --chown=appuser:appgroup alembic.ini .` in the runtime stage (not the builder stage — no compilation needed).
|
||||
**Alternatives considered**: Separate migration image — adds a second image to build and push on every release; unnecessary when the source image already has everything.
|
||||
|
||||
## Decision 6: Image tag strategy — placeholder `latest`, substituted at deploy time
|
||||
|
||||
**Decision**: Manifests reference image tags using `latest` as a documented placeholder. The operator substitutes the real tag either with a `sed` one-liner before applying or with `kubectl set image` after applying.
|
||||
**Rationale**: Kustomize's `images` transformer is the clean alternative, but introduces a tooling dependency. For a personal single-operator deployment, `sed` or `kubectl set image` after `kubectl apply` is simpler and requires no additional setup. The placeholder is documented in the operator guide (quickstart.md).
|
||||
**Alternatives considered**: Kustomize overlays — appropriate for multi-environment setups; over-engineered for one environment.
|
||||
|
||||
## Decision 7: Two VaultStaticSecrets (API env and MinIO credentials)
|
||||
|
||||
**Decision**: Separate VaultStaticSecret resources for API env vars and MinIO root credentials, syncing into `api-env` and `minio-credentials` K8s Secrets respectively.
|
||||
**Rationale**: The API's env secret contains database, JWT, and S3 access credentials. MinIO's root credentials are a different concern with a different rotation lifecycle. Keeping them separate makes Vault policies simpler (least privilege) and avoids giving the API pod access to MinIO's root password.
|
||||
**Vault paths assumed**: `reactbin/api/config` (KV v2) for API env; `reactbin/minio/credentials` (KV v2) for MinIO root credentials.
|
||||
|
||||
## Decision 8: Namespace manifest included in `k8s/`
|
||||
|
||||
**Decision**: `k8s/namespace.yaml` creates the `reactbin` namespace as part of the manifest set.
|
||||
**Rationale**: Makes the full deployment self-contained — operator runs `kubectl apply -f k8s/` without a prerequisite namespace creation step.
|
||||
**Note**: `kubectl apply` is idempotent, so re-applying the manifest when the namespace already exists is safe.
|
||||
124
specs/013-k8s-manifests/spec.md
Normal file
124
specs/013-k8s-manifests/spec.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# Feature Specification: Kubernetes Production Manifests
|
||||
|
||||
**Feature Branch**: `013-k8s-manifests`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Kubernetes manifests for production deployment to k3s: Deployment, Service, and Ingress for the API and UI; VaultStaticSecret CRDs to sync secrets from HashiCorp Vault; Alembic init container on the API Deployment for schema migrations. The cluster uses an nginx ingress controller with Let's Encrypt TLS, a shared external Postgres instance, MinIO running in-cluster, and VSO (Vault Secrets Operator) for secret management."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 — Application Reachable in Production (Priority: P1)
|
||||
|
||||
As an operator, I can apply the manifests to my k3s cluster and have both the API and UI reachable at the production domain over HTTPS, with all health checks passing.
|
||||
|
||||
**Why this priority**: This is the core deployment goal. Nothing else matters if the application is not reachable.
|
||||
|
||||
**Independent Test**: Apply the API and UI manifests with a manually-created K8s Secret (bypassing Vault). Confirm the UI loads at the domain root and the API health endpoint returns 200 at `/api/v1/health`. Confirm HTTPS is enforced and HTTP redirects to HTTPS.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the manifests are applied to the cluster, **When** a browser navigates to `https://<domain>/`, **Then** the UI loads successfully with a valid TLS certificate.
|
||||
2. **Given** the manifests are applied, **When** a request is made to `https://<domain>/api/v1/health`, **Then** a 200 response is returned.
|
||||
3. **Given** the API docs flag is disabled, **When** a request is made to `https://<domain>/docs`, **Then** a 404 is returned.
|
||||
4. **Given** the API pod is restarted, **When** it comes back up, **Then** it passes readiness checks before receiving traffic.
|
||||
5. **Given** a request for an unknown path, **When** it is made to the UI, **Then** the SPA serves the index page (client-side routing is preserved).
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 — Secrets Sourced from Vault (Priority: P2)
|
||||
|
||||
As an operator, no secrets are stored in version-controlled manifest files. All sensitive values are declared in Vault and synced automatically into the cluster as Kubernetes Secrets by the Vault Secrets Operator.
|
||||
|
||||
**Why this priority**: Security prerequisite for production. Hardcoded secrets in manifests are a material risk.
|
||||
|
||||
**Independent Test**: Run `git grep` for known secret patterns across `k8s/` and confirm zero matches. Confirm VaultStaticSecret CRDs reference a Vault path and that the synced K8s Secret is created and the API pod's environment is populated from it.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** Vault contains the required secret values at the declared path, **When** VSO is running, **Then** a K8s Secret is created in the cluster namespace with the declared keys.
|
||||
2. **Given** the K8s Secret exists, **When** the API pod starts, **Then** its environment variables are populated from that secret.
|
||||
3. **Given** a `git grep` for plaintext credentials across `k8s/`, **When** run against the committed manifests, **Then** no plaintext secrets are found.
|
||||
|
||||
---
|
||||
|
||||
### User Story 3 — Schema Migrations Run Before API Starts (Priority: P3)
|
||||
|
||||
As an operator, every time the API is deployed, database migrations run automatically in an init container before the main application container starts. A failed migration prevents the pod from starting, protecting against schema drift.
|
||||
|
||||
**Why this priority**: Prevents the API from serving requests against a stale or incompatible schema. Safe deployment ordering is essential for production.
|
||||
|
||||
**Independent Test**: Deploy with the init container pointing at a valid database. Confirm migrations run and the API starts. Simulate a failing migration by pointing the init container at an unreachable database and confirm the pod stays in init state and does not serve traffic.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API Deployment is applied, **When** the pod starts, **Then** the init container completes `alembic upgrade head` before the main container starts.
|
||||
2. **Given** the schema is already current, **When** the pod starts, **Then** the migration init container exits successfully with no changes applied.
|
||||
3. **Given** the migration fails, **When** the pod starts, **Then** the init container exits non-zero, the main container does not start, and the pod enters a visible error state.
|
||||
|
||||
---
|
||||
|
||||
### User Story 4 — MinIO Runs In-Cluster with Persistent Storage (Priority: P4)
|
||||
|
||||
As an operator, MinIO runs inside the cluster with a PersistentVolumeClaim for durable storage, is not externally reachable, and has the required bucket initialised on first deployment.
|
||||
|
||||
**Why this priority**: Required for image storage, but decoupled from the other manifests — the S3 endpoint is just a config value the API reads.
|
||||
|
||||
**Independent Test**: Confirm the MinIO pod is running and has no external Ingress. Confirm the required bucket exists. Restart the MinIO pod and confirm previously stored objects are still accessible.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the MinIO manifests are applied, **When** the MinIO pod starts, **Then** the required bucket is created and the API can store and retrieve images.
|
||||
2. **Given** the MinIO pod is restarted, **When** it comes back up, **Then** all previously stored objects remain accessible (PVC-backed storage persists).
|
||||
3. **Given** no Ingress is defined for MinIO, **When** a connection is attempted from outside the cluster, **Then** MinIO is not reachable.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What if Vault is unavailable when VSO tries to sync? VSO retries on a configurable interval; the pod will not start until the K8s Secret exists.
|
||||
- What if the database is unreachable during migration? The init container exits non-zero; the pod does not start and Kubernetes retries with backoff.
|
||||
- What if the MinIO PVC runs out of space? MinIO will fail writes; the API will return upload errors. Capacity monitoring is out of scope for this feature.
|
||||
- What if migrations and the main container use different image tags? They use the same tag in the same Deployment spec, so they are always in sync.
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: All manifests MUST target a single configurable namespace (default: `reactbin`).
|
||||
- **FR-002**: The API MUST be deployed as a Deployment with liveness and readiness probes on `/api/v1/health`.
|
||||
- **FR-003**: The API Deployment MUST include an init container using the same image that runs database schema migrations before the main container starts.
|
||||
- **FR-004**: The API Deployment MUST set `API_DOCS_ENABLED=false`.
|
||||
- **FR-005**: The UI MUST be deployed as a Deployment with a liveness probe confirming the nginx process is serving.
|
||||
- **FR-006**: A single Ingress MUST route `https://<domain>/api/` to the API Service and all other paths to the UI Service, with TLS termination via a cert-manager Let's Encrypt certificate.
|
||||
- **FR-007**: HTTP requests MUST be redirected to HTTPS via the Ingress.
|
||||
- **FR-008**: All API secrets MUST be declared in a VaultStaticSecret CRD and synced into a K8s Secret; secret values MUST NOT appear as plaintext in any manifest file.
|
||||
- **FR-009**: The API Deployment MUST source all environment variables from the synced K8s Secret via `envFrom`.
|
||||
- **FR-010**: MinIO MUST be deployed as a StatefulSet with a PersistentVolumeClaim using the cluster's default storage class.
|
||||
- **FR-011**: A Kubernetes Job MUST create the required S3 bucket in MinIO on first deployment and MUST be idempotent on re-apply.
|
||||
- **FR-012**: MinIO MUST have no Ingress; it MUST only be accessible within the cluster via ClusterIP.
|
||||
- **FR-013**: All containers MUST run as non-root users.
|
||||
- **FR-014**: The API production image MUST include migration files so the init container can run migrations without a separate image.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: The application is accessible at the production domain within 120 seconds of `kubectl apply`.
|
||||
- **SC-002**: Schema migrations complete and the API begins serving traffic without manual operator intervention on every deployment.
|
||||
- **SC-003**: A `git grep` across `k8s/` finds zero plaintext secret values in committed files.
|
||||
- **SC-004**: A simulated migration failure holds the pod in init state and the application never serves traffic.
|
||||
- **SC-005**: Restarting the MinIO pod does not result in data loss — previously uploaded images remain accessible.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The k3s cluster is running with the nginx ingress controller installed.
|
||||
- cert-manager is installed and a `ClusterIssuer` named `letsencrypt-prod` is already configured.
|
||||
- The Vault Secrets Operator is installed in the cluster.
|
||||
- A HashiCorp Vault instance is accessible from the cluster and the required secret values are stored at the declared Vault path before deployment.
|
||||
- A shared external PostgreSQL instance is available; the operator creates a dedicated database and user before deploying.
|
||||
- DNS for the production domain is already pointing at the cluster ingress IP.
|
||||
- Manifests are stored in a `k8s/` directory at the repository root.
|
||||
- The cluster's default storage class supports ReadWriteOnce (sufficient for single-replica MinIO).
|
||||
- All Deployments run a single replica (personal tool, no HA requirement).
|
||||
- Image tags are managed externally; manifests use a placeholder tag that the operator substitutes at deploy time.
|
||||
- The `API_DOCS_ENABLED` flag exists on the API (implemented in feature 012).
|
||||
174
specs/013-k8s-manifests/tasks.md
Normal file
174
specs/013-k8s-manifests/tasks.md
Normal file
@@ -0,0 +1,174 @@
|
||||
# Tasks: Kubernetes Production Manifests
|
||||
|
||||
**Input**: Design documents from `specs/013-k8s-manifests/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/operator-deploy.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: K8s manifests have no unit test framework. Validation is via `yamllint` (format) and `kubectl apply --dry-run=client` (schema). Each phase ends with a validation step. The TDD analogue is: write the validate-k8s Makefile target (Phase 1) before any manifest exists, so it immediately fails — then manifests are written to make it pass.
|
||||
|
||||
**Organization**: Phase 1 creates the directory structure and validation target. Phase 2 creates the namespace and the Vault custom resources (foundational — required by all user story deployments). Phases 3–6 implement user stories. Phase 7 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
**Goal**: Create the `k8s/` directory structure and the validation Makefile target before any manifests exist.
|
||||
|
||||
- [X] T001 Create the `k8s/` directory tree: `mkdir -p k8s/api k8s/ui k8s/minio k8s/vault` from the repository root; confirm the four subdirectories exist
|
||||
|
||||
- [X] T002 Add a `validate-k8s` target to `Makefile` immediately after the existing `verify-ui-prod` target: the target MUST run `yamllint -d relaxed k8s/` then `kubectl apply --dry-run=client -f k8s/`; add `validate-k8s` to the `.PHONY` line; note in a comment that `kubectl apply --dry-run=client` requires a kubeconfig with cluster access — offline validation uses `yamllint` only
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational (Namespace + Vault CRDs)
|
||||
|
||||
**Goal**: Namespace and Vault secret-sync resources that every other manifest depends on.
|
||||
|
||||
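**⚠️ CRITICAL**: No user story manifest can be applied until this phase is complete — the namespace must exist before any namespaced resource, and the Vault custom resources (`VaultAuth`, `VaultStaticSecret`) must exist before the API or MinIO pods can start, because those pods consume the Secrets (`api-env`, `minio-credentials`) that the Vault Secrets Operator creates from them.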
|
||||
|
||||
- [X] T003 Create `k8s/namespace.yaml`: a single `Namespace` resource with `name: reactbin` and no additional labels
|
||||
|
||||
- [X] T004 [P] Create `k8s/vault/vault-auth.yaml`: a `VaultAuth` resource (`apiVersion: secrets.hashicorp.com/v1beta1`) with `name: reactbin-auth`, `namespace: reactbin`, `spec.method: kubernetes`, `spec.mount: kubernetes`, `spec.kubernetes.role: reactbin`, `spec.kubernetes.serviceAccount: default`, `spec.kubernetes.audiences: [https://kubernetes.default.svc]`; add a comment noting the operator must create the Vault role and bind it to the `default` SA in the `reactbin` namespace with read access to both secret paths
|
||||
|
||||
- [X] T005 [P] Create `k8s/vault/api-secret.yaml`: a `VaultStaticSecret` resource with `name: api-secret`, `namespace: reactbin`, `spec.vaultAuthRef: reactbin-auth`, `spec.mount: secret`, `spec.type: kv-v2`, `spec.path: reactbin/api/config`, `spec.refreshAfter: 1h`, `spec.destination.name: api-env`, `spec.destination.create: true`; add a comment listing all required Vault keys: `DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, `API_BASE_URL`
|
||||
|
||||
- [X] T006 [P] Create `k8s/vault/minio-secret.yaml`: same structure as T005 but `name: minio-secret`, `spec.path: reactbin/minio/credentials`, `spec.destination.name: minio-credentials`; comment listing required Vault keys: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD`
|
||||
|
||||
**Checkpoint**: Foundational resources complete. User story implementation can now begin.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — Application Reachable in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: API and UI are deployed and reachable at the production domain via HTTPS with TLS from cert-manager.
|
||||
|
||||
**Independent Test**: Apply all Phase 2 + Phase 3 manifests. Confirm `kubectl get pods -n reactbin` shows api and ui pods Running. Confirm `curl https://<domain>/api/v1/health` returns 200 and `curl https://<domain>/` returns 200.
|
||||
|
||||
- [X] T007 [P] [US1] Create `k8s/api/service.yaml`: `Service`, `name: api`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: api}`, `ports: [{port: 8000, targetPort: 8000, name: http}]`
|
||||
|
||||
- [X] T008 [P] [US1] Create `k8s/ui/service.yaml`: `Service`, `name: ui`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: ui}`, `ports: [{port: 8080, targetPort: 8080, name: http}]`
|
||||
|
||||
- [X] T009 [P] [US1] Create `k8s/ui/deployment.yaml`: `Deployment`, `name: ui`, `namespace: reactbin`, 1 replica, `selector.matchLabels: {app: ui}`; container `name: ui`, `image: reactbin-ui:latest` (placeholder — operator substitutes real tag), `ports: [{containerPort: 8080}]`; `livenessProbe: {httpGet: {path: /, port: 8080}, initialDelaySeconds: 10, periodSeconds: 30}`; `securityContext: {runAsNonRoot: true, runAsUser: 101}` (UID 101 is the nginxinc/nginx-unprivileged user); add comment: `# Replace 'latest' with the real image tag before applying`
|
||||
|
||||
- [X] T010 [US1] Create `k8s/api/deployment.yaml`: `Deployment`, `name: api`, `namespace: reactbin`, 1 replica, `selector.matchLabels: {app: api}`; container `name: api`, `image: reactbin-api:latest` (placeholder), `ports: [{containerPort: 8000}]`; `envFrom: [{secretRef: {name: api-env}}]`; `env: [{name: API_DOCS_ENABLED, value: "false"}]`; `livenessProbe: {httpGet: {path: /api/v1/health, port: 8000}, initialDelaySeconds: 10, periodSeconds: 30}`; `readinessProbe: {httpGet: {path: /api/v1/health, port: 8000}, initialDelaySeconds: 5, periodSeconds: 10}`; `securityContext: {runAsNonRoot: true, runAsUser: 1001}`; add comment: `# initContainers block added in US3 (T015)`; add comment: `# Replace 'latest' with the real image tag before applying`
|
||||
|
||||
- [X] T011 [US1] Create `k8s/ingress.yaml`: `Ingress`, `name: reactbin`, `namespace: reactbin`; `annotations: {"cert-manager.io/cluster-issuer": "letsencrypt-prod", "nginx.ingress.kubernetes.io/ssl-redirect": "true"}`; `spec.ingressClassName: nginx`; `spec.tls: [{hosts: ["<your-domain>"], secretName: reactbin-tls}]`; `spec.rules: [{host: "<your-domain>", http: {paths: [{path: /api/, pathType: Prefix, backend: {service: {name: api, port: {number: 8000}}}}, {path: /, pathType: Prefix, backend: {service: {name: ui, port: {number: 8080}}}}]}}]`; keep the `/api/` path entry before `/` in the YAML for readability (routing does not depend on declaration order — ingress-nginx sorts paths by descending length, so the longer `/api/` prefix is matched before `/`); add comment: `# Replace <your-domain> with the real domain before applying`
|
||||
|
||||
- [X] T012 [US1] Verify US1: run `yamllint -d relaxed k8s/` from the repository root and confirm no errors; run `kubectl apply --dry-run=client -f k8s/` (requires cluster kubeconfig) and confirm all resources in namespace.yaml, vault/, api/, ui/, and ingress.yaml are accepted; if no cluster is available, yamllint passing is sufficient for this checkpoint
|
||||
|
||||
**Checkpoint**: US1 complete. API and UI manifests are schema-valid and ready to apply.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Secrets Sourced from Vault (Priority: P2)
|
||||
|
||||
**Goal**: Confirm that no plaintext secret values appear in any committed manifest file. The implementation (VaultAuth + VaultStaticSecret × 2) was completed in Phase 2.
|
||||
|
||||
**Independent Test**: `git grep` across `k8s/` finds no plaintext credential values.
|
||||
|
||||
- [X] T013 [US2] Verify US2: run `git grep -rni "password\|secret_key\|access_key\|database_url" k8s/` (case-insensitive, so upper-case key names such as `OWNER_PASSWORD`, `JWT_SECRET_KEY`, and `S3_ACCESS_KEY_ID` are matched) and confirm that only key names (in comments and `secretKeyRef` references) and Vault path references appear — no actual values; also confirm that `k8s/vault/api-secret.yaml` and `k8s/vault/minio-secret.yaml` reference Vault paths under `spec.path` and that `spec.destination.create: true` is set so VSO creates the K8s Secrets
|
||||
|
||||
**Checkpoint**: US2 complete. Zero plaintext secrets in manifests; all secrets flow through Vault.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: User Story 3 — Schema Migrations Run Before API Starts (Priority: P3)
|
||||
|
||||
**Goal**: The API Deployment includes an Alembic init container. `api/Dockerfile.prod` is updated to include migration files.
|
||||
|
||||
**Independent Test**: `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test` succeeds and `docker run --rm reactbin-api-prod:test ls /app/alembic` shows migration files. `make validate-k8s` confirms the init container spec is accepted by the Kubernetes schema.
|
||||
|
||||
- [X] T014 [US3] Update `api/Dockerfile.prod`: in the **runtime stage** (the `FROM python:3.12-slim` stage), after the line `COPY --chown=appuser:appgroup app/ ./app/`, add two new lines: `COPY --chown=appuser:appgroup alembic/ ./alembic/` and `COPY --chown=appuser:appgroup alembic.ini .`; the builder stage is unchanged; verify with `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test && docker run --rm reactbin-api-prod:test ls /app/alembic /app/alembic.ini`
|
||||
|
||||
- [X] T015 [US3] Update `k8s/api/deployment.yaml`: add an `initContainers` block to the pod spec (before the `containers` block) containing one init container: `name: alembic-migrate`, `image: reactbin-api:latest` (same placeholder tag as the main container), `command: ["alembic", "upgrade", "head"]`, `workingDir: /app`, `envFrom: [{secretRef: {name: api-env}}]`, `securityContext: {runAsNonRoot: true, runAsUser: 1001}`; remove the `# initContainers block added in US3 (T015)` comment added in T010
|
||||
|
||||
- [X] T016 [US3] Verify US3: run `make validate-k8s` (or `yamllint -d relaxed k8s/`) and confirm the updated deployment.yaml with the init container passes validation; run `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test` and confirm it succeeds; run `docker run --rm reactbin-api-prod:test ls /app/alembic.ini` and confirm the file is present
|
||||
|
||||
**Checkpoint**: US3 complete. API Deployment includes Alembic init container; production image includes migration files.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: User Story 4 — MinIO In-Cluster with Persistent Storage (Priority: P4)
|
||||
|
||||
**Goal**: MinIO runs as a StatefulSet with a PVC, is accessible only within the cluster, and has the required bucket created by a Job.
|
||||
|
||||
**Independent Test**: `make validate-k8s` confirms all MinIO manifests pass schema validation. On a live cluster: MinIO pod reaches Running state, bucket exists, no external Ingress for MinIO.
|
||||
|
||||
- [X] T017 [P] [US4] Create `k8s/minio/service.yaml`: `Service`, `name: minio`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: minio}`, `ports: [{port: 9000, targetPort: 9000, name: s3}]`; add comment: `# No Ingress for MinIO — internal access only (FR-012)`
|
||||
|
||||
- [X] T018 [US4] Create `k8s/minio/statefulset.yaml`: `StatefulSet` (NOT Deployment — StatefulSet ensures stable PVC binding on pod recreation), `name: minio`, `namespace: reactbin`, `replicas: 1`, `selector.matchLabels: {app: minio}`, `serviceName: minio`; pod `securityContext: {runAsUser: 1000, runAsGroup: 1000, fsGroup: 1000}`; container `name: minio`, `image: minio/minio:latest`, `args: ["server", "/data", "--console-address", ":9001"]`, `ports: [{containerPort: 9000, name: s3}]`; `env: [{name: MINIO_ROOT_USER, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_USER}}}, {name: MINIO_ROOT_PASSWORD, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_PASSWORD}}}]`; `livenessProbe: {httpGet: {path: /minio/health/live, port: 9000}, initialDelaySeconds: 30, periodSeconds: 20}`; `readinessProbe: {httpGet: {path: /minio/health/ready, port: 9000}, initialDelaySeconds: 15, periodSeconds: 10}`; `volumeMounts: [{name: minio-data, mountPath: /data}]`; `volumeClaimTemplates: [{metadata: {name: minio-data}, spec: {accessModes: [ReadWriteOnce], resources: {requests: {storage: 10Gi}}}}]`; add comment: `# storageClassName omitted — uses cluster default; override if needed`
|
||||
|
||||
- [X] T019 [US4] Create `k8s/minio/init-job.yaml`: `Job`, `name: minio-init-bucket`, `namespace: reactbin`; `spec.template.spec.restartPolicy: OnFailure`; container `name: mc`, `image: minio/mc:latest`, `command: ["sh", "-c"]`, `args: ["mc alias set local http://minio.reactbin.svc.cluster.local:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD && mc mb --ignore-existing local/reactbin"]`; `env: [{name: MINIO_ROOT_USER, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_USER}}}, {name: MINIO_ROOT_PASSWORD, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_PASSWORD}}}]`; `securityContext: {runAsNonRoot: false}` with comment `# minio/mc runs as root by default; FR-013 exception for this one-off init Job`; add comment: `# --ignore-existing makes this Job idempotent — safe to re-apply`
|
||||
|
||||
- [X] T020 [US4] Verify US4: run `make validate-k8s` (or `yamllint -d relaxed k8s/`) and confirm all three MinIO manifests (statefulset.yaml, service.yaml, init-job.yaml) pass validation; confirm no Ingress resource references MinIO
|
||||
|
||||
**Checkpoint**: All four user stories complete.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T021 [P] Run `yamllint -d relaxed k8s/` from the repository root and fix any YAML formatting violations across all 12 manifest files; confirm output shows no errors
|
||||
|
||||
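- [X] T022 [P] Add `.yamllint.yml` at the repository root (if not already present) with `extends: relaxed` and `rules: {line-length: {max: 120}}` to keep line length reasonable for verbose K8s YAML; note that `yamllint -d relaxed k8s/` supplies its configuration inline and therefore ignores this file — run `yamllint k8s/` (without `-d`) for the 120-column limit to take effect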
|
||||
|
||||
- [X] T023 Run `make build-prod` to confirm `api/Dockerfile.prod` still builds cleanly after the T014 addition; run `docker run --rm reactbin-api-prod:latest ls /app/alembic.ini /app/alembic/` and confirm both are present in the production image
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
- T001 and T002 can run in parallel (directory creation vs Makefile edit)
|
||||
- T003, T004, T005, T006 can run in parallel after T001 (different files, same phase)
|
||||
- T007, T008, T009 can run in parallel after Phase 2 completes
|
||||
- T010 after T007 (deployment references service name, easier to write with service done) — but they're different files so technically parallel; keep sequential for clarity
|
||||
- T011 after T007 and T008 (Ingress references both service names)
|
||||
- T012 after T007–T011
|
||||
- T013 after Phase 2 (Vault CRDs exist to inspect)
|
||||
- T014 and T015 can run in parallel (different files: Dockerfile.prod vs deployment.yaml)
|
||||
- T016 after T014 and T015
|
||||
- T017, T018, T019 can run in parallel after Phase 2 completes
|
||||
- T020 after T017–T019
|
||||
- T021, T022, T023 can run in parallel
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 (setup)
|
||||
Step 2: T003 ∥ T004 ∥ T005 ∥ T006 (foundational: namespace + Vault CRDs)
|
||||
Step 3: T007 ∥ T008 ∥ T009 (US1: services + UI deployment)
|
||||
Step 4: T010 (US1: API deployment)
|
||||
Step 5: T011 (US1: Ingress)
|
||||
Step 6: T012 (US1: validate)
|
||||
Step 7: T013 (US2: verify no plaintext secrets)
|
||||
Step 8: T014 ∥ T015 (US3: Dockerfile.prod + init container)
|
||||
Step 9: T016 (US3: verify)
|
||||
Step 10: T017 ∥ T018 ∥ T019 (US4: MinIO manifests)
|
||||
Step 11: T020 (US4: validate MinIO)
|
||||
Step 12: T021 ∥ T022 ∥ T023 (polish)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 + US2 — application is reachable with Vault-backed secrets)
|
||||
|
||||
1. Phase 1 (Setup) + Phase 2 (Foundational)
|
||||
2. Phase 3 (US1 — API, UI, Ingress)
|
||||
3. Phase 4 (US2 — verify no plaintext secrets)
|
||||
4. **STOP and VALIDATE**: apply to cluster, confirm `https://<domain>/` and `/api/v1/health` return 200
|
||||
5. Deploy MVP
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
1. Setup + Foundational → Apply → namespace and Vault sync ready
|
||||
2. Add US1 (API + UI + Ingress) → Deploy → application reachable at domain
|
||||
3. Add US3 (Alembic init container) → Deploy → migrations run automatically on rollout
|
||||
4. Add US4 (MinIO) → Deploy → persistent image storage in-cluster
|
||||
5. Polish → clean YAML, confirmed builds
|
||||
Reference in New Issue
Block a user