Compare commits
18 Commits
265b967f6b
...
v1.0.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 9db20fdf90 | |||
| 9b66fe1918 | |||
| e9a2e9f014 | |||
| 7b3d4a9257 | |||
| 7c57629941 | |||
| 4fe8b19d19 | |||
| e34c9f7b7f | |||
| 551ddbec3b | |||
| 666c32cd69 | |||
| bf27c97deb | |||
| ce279e6121 | |||
| b14508e4cf | |||
| 602648ef56 | |||
| 1b3468b72d | |||
| 12176471e1 | |||
| 7a835d3172 | |||
| f3e0021ee8 | |||
| 354c85292d |
12
.env.example
12
.env.example
@@ -19,3 +19,15 @@ JWT_SECRET_KEY=change-me-to-a-long-random-string
|
||||
JWT_EXPIRY_SECONDS=86400
|
||||
OWNER_USERNAME=owner
|
||||
OWNER_PASSWORD=change-me
|
||||
|
||||
# Login brute-force protection
|
||||
LOGIN_MAX_FAILURES=5
|
||||
LOGIN_WINDOW_SECONDS=300
|
||||
LOGIN_COOLDOWN_SECONDS=900
|
||||
# Comma-separated IPs/CIDRs of trusted upstream proxies (e.g. nginx ingress pod CIDR).
|
||||
# Leave empty when not behind a reverse proxy.
|
||||
LOGIN_TRUSTED_PROXY_IPS=
|
||||
|
||||
# API documentation endpoints (Swagger UI, ReDoc, OpenAPI schema)
|
||||
# Set to false in production to avoid exposing the API surface publicly.
|
||||
API_DOCS_ENABLED=true
|
||||
|
||||
36
.env.test.example
Normal file
36
.env.test.example
Normal file
@@ -0,0 +1,36 @@
|
||||
# Integration test environment variables
|
||||
# Used when running pytest directly on the host (outside Docker).
|
||||
#
|
||||
# Start test services first:
|
||||
# docker compose -f docker-compose.test.yml up -d postgres-test minio-test minio-init-test
|
||||
#
|
||||
# Then source this file and run tests:
|
||||
# export $(grep -v '^#' .env.test.example | xargs)
|
||||
# cd api && python -m pytest tests/integration/ -v
|
||||
|
||||
# PostgreSQL test database (postgres-test container on host port 5433)
|
||||
TEST_DATABASE_URL=postgresql+asyncpg://reactbin:reactbin@localhost:5433/reactbin_test
|
||||
DATABASE_URL=postgresql+asyncpg://reactbin:reactbin@localhost:5433/reactbin_test
|
||||
|
||||
# MinIO test instance (minio-test container on host port 9002)
|
||||
S3_ENDPOINT_URL=http://localhost:9002
|
||||
S3_BUCKET_NAME=reactbin-test
|
||||
S3_ACCESS_KEY_ID=minioadmin
|
||||
S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
|
||||
# Auth (test values — not for production)
|
||||
JWT_SECRET_KEY=test-secret-key-for-testing-only
|
||||
OWNER_USERNAME=testowner
|
||||
OWNER_PASSWORD=testpassword
|
||||
|
||||
# API
|
||||
API_BASE_URL=http://localhost:8000
|
||||
MAX_UPLOAD_BYTES=52428800
|
||||
|
||||
# Login brute-force protection
|
||||
LOGIN_MAX_FAILURES=5
|
||||
LOGIN_WINDOW_SECONDS=300
|
||||
LOGIN_COOLDOWN_SECONDS=900
|
||||
# Comma-separated IPs/CIDRs of trusted upstream proxies; leave empty for direct connections.
|
||||
LOGIN_TRUSTED_PROXY_IPS=
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -5,6 +5,7 @@ notes/
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
!.env.test.example
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
@@ -15,6 +16,8 @@ venv/
|
||||
*.egg-info/
|
||||
dist/
|
||||
build/
|
||||
!api/tests/build/
|
||||
!ui/tests/build/
|
||||
.pytest_cache/
|
||||
.ruff_cache/
|
||||
.coverage
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 352 KiB After Width: | Height: | Size: 1.2 MiB |
@@ -1 +1,3 @@
|
||||
{"feature_directory":"specs/007-tag-browser"}
|
||||
{
|
||||
"feature_directory": "specs/013-k8s-manifests"
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
<!--
|
||||
SYNC IMPACT REPORT
|
||||
==================
|
||||
Version change: 1.1.1 → 1.2.0
|
||||
Ratified: 2026-05-01 | Last amended: 2026-05-03
|
||||
Version change: 1.3.0 → 1.4.0
|
||||
Ratified: 2026-05-01 | Last amended: 2026-05-08
|
||||
|
||||
Principles introduced (first population from docs/CONSTITUTION.md):
|
||||
- §2 Architecture Principles (6 sub-principles)
|
||||
@@ -94,10 +94,11 @@ The constitution acknowledges all three; the spec governs which is built.
|
||||
|
||||
### 2.5 Database abstraction
|
||||
|
||||
PostgreSQL is the Phase 1 database. All DB access MUST go through a repository
|
||||
layer (one repository class per domain aggregate). Raw SQL or an ORM is
|
||||
acceptable, but no query logic MAY live outside a repository. This makes the
|
||||
planned PostgreSQL → SQLite refactor a repository-layer change only.
|
||||
PostgreSQL is the database. All DB access MUST go through a repository layer
|
||||
(one repository class per domain aggregate). Raw SQL or an ORM is acceptable,
|
||||
but no query logic MAY live outside a repository. No alternative database
|
||||
engine (SQLite, DuckDB, in-memory substitutes) MAY be used in integration
|
||||
tests — dialect differences mask production bugs.
|
||||
|
||||
### 2.6 No speculative abstraction
|
||||
|
||||
@@ -170,17 +171,23 @@ OR/NOT logic is explicitly out of scope until the constitution is revised.
|
||||
|
||||
## 5. Testing Discipline
|
||||
|
||||
### 5.1 TDD is non-negotiable
|
||||
### 5.1 Tests are required alongside every implementation task
|
||||
|
||||
No production code MAY be written before a failing test exists for it. This
|
||||
applies to both API and UI. Tasks MUST include a "write failing test" step
|
||||
before any implementation step.
|
||||
Every implementation task MUST be accompanied by tests covering its behaviour.
|
||||
The ideal is red-green-refactor: write a failing test, then make it pass. In
|
||||
practice, tests written in the same task as the implementation are acceptable;
|
||||
what is non-negotiable is that no implementation task is marked done without
|
||||
corresponding test coverage. Tasks MUST NOT be split such that implementation
|
||||
is complete but tests are deferred to a later task.
|
||||
|
||||
### 5.2 Test pyramid
|
||||
|
||||
- **Unit tests** — pure logic, repository mocks, no I/O
|
||||
- **Integration tests** — API routes tested against a real (test) database
|
||||
and a real (test) S3-compatible bucket (e.g. MinIO in Docker)
|
||||
- **Integration tests** — API routes tested against a real PostgreSQL instance
|
||||
and a real S3-compatible bucket (e.g. MinIO in Docker). SQLite and other
|
||||
in-memory database substitutes are **prohibited** — PostgreSQL-specific
|
||||
behaviour (GROUP BY enforcement, JSON operators, constraint handling) MUST
|
||||
be exercised by the test suite.
|
||||
- **E2E tests** — Angular + API, minimal set covering the core happy paths
|
||||
|
||||
Unit and integration tests are required. E2E tests are best-effort in v1.
|
||||
@@ -190,10 +197,15 @@ Unit and integration tests are required. E2E tests are best-effort in v1.
|
||||
API tests in `api/tests/`, UI tests colocated with their components. No
|
||||
separate top-level `tests/` directory that mirrors the source tree.
|
||||
|
||||
### 5.4 CI must pass before any task is considered done
|
||||
### 5.4 The test suite must pass before any task is considered done
|
||||
|
||||
"Done" means: all tests pass, linter passes, type checker passes. A task MUST
|
||||
NOT be marked complete while CI is failing.
|
||||
NOT be marked complete while any of these are failing.
|
||||
|
||||
The acceptance gate is `make test-unit && make test-integration` plus `ruff
|
||||
check` / `ruff format --check` for the API. A formal CI pipeline is planned
|
||||
but not yet in place; until one exists, passing the above commands locally is
|
||||
the required gate. When CI is introduced it MUST enforce the same checks.
|
||||
|
||||
---
|
||||
|
||||
@@ -210,6 +222,9 @@ NOT be marked complete while CI is failing.
|
||||
| UI framework | Angular (latest stable) | Job-relevant, learning goal |
|
||||
| UI language | TypeScript strict mode | No `any`, no implicit types |
|
||||
| Containerisation | Docker + Docker Compose | Local dev must start with one command |
|
||||
| Production runtime | k3s (Kubernetes) | Manifests in `k8s/`; see deployment docs |
|
||||
| Ingress | nginx ingress controller + cert-manager | TLS via Let's Encrypt (`letsencrypt-prod` ClusterIssuer) |
|
||||
| Secret management | HashiCorp Vault + VSO (Vault Secrets Operator) | Secrets never committed; VSO syncs Vault KV v2 → K8s Secrets |
|
||||
|
||||
---
|
||||
|
||||
@@ -247,6 +262,15 @@ revised:
|
||||
- Mobile-native app
|
||||
- OIDC auth (planned Phase 3)
|
||||
|
||||
**Known gaps carried forward from v1** — these are not out of scope; they are
|
||||
acknowledged deficiencies that MUST be resolved before the affected area is
|
||||
expanded:
|
||||
|
||||
- **Password hashing**: The owner password is currently stored and compared in
|
||||
plaintext. Hashing (bcrypt or Argon2) MUST be implemented before any
|
||||
additional authentication work (e.g. OIDC, additional accounts) is started.
|
||||
Specs that touch credential storage MUST address this first.
|
||||
|
||||
---
|
||||
|
||||
## 9. Governance
|
||||
@@ -284,7 +308,9 @@ Phase 1 design is complete.
|
||||
| 1.1.0 | 2026-05-02 | Adopted into Spec Kit memory; fixed duplicate §4.3 → §4.4; strengthened "should" language to MUST/MUST NOT; added §9 Governance |
|
||||
| 1.1.1 | 2026-05-03 | Clarify that the only acceptable form of image transformation or editing is thumbnail generation |
|
||||
| 1.2.0 | 2026-05-03 | §2.4: Mark Phase 2 (JWT bearer auth) complete, reword phase status; §6: Add PyJWT to tech stack table; §8: Remove username/password auth from out-of-scope (now shipped) |
|
||||
| 1.3.0 | 2026-05-06 | §2.5: Remove planned PostgreSQL → SQLite refactor note; prohibit alternative database engines in integration tests. §5.2: Explicitly require PostgreSQL for integration tests; prohibit SQLite — a production HAVING/GROUP BY bug was masked by SQLite's permissive dialect. |
|
||||
| 1.4.0 | 2026-05-08 | §5.1: Soften strict TDD wording to reflect actual practice — tests alongside implementation are acceptable; deferring tests to a later task is not. §5.4: Replace "CI must pass" with local test suite gate; note CI is planned but not yet in place. §6: Add production runtime rows (k3s, nginx ingress + cert-manager, Vault + VSO). §8: Add "known gaps" subsection; document plaintext password storage as a deficiency that must be resolved before further auth work. |
|
||||
|
||||
---
|
||||
|
||||
**Version**: 1.2.0 | **Ratified**: 2026-05-01 | **Last Amended**: 2026-05-03
|
||||
**Version**: 1.4.0 | **Ratified**: 2026-05-01 | **Last Amended**: 2026-05-08
|
||||
|
||||
4
.yamllint.yml
Normal file
4
.yamllint.yml
Normal file
@@ -0,0 +1,4 @@
|
||||
extends: relaxed
|
||||
rules:
|
||||
line-length:
|
||||
max: 120
|
||||
@@ -1,5 +1,5 @@
|
||||
<!-- SPECKIT START -->
|
||||
For additional context about technologies to be used, project structure,
|
||||
shell commands, and other important information, read the current plan at
|
||||
`specs/007-tag-browser/plan.md`.
|
||||
`specs/013-k8s-manifests/plan.md`.
|
||||
<!-- SPECKIT END -->
|
||||
|
||||
25
Makefile
Normal file
25
Makefile
Normal file
@@ -0,0 +1,25 @@
|
||||
.PHONY: test-unit test-integration build-prod verify-prod build-ui-prod verify-ui-prod validate-k8s
|
||||
|
||||
test-unit:
|
||||
cd api && python -m pytest tests/unit/ -v
|
||||
|
||||
test-integration:
|
||||
docker compose -f docker-compose.test.yml build api-test
|
||||
docker compose -f docker-compose.test.yml run --rm api-test
|
||||
|
||||
build-prod:
|
||||
docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest
|
||||
|
||||
verify-prod:
|
||||
bash api/tests/build/verify_production_image.sh
|
||||
|
||||
build-ui-prod:
|
||||
docker build -f ui/Dockerfile.prod ui/ -t reactbin-ui-prod:latest
|
||||
|
||||
verify-ui-prod:
|
||||
bash ui/tests/build/verify_production_image.sh
|
||||
|
||||
# Offline: yamllint only. Online (requires kubeconfig): kubectl apply --dry-run=client -f k8s/
|
||||
validate-k8s:
|
||||
yamllint -d relaxed k8s/
|
||||
kubectl apply --dry-run=client -f k8s/
|
||||
138
README.md
138
README.md
@@ -2,3 +2,141 @@
|
||||
_Organize your reaction images._
|
||||
|
||||

|
||||
|
||||
A self-hosted reaction image board. Single owner account, tag-based browsing, S3-compatible image storage.
|
||||
|
||||
---
|
||||
|
||||
## Local development
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
# Edit .env — defaults work out of the box for local dev
|
||||
docker compose up
|
||||
```
|
||||
|
||||
- UI: http://localhost:4200
|
||||
- API: http://localhost:8000
|
||||
- MinIO console: http://localhost:9001 (minioadmin / minioadmin)
|
||||
|
||||
The API serves on port 8000 directly in dev. In production the nginx ingress routes `/api/` there.
|
||||
|
||||
### Running tests
|
||||
|
||||
```bash
|
||||
make test-unit # pytest unit tests (no Docker)
|
||||
make test-integration # builds api-test image, runs full suite against Postgres + MinIO
|
||||
```
|
||||
|
||||
### Production image builds
|
||||
|
||||
```bash
|
||||
make build-prod # builds reactbin-api-prod:latest from api/Dockerfile.prod
|
||||
make verify-prod # smoke-tests the production image
|
||||
make build-ui-prod # builds reactbin-ui-prod:latest from ui/Dockerfile.prod
|
||||
make verify-ui-prod # smoke-tests the production UI image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Production deployment (k3s)
|
||||
|
||||
### Cluster prerequisites
|
||||
|
||||
- nginx ingress controller
|
||||
- cert-manager with a `letsencrypt-prod` ClusterIssuer
|
||||
- Vault Secrets Operator (VSO) installed and connected to Vault
|
||||
- Vault KV v2 secrets populated (see below)
|
||||
|
||||
### Vault secrets
|
||||
|
||||
Two KV v2 paths. VSO syncs these into Kubernetes Secrets automatically.
|
||||
|
||||
**`reactbin/api/config`** → K8s Secret `api-env`
|
||||
|
||||
| Key | Notes |
|
||||
|-----|-------|
|
||||
| `DATABASE_URL` | `postgresql+asyncpg://user:pass@host:5432/db` |
|
||||
| `JWT_SECRET_KEY` | Long random string — `openssl rand -base64 48` |
|
||||
| `OWNER_USERNAME` | Login username |
|
||||
| `OWNER_PASSWORD` | Login password |
|
||||
| `S3_ENDPOINT_URL` | `http://minio.reactbin.svc.cluster.local:9000` |
|
||||
| `S3_BUCKET_NAME` | `reactbin` |
|
||||
| `S3_ACCESS_KEY_ID` | Same value as `MINIO_ROOT_USER` |
|
||||
| `S3_SECRET_ACCESS_KEY` | Same value as `MINIO_ROOT_PASSWORD` |
|
||||
| `API_BASE_URL` | `https://<your-domain>` |
|
||||
| `LOGIN_TRUSTED_PROXY_IPS` | Pod CIDR of nginx ingress pods, e.g. `10.42.0.0/16` — needed for per-client login rate limiting behind the ingress |
|
||||
|
||||
**`reactbin/minio/credentials`** → K8s Secret `minio-credentials`
|
||||
|
||||
| Key | Notes |
|
||||
|-----|-------|
|
||||
| `MINIO_ROOT_USER` | MinIO admin username |
|
||||
| `MINIO_ROOT_PASSWORD` | `openssl rand -base64 32` |
|
||||
|
||||
### Apply order
|
||||
|
||||
```bash
|
||||
# 1. Namespace first
|
||||
kubectl apply -f k8s/namespace.yaml
|
||||
|
||||
# 2. Vault CRDs — wait for VSO to create api-env and minio-credentials Secrets
|
||||
kubectl apply -f k8s/vault/
|
||||
kubectl get secret -n reactbin api-env minio-credentials # wait until both appear
|
||||
|
||||
# 3. API, UI, Ingress — replace 'latest' tags and <your-domain> first
|
||||
kubectl apply -f k8s/api/ -f k8s/ui/ -f k8s/ingress.yaml
|
||||
kubectl rollout status deployment/api -n reactbin # Alembic init container runs here
|
||||
|
||||
# 4. MinIO — wait for StatefulSet ready before running the bucket init Job
|
||||
kubectl apply -f k8s/minio/service.yaml -f k8s/minio/statefulset.yaml
|
||||
kubectl rollout status statefulset/minio -n reactbin
|
||||
kubectl apply -f k8s/minio/init-job.yaml
|
||||
```
|
||||
|
||||
Before applying: substitute real image tags in the Deployment manifests and replace `<your-domain>` in `k8s/ingress.yaml`.
|
||||
|
||||
### Updating a secret
|
||||
|
||||
1. Update the value in Vault
|
||||
2. Force VSO to sync immediately (otherwise waits up to 1 hour):
|
||||
```bash
|
||||
kubectl annotate vaultstaticsecret api-secret -n reactbin \
|
||||
secrets.hashicorp.com/force-sync=$(date +%s) --overwrite
|
||||
```
|
||||
3. Restart the deployment to pick up the new Secret:
|
||||
```bash
|
||||
kubectl rollout restart deployment/api -n reactbin
|
||||
```
|
||||
|
||||
### Validating manifests
|
||||
|
||||
```bash
|
||||
make validate-k8s # yamllint + kubectl apply --dry-run=client (requires kubeconfig)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Environment variables reference
|
||||
|
||||
All variables are read at startup from environment / `.env`.
|
||||
|
||||
| Variable | Default | Notes |
|
||||
|----------|---------|-------|
|
||||
| `DATABASE_URL` | — | Async DSN: `postgresql+asyncpg://...` |
|
||||
| `JWT_SECRET_KEY` | — | Required; use a long random string in production |
|
||||
| `JWT_EXPIRY_SECONDS` | `86400` | Token lifetime (24 h) |
|
||||
| `OWNER_USERNAME` | — | Single owner account username |
|
||||
| `OWNER_PASSWORD` | — | Single owner account password |
|
||||
| `S3_ENDPOINT_URL` | — | MinIO or any S3-compatible endpoint |
|
||||
| `S3_BUCKET_NAME` | `reactbin` | |
|
||||
| `S3_ACCESS_KEY_ID` | — | |
|
||||
| `S3_SECRET_ACCESS_KEY` | — | |
|
||||
| `S3_REGION` | `us-east-1` | |
|
||||
| `MAX_UPLOAD_BYTES` | `52428800` | 50 MiB |
|
||||
| `API_BASE_URL` | — | Used for generating public URLs |
|
||||
| `API_DOCS_ENABLED` | `true` | Set to `false` in production |
|
||||
| `LOGIN_MAX_FAILURES` | `5` | Failed attempts before cooldown |
|
||||
| `LOGIN_WINDOW_SECONDS` | `300` | Sliding window for failure count |
|
||||
| `LOGIN_COOLDOWN_SECONDS` | `900` | Lock duration after threshold hit |
|
||||
| `LOGIN_TRUSTED_PROXY_IPS` | `""` | Comma-separated CIDRs of trusted upstream proxies |
|
||||
|
||||
53
api/Dockerfile.prod
Normal file
53
api/Dockerfile.prod
Normal file
@@ -0,0 +1,53 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Build stage: install production deps via uv
|
||||
# ════════════════════════════════════════════════
|
||||
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
ENV UV_COMPILE_BYTECODE=1 \
|
||||
UV_LINK_MODE=copy \
|
||||
UV_PYTHON_DOWNLOADS=never
|
||||
|
||||
# Layer cache split: deps only (changes rarely)
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv sync --frozen --no-dev --no-install-project
|
||||
|
||||
# Layer cache split: source (changes often)
|
||||
COPY app/ ./app/
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Runtime stage: lean image with venv + source
|
||||
# ════════════════════════════════════════════════
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN groupadd --system --gid 1001 appgroup \
|
||||
&& useradd --system --uid 1001 --gid 1001 --no-create-home appuser
|
||||
|
||||
COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv
|
||||
COPY --chown=appuser:appgroup app/ ./app/
|
||||
COPY --chown=appuser:appgroup alembic/ ./alembic/
|
||||
COPY --chown=appuser:appgroup alembic.ini .
|
||||
|
||||
USER appuser
|
||||
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/api/v1/health || exit 1
|
||||
|
||||
CMD ["uvicorn", "app.main:app", \
|
||||
"--host", "0.0.0.0", \
|
||||
"--port", "8000", \
|
||||
"--timeout-graceful-shutdown", "30"]
|
||||
91
api/app/auth/rate_limiter.py
Normal file
91
api/app/auth/rate_limiter.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import ipaddress
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from ipaddress import IPv4Network, IPv6Network
|
||||
from threading import Lock
|
||||
|
||||
from starlette.requests import Request
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_client_ip(
|
||||
request: Request,
|
||||
trusted_networks: list[IPv4Network | IPv6Network],
|
||||
) -> str:
|
||||
"""Return the resolved client IP, honouring X-Forwarded-For when the
|
||||
TCP peer is a trusted upstream proxy. Falls back to the TCP peer address
|
||||
when no trusted networks are configured or the peer is not in the list."""
|
||||
peer = request.client.host if request.client else "unknown"
|
||||
if trusted_networks and peer != "unknown":
|
||||
try:
|
||||
peer_addr = ipaddress.ip_address(peer)
|
||||
if any(peer_addr in net for net in trusted_networks):
|
||||
xff = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
|
||||
if xff:
|
||||
return xff
|
||||
real_ip = request.headers.get("X-Real-IP", "").strip()
|
||||
if real_ip:
|
||||
return real_ip
|
||||
except ValueError:
|
||||
pass
|
||||
return peer
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Record:
|
||||
failures: int = 0
|
||||
window_start: float = field(default_factory=time.time)
|
||||
blocked_until: float = 0.0
|
||||
|
||||
|
||||
class LoginRateLimiter:
|
||||
def __init__(
|
||||
self,
|
||||
max_failures: int = 5,
|
||||
window_seconds: int = 300,
|
||||
cooldown_seconds: int = 900,
|
||||
) -> None:
|
||||
self._max = max_failures
|
||||
self._window = window_seconds
|
||||
self._cooldown = cooldown_seconds
|
||||
self._store: dict[str, _Record] = {}
|
||||
self._lock = Lock()
|
||||
|
||||
@property
|
||||
def cooldown_seconds(self) -> int:
|
||||
return self._cooldown
|
||||
|
||||
def is_blocked(self, ip: str) -> bool:
|
||||
now = time.time()
|
||||
with self._lock:
|
||||
rec = self._store.get(ip)
|
||||
if rec is None:
|
||||
return False
|
||||
if rec.blocked_until > now:
|
||||
return True
|
||||
if rec.blocked_until > 0:
|
||||
del self._store[ip]
|
||||
return False
|
||||
|
||||
def record_failure(self, ip: str) -> None:
|
||||
now = time.time()
|
||||
with self._lock:
|
||||
rec = self._store.get(ip)
|
||||
if rec is None:
|
||||
rec = _Record(window_start=now)
|
||||
self._store[ip] = rec
|
||||
if now - rec.window_start > self._window:
|
||||
rec.failures = 0
|
||||
rec.window_start = now
|
||||
rec.failures += 1
|
||||
if rec.failures >= self._max:
|
||||
rec.blocked_until = now + self._cooldown
|
||||
logger.warning(
|
||||
"Login blocked for %s after %d failures", ip, rec.failures
|
||||
)
|
||||
|
||||
def record_success(self, ip: str) -> None:
|
||||
with self._lock:
|
||||
self._store.pop(ip, None)
|
||||
@@ -1,5 +1,6 @@
|
||||
from functools import lru_cache
|
||||
|
||||
from pydantic import field_validator
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
@@ -18,6 +19,23 @@ class Settings(BaseSettings):
|
||||
jwt_expiry_seconds: int = 86400
|
||||
owner_username: str
|
||||
owner_password: str
|
||||
login_max_failures: int = 5
|
||||
login_window_seconds: int = 300
|
||||
login_cooldown_seconds: int = 900
|
||||
login_trusted_proxy_ips: str = ""
|
||||
api_docs_enabled: bool = True
|
||||
|
||||
@field_validator("api_docs_enabled", mode="before")
|
||||
@classmethod
|
||||
def coerce_docs_enabled(cls, v):
|
||||
if isinstance(v, bool):
|
||||
return v
|
||||
try:
|
||||
from pydantic import TypeAdapter
|
||||
|
||||
return TypeAdapter(bool).validate_python(v)
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
@lru_cache
|
||||
|
||||
@@ -1,17 +1,30 @@
|
||||
from contextlib import asynccontextmanager
|
||||
import ipaddress
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.exceptions import HTTPException
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from app.auth.rate_limiter import LoginRateLimiter
|
||||
from app.config import get_settings
|
||||
from app.database import Base, get_engine
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(application: FastAPI):
|
||||
get_settings()
|
||||
# Verify DB connection and run migrations on startup
|
||||
settings = get_settings()
|
||||
application.state.login_rate_limiter = LoginRateLimiter(
|
||||
max_failures=settings.login_max_failures,
|
||||
window_seconds=settings.login_window_seconds,
|
||||
cooldown_seconds=settings.login_cooldown_seconds,
|
||||
)
|
||||
trusted_networks = []
|
||||
for part in settings.login_trusted_proxy_ips.split(","):
|
||||
part = part.strip()
|
||||
if part:
|
||||
with suppress(ValueError):
|
||||
trusted_networks.append(ipaddress.ip_network(part, strict=False))
|
||||
application.state.login_trusted_networks = trusted_networks
|
||||
engine = get_engine()
|
||||
async with engine.begin() as conn:
|
||||
# In production, Alembic handles migrations; this is a dev convenience
|
||||
@@ -20,7 +33,20 @@ async def lifespan(application: FastAPI):
|
||||
await engine.dispose()
|
||||
|
||||
|
||||
app = FastAPI(title="Reactbin API", version="1.0.0", lifespan=lifespan)
|
||||
_settings = get_settings()
|
||||
|
||||
app = FastAPI(
|
||||
title="Reactbin API",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
docs_url="/docs" if _settings.api_docs_enabled else None,
|
||||
redoc_url="/redoc" if _settings.api_docs_enabled else None,
|
||||
openapi_url="/openapi.json" if _settings.api_docs_enabled else None,
|
||||
)
|
||||
|
||||
# Defaults so app.state is populated even when lifespan doesn't run (e.g. tests)
|
||||
app.state.login_rate_limiter = LoginRateLimiter()
|
||||
app.state.login_trusted_networks = []
|
||||
|
||||
|
||||
@app.exception_handler(HTTPException)
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.auth.jwt_provider import JWTAuthProvider
|
||||
from app.auth.rate_limiter import LoginRateLimiter, get_client_ip
|
||||
from app.dependencies import get_jwt_auth
|
||||
|
||||
router = APIRouter(tags=["auth"])
|
||||
@@ -19,12 +21,32 @@ class TokenResponse(BaseModel):
|
||||
|
||||
|
||||
@router.post("/auth/token", response_model=TokenResponse)
|
||||
async def login(body: LoginRequest, auth: JWTAuthProvider = Depends(get_jwt_auth)):
|
||||
async def login(
|
||||
request: Request,
|
||||
body: LoginRequest,
|
||||
auth: JWTAuthProvider = Depends(get_jwt_auth),
|
||||
):
|
||||
limiter: LoginRateLimiter = request.app.state.login_rate_limiter
|
||||
ip: str = get_client_ip(request, request.app.state.login_trusted_networks)
|
||||
|
||||
if limiter.is_blocked(ip):
|
||||
return JSONResponse(
|
||||
status_code=429,
|
||||
content={
|
||||
"detail": "Too many failed login attempts. Please try again later.",
|
||||
"code": "login_rate_limited",
|
||||
},
|
||||
headers={"Retry-After": str(limiter.cooldown_seconds)},
|
||||
)
|
||||
|
||||
if not auth.verify_credentials(body.username, body.password):
|
||||
limiter.record_failure(ip)
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail={"detail": "Invalid credentials", "code": "invalid_credentials"},
|
||||
)
|
||||
|
||||
limiter.record_success(ip)
|
||||
token = auth.create_token()
|
||||
return TokenResponse(
|
||||
access_token=token,
|
||||
|
||||
0
api/tests/build/.gitkeep
Normal file
0
api/tests/build/.gitkeep
Normal file
119
api/tests/build/verify_production_image.sh
Executable file
119
api/tests/build/verify_production_image.sh
Executable file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env bash
|
||||
# TDD verification script for api/Dockerfile.prod
|
||||
# Fails (red) if Dockerfile.prod does not exist or any check fails.
|
||||
set -euo pipefail
|
||||
|
||||
IMAGE="reactbin-api-prod:verify-$$"
|
||||
IMAGE2="reactbin-api-prod:verify-cache-$$"
|
||||
PG_CONTAINER=""
|
||||
APP_CONTAINER=""
|
||||
|
||||
cleanup() {
|
||||
[ -n "$APP_CONTAINER" ] && docker rm -f "$APP_CONTAINER" 2>/dev/null || true
|
||||
[ -n "$PG_CONTAINER" ] && docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker rmi "$IMAGE" 2>/dev/null || true
|
||||
docker rmi "$IMAGE2" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# ── US1 check 1: build ────────────────────────────────────────────────────────
|
||||
echo "[verify] Building $IMAGE..."
|
||||
docker build -f api/Dockerfile.prod api/ -t "$IMAGE"
|
||||
echo "[verify] Build OK"
|
||||
|
||||
# ── US1 check 2: start with a throwaway postgres ──────────────────────────────
|
||||
echo "[verify] Starting postgres..."
|
||||
PG_CONTAINER=$(docker run -d \
|
||||
-e POSTGRES_DB=reactbin_verify \
|
||||
-e POSTGRES_USER=verify \
|
||||
-e POSTGRES_PASSWORD=verify \
|
||||
postgres:16-alpine)
|
||||
|
||||
for i in $(seq 1 30); do
|
||||
if docker exec "$PG_CONTAINER" pg_isready -U verify -q 2>/dev/null; then break; fi
|
||||
sleep 1
|
||||
if [[ $i -eq 30 ]]; then echo "FAIL: postgres did not become ready"; exit 1; fi
|
||||
done
|
||||
|
||||
PG_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$PG_CONTAINER")
|
||||
|
||||
echo "[verify] Starting production container..."
|
||||
APP_CONTAINER=$(docker run -d \
|
||||
-p 18000:8000 \
|
||||
-e JWT_SECRET_KEY=verify-key \
|
||||
-e OWNER_USERNAME=testowner \
|
||||
-e OWNER_PASSWORD=testpassword \
|
||||
-e DATABASE_URL="postgresql+asyncpg://verify:verify@${PG_IP}:5432/reactbin_verify" \
|
||||
-e S3_ENDPOINT_URL=http://noop:9000 \
|
||||
-e S3_BUCKET_NAME=noop \
|
||||
-e S3_ACCESS_KEY_ID=noop \
|
||||
-e S3_SECRET_ACCESS_KEY=noop \
|
||||
-e S3_REGION=us-east-1 \
|
||||
"$IMAGE")
|
||||
|
||||
# ── US1 check 3: health endpoint ──────────────────────────────────────────────
|
||||
echo "[verify] Polling health endpoint..."
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost:18000/api/v1/health > /dev/null; then break; fi
|
||||
sleep 1
|
||||
if [[ $i -eq 30 ]]; then echo "FAIL: health check timed out after 30s"; exit 1; fi
|
||||
done
|
||||
echo "[verify] Health check passed"
|
||||
|
||||
# ── US2 check 1: non-root user ────────────────────────────────────────────────
|
||||
UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u)
|
||||
if [[ "$UID_IN_CONTAINER" -eq 0 ]]; then
|
||||
echo "FAIL: process running as root (UID 0)"; exit 1
|
||||
fi
|
||||
echo "[verify] Non-root user OK (UID $UID_IN_CONTAINER)"
|
||||
|
||||
# ── C1: stdout/stderr log capture ────────────────────────────────────────────
|
||||
LOGS=$(docker logs "$APP_CONTAINER" 2>&1)
|
||||
if [[ -z "$LOGS" ]]; then
|
||||
echo "FAIL: no output on stdout/stderr"; exit 1
|
||||
fi
|
||||
if ! echo "$LOGS" | grep -qiE "(started server|application startup complete|uvicorn)"; then
|
||||
echo "FAIL: no startup logs found on stdout/stderr"; exit 1
|
||||
fi
|
||||
echo "[verify] Stdout logging OK"
|
||||
|
||||
# ── US1 check 4: SIGTERM → exit 0 ────────────────────────────────────────────
|
||||
docker stop "$APP_CONTAINER" > /dev/null
|
||||
EXIT_CODE=$(docker wait "$APP_CONTAINER")
|
||||
if [[ "$EXIT_CODE" -ne 0 ]]; then
|
||||
echo "FAIL: non-zero exit code $EXIT_CODE after SIGTERM"; exit 1
|
||||
fi
|
||||
echo "[verify] Graceful shutdown OK (exit $EXIT_CODE)"
|
||||
|
||||
# ── US2 check 2: dev deps absent ─────────────────────────────────────────────
|
||||
if docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null; then
|
||||
echo "FAIL: pytest importable in production image (dev deps present)"; exit 1
|
||||
fi
|
||||
echo "[verify] Dev deps absent OK"
|
||||
|
||||
# ── C2: no hardcoded secrets in image layers ─────────────────────────────────
|
||||
if docker history --no-trunc "$IMAGE" 2>&1 | grep -qiE "(password|secret_key|api_key|token)"; then
|
||||
echo "FAIL: potential secret found in image history"; exit 1
|
||||
fi
|
||||
echo "[verify] No secrets in image layers OK"
|
||||
|
||||
# ── C3: missing env var → non-zero exit ──────────────────────────────────────
|
||||
set +e
|
||||
docker run --rm -e JWT_SECRET_KEY=verify-key "$IMAGE" 2>/dev/null
|
||||
MISSING_ENV_EXIT=$?
|
||||
set -e
|
||||
if [[ "$MISSING_ENV_EXIT" -eq 0 ]]; then
|
||||
echo "FAIL: container exited 0 despite missing OWNER_USERNAME"; exit 1
|
||||
fi
|
||||
echo "[verify] Missing-env-var exit check OK (exit $MISSING_ENV_EXIT)"
|
||||
|
||||
# ── US3: dep layer cached on source-only rebuild ──────────────────────────────
|
||||
echo "[verify] Testing cache hit on source-only rebuild..."
|
||||
touch api/app/main.py
|
||||
BUILD2_OUTPUT=$(docker build --progress=plain -f api/Dockerfile.prod api/ -t "$IMAGE2" 2>&1)
|
||||
if ! echo "$BUILD2_OUTPUT" | grep -q "CACHED"; then
|
||||
echo "FAIL: dependency layer not reused on source-only rebuild"; exit 1
|
||||
fi
|
||||
echo "[verify] Dep layer cache hit confirmed (US3 OK)"
|
||||
|
||||
echo "[verify] All checks passed (US1 + US2 + US3)."
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||
@@ -9,11 +10,11 @@ os.environ.setdefault("JWT_SECRET_KEY", "test-secret-key-for-testing-only")
|
||||
os.environ.setdefault("OWNER_USERNAME", "testowner")
|
||||
os.environ.setdefault("OWNER_PASSWORD", "testpassword")
|
||||
|
||||
from app.auth.jwt_provider import JWTAuthProvider
|
||||
from app.config import get_settings
|
||||
from app.database import Base
|
||||
from app.dependencies import get_auth, get_db, get_storage
|
||||
from app.main import app
|
||||
from app.auth.jwt_provider import JWTAuthProvider # noqa: E402
|
||||
from app.config import get_settings # noqa: E402
|
||||
from app.database import Base # noqa: E402
|
||||
from app.dependencies import get_auth, get_db, get_storage # noqa: E402
|
||||
from app.main import app # noqa: E402
|
||||
|
||||
# Bust the LRU cache so get_settings() picks up the env vars set above
|
||||
get_settings.cache_clear()
|
||||
@@ -26,8 +27,6 @@ _TEST_OWNER_PASSWORD = os.environ["OWNER_PASSWORD"]
|
||||
@pytest_asyncio.fixture(scope="session", loop_scope="session")
|
||||
async def engine():
|
||||
settings = get_settings()
|
||||
# Use a separate test database URL if TEST_DATABASE_URL is set
|
||||
import os
|
||||
db_url = os.getenv("TEST_DATABASE_URL", settings.database_url)
|
||||
eng = create_async_engine(db_url, echo=False)
|
||||
async with eng.begin() as conn:
|
||||
@@ -108,3 +107,15 @@ async def authed_client(db_session, jwt_auth_provider):
|
||||
yield c, valid_token
|
||||
|
||||
app.dependency_overrides.clear()
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
db_url = os.getenv("TEST_DATABASE_URL") or os.getenv("DATABASE_URL", "")
|
||||
if not db_url.startswith("postgresql+asyncpg://"):
|
||||
pytest.exit(
|
||||
"Integration tests require a PostgreSQL database "
|
||||
"(postgresql+asyncpg://...). "
|
||||
"Set TEST_DATABASE_URL or DATABASE_URL accordingly. "
|
||||
f"Got: {db_url!r}",
|
||||
returncode=1,
|
||||
)
|
||||
|
||||
@@ -19,15 +19,18 @@ def _minimal_jpeg_v2() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_removes_record(client):
|
||||
async def test_delete_removes_record(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_jpeg_v2()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("del-test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
image_id = upload.json()["id"]
|
||||
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}")
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}", headers=headers)
|
||||
assert delete_resp.status_code == 204
|
||||
|
||||
get_resp = await client.get(f"/api/v1/images/{image_id}")
|
||||
@@ -36,16 +39,19 @@ async def test_delete_removes_record(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_removes_storage_object(client):
|
||||
async def test_delete_removes_storage_object(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_jpeg_v2() + b"\x00"
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("del-storage-test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert upload.status_code in (200, 201)
|
||||
image_id = upload.json()["id"]
|
||||
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}")
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}", headers=headers)
|
||||
assert delete_resp.status_code == 204
|
||||
|
||||
# Confirm storage redirect no longer works (404 since record is gone)
|
||||
@@ -54,15 +60,21 @@ async def test_delete_removes_storage_object(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_unknown_id_returns_404(client):
|
||||
response = await client.delete(f"/api/v1/images/{uuid.uuid4()}")
|
||||
async def test_delete_unknown_id_returns_404(authed_client):
|
||||
client, token = authed_client
|
||||
response = await client.delete(
|
||||
f"/api/v1/images/{uuid.uuid4()}",
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 404
|
||||
body = response.json()
|
||||
assert body["code"] == "image_not_found"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_removes_thumbnail(client):
|
||||
async def test_delete_removes_thumbnail(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
buf = io.BytesIO()
|
||||
PILImage.new("RGB", (200, 150), color=(60, 90, 120)).save(buf, format="JPEG")
|
||||
data = buf.getvalue()
|
||||
@@ -70,12 +82,13 @@ async def test_delete_removes_thumbnail(client):
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("thumb-del.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert upload.status_code == 201
|
||||
image_id = upload.json()["id"]
|
||||
assert upload.json()["thumbnail_key"] is not None
|
||||
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}")
|
||||
delete_resp = await client.delete(f"/api/v1/images/{image_id}", headers=headers)
|
||||
assert delete_resp.status_code == 204
|
||||
|
||||
thumb_resp = await client.get(f"/api/v1/images/{image_id}/thumbnail")
|
||||
|
||||
48
api/tests/integration/test_docs_gate.py
Normal file
48
api/tests/integration/test_docs_gate.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import importlib
|
||||
|
||||
from starlette.testclient import TestClient
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
_BASE_ENV = {
|
||||
"DATABASE_URL": "postgresql+asyncpg://u:p@localhost/db",
|
||||
"JWT_SECRET_KEY": "test-secret",
|
||||
"OWNER_USERNAME": "admin",
|
||||
"OWNER_PASSWORD": "password",
|
||||
"S3_ENDPOINT_URL": "http://localhost:9000",
|
||||
"S3_BUCKET_NAME": "test-bucket",
|
||||
"S3_ACCESS_KEY_ID": "key",
|
||||
"S3_SECRET_ACCESS_KEY": "secret",
|
||||
}
|
||||
|
||||
|
||||
def _set_env(monkeypatch, extra=None):
|
||||
for k, v in {**_BASE_ENV, **(extra or {})}.items():
|
||||
monkeypatch.setenv(k, v)
|
||||
|
||||
|
||||
def test_docs_hidden_when_flag_disabled(monkeypatch):
|
||||
_set_env(monkeypatch, {"API_DOCS_ENABLED": "false"})
|
||||
get_settings.cache_clear()
|
||||
import app.main as m
|
||||
|
||||
importlib.reload(m)
|
||||
client = TestClient(m.app, raise_server_exceptions=False)
|
||||
assert client.get("/docs").status_code == 404
|
||||
assert client.get("/redoc").status_code == 404
|
||||
assert client.get("/openapi.json").status_code == 404
|
||||
assert client.get("/api/v1/health").status_code == 200
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_docs_visible_when_flag_enabled(monkeypatch):
|
||||
_set_env(monkeypatch, {"API_DOCS_ENABLED": "true"})
|
||||
get_settings.cache_clear()
|
||||
import app.main as m
|
||||
|
||||
importlib.reload(m)
|
||||
client = TestClient(m.app, raise_server_exceptions=False)
|
||||
assert client.get("/docs").status_code == 200
|
||||
assert client.get("/redoc").status_code == 200
|
||||
assert client.get("/openapi.json").status_code == 200
|
||||
get_settings.cache_clear()
|
||||
121
api/tests/integration/test_login_rate_limit.py
Normal file
121
api/tests/integration/test_login_rate_limit.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from httpx import AsyncClient
|
||||
|
||||
from app.auth.rate_limiter import LoginRateLimiter
|
||||
from app.main import app
|
||||
|
||||
BAD_CREDS = {"username": "attacker", "password": "wrong"}
|
||||
VALID_CREDS = {
|
||||
"username": os.environ.get("OWNER_USERNAME", "testowner"),
|
||||
"password": os.environ.get("OWNER_PASSWORD", "testpassword"),
|
||||
}
|
||||
|
||||
|
||||
def _fresh_limiter():
|
||||
return LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_repeated_failures_trigger_429(client: AsyncClient):
|
||||
original_limiter = app.state.login_rate_limiter
|
||||
original_networks = app.state.login_trusted_networks
|
||||
app.state.login_rate_limiter = _fresh_limiter()
|
||||
app.state.login_trusted_networks = []
|
||||
try:
|
||||
for _ in range(3):
|
||||
await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
resp = await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
assert resp.status_code == 429
|
||||
assert resp.json()["code"] == "login_rate_limited"
|
||||
finally:
|
||||
app.state.login_rate_limiter = original_limiter
|
||||
app.state.login_trusted_networks = original_networks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_success_resets_counter(client: AsyncClient):
|
||||
original_limiter = app.state.login_rate_limiter
|
||||
original_networks = app.state.login_trusted_networks
|
||||
app.state.login_rate_limiter = _fresh_limiter()
|
||||
app.state.login_trusted_networks = []
|
||||
try:
|
||||
for _ in range(2):
|
||||
await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
await client.post("/api/v1/auth/token", json=VALID_CREDS)
|
||||
for _ in range(3):
|
||||
resp = await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
assert resp.status_code == 401, "counter should have reset after success"
|
||||
finally:
|
||||
app.state.login_rate_limiter = original_limiter
|
||||
app.state.login_trusted_networks = original_networks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_429_has_retry_after_header(client: AsyncClient):
|
||||
original_limiter = app.state.login_rate_limiter
|
||||
original_networks = app.state.login_trusted_networks
|
||||
app.state.login_rate_limiter = _fresh_limiter()
|
||||
app.state.login_trusted_networks = []
|
||||
try:
|
||||
for _ in range(3):
|
||||
await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
resp = await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
assert resp.status_code == 429
|
||||
assert "Retry-After" in resp.headers
|
||||
assert int(resp.headers["Retry-After"]) > 0
|
||||
finally:
|
||||
app.state.login_rate_limiter = original_limiter
|
||||
app.state.login_trusted_networks = original_networks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_429_body_shape(client: AsyncClient):
|
||||
original_limiter = app.state.login_rate_limiter
|
||||
original_networks = app.state.login_trusted_networks
|
||||
app.state.login_rate_limiter = _fresh_limiter()
|
||||
app.state.login_trusted_networks = []
|
||||
try:
|
||||
for _ in range(3):
|
||||
await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
resp = await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
assert resp.status_code == 429
|
||||
assert resp.json() == {
|
||||
"detail": "Too many failed login attempts. Please try again later.",
|
||||
"code": "login_rate_limited",
|
||||
}
|
||||
finally:
|
||||
app.state.login_rate_limiter = original_limiter
|
||||
app.state.login_trusted_networks = original_networks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_xff_header_ignored_when_no_trusted_networks(client: AsyncClient):
|
||||
original_limiter = app.state.login_rate_limiter
|
||||
original_networks = app.state.login_trusted_networks
|
||||
app.state.login_rate_limiter = _fresh_limiter()
|
||||
app.state.login_trusted_networks = []
|
||||
try:
|
||||
# Send 3 failures all claiming to be "1.2.3.4" via XFF
|
||||
for _ in range(3):
|
||||
await client.post(
|
||||
"/api/v1/auth/token",
|
||||
json=BAD_CREDS,
|
||||
headers={"X-Forwarded-For": "1.2.3.4"},
|
||||
)
|
||||
# 4th request with a *different* XFF — if XFF were trusted, this
|
||||
# would appear to be a fresh IP and get 401. Since XFF is ignored,
|
||||
# the real peer ("testclient") is blocked and we get 429.
|
||||
resp = await client.post(
|
||||
"/api/v1/auth/token",
|
||||
json=BAD_CREDS,
|
||||
headers={"X-Forwarded-For": "9.9.9.9"},
|
||||
)
|
||||
assert resp.status_code == 429, (
|
||||
"XFF should be ignored when no trusted networks are configured; "
|
||||
"expected real peer to be blocked"
|
||||
)
|
||||
finally:
|
||||
app.state.login_rate_limiter = original_limiter
|
||||
app.state.login_trusted_networks = original_networks
|
||||
@@ -16,7 +16,9 @@ def _minimal_gif() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_and_filter_returns_only_matching_images(client):
|
||||
async def test_and_filter_returns_only_matching_images(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_gif()
|
||||
|
||||
# Image with both tags
|
||||
@@ -24,6 +26,7 @@ async def test_and_filter_returns_only_matching_images(client):
|
||||
"/api/v1/images",
|
||||
files={"file": ("both.gif", io.BytesIO(data), "image/gif")},
|
||||
data={"tags": "andcat,andfunny"},
|
||||
headers=headers,
|
||||
)
|
||||
both_id = r_both.json()["id"]
|
||||
|
||||
@@ -32,6 +35,7 @@ async def test_and_filter_returns_only_matching_images(client):
|
||||
"/api/v1/images",
|
||||
files={"file": ("one.gif", io.BytesIO(data + b"\x00"), "image/gif")},
|
||||
data={"tags": "andcat"},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
response = await client.get("/api/v1/images?tags=andcat,andfunny")
|
||||
@@ -43,7 +47,9 @@ async def test_and_filter_returns_only_matching_images(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_filter_excludes_partial_tag_match(client):
|
||||
async def test_filter_excludes_partial_tag_match(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_gif()
|
||||
|
||||
# Image with only "exclcat"
|
||||
@@ -51,6 +57,7 @@ async def test_filter_excludes_partial_tag_match(client):
|
||||
"/api/v1/images",
|
||||
files={"file": ("partial.gif", io.BytesIO(data + b"\x01"), "image/gif")},
|
||||
data={"tags": "exclcat"},
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
# Filter requires both exclcat and exclother
|
||||
|
||||
@@ -29,11 +29,13 @@ def _minimal_webp() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_returns_200_with_content(client):
|
||||
async def test_file_returns_200_with_content(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_webp()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.webp", io.BytesIO(data), "image/webp")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert upload.status_code in (200, 201)
|
||||
upload_body = upload.json()
|
||||
@@ -57,11 +59,13 @@ async def test_file_unknown_id_returns_404(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_response_exposes_no_storage_details(client):
|
||||
async def test_file_response_exposes_no_storage_details(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_webp()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.webp", io.BytesIO(data), "image/webp")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert upload.status_code in (200, 201)
|
||||
image_id = upload.json()["id"]
|
||||
@@ -75,11 +79,13 @@ async def test_file_response_exposes_no_storage_details(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_thumbnail_returns_webp(client):
|
||||
async def test_thumbnail_returns_webp(authed_client):
|
||||
client, token = authed_client
|
||||
data = _real_jpeg()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("t.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert upload.status_code == 201
|
||||
body = upload.json()
|
||||
@@ -95,11 +101,13 @@ async def test_thumbnail_returns_webp(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_thumbnail_fallback_returns_original(client, db_session):
|
||||
async def test_thumbnail_fallback_returns_original(authed_client, db_session):
|
||||
client, token = authed_client
|
||||
data = _real_jpeg()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("fallback.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert upload.status_code == 201
|
||||
image_id = upload.json()["id"]
|
||||
|
||||
@@ -31,12 +31,14 @@ def _minimal_png() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_with_tags_persists_tags(client):
|
||||
async def test_upload_with_tags_persists_tags(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_png()
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "cat,funny"},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 201
|
||||
body = response.json()
|
||||
@@ -44,12 +46,15 @@ async def test_upload_with_tags_persists_tags(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_duplicate_upload_tags_unchanged(client):
|
||||
async def test_duplicate_upload_tags_unchanged(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_png()
|
||||
r1 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "original-tag"},
|
||||
headers=headers,
|
||||
)
|
||||
assert r1.status_code in (200, 201)
|
||||
original_tags = set(r1.json()["tags"])
|
||||
@@ -58,6 +63,7 @@ async def test_duplicate_upload_tags_unchanged(client):
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "different-tag"},
|
||||
headers=headers,
|
||||
)
|
||||
assert r2.status_code == 200
|
||||
assert r2.json()["duplicate"] is True
|
||||
@@ -65,18 +71,22 @@ async def test_duplicate_upload_tags_unchanged(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_patch_replaces_tag_set(client):
|
||||
async def test_patch_replaces_tag_set(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_png()
|
||||
r1 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("patch-test.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "old-tag"},
|
||||
headers=headers,
|
||||
)
|
||||
image_id = r1.json()["id"]
|
||||
|
||||
patch = await client.patch(
|
||||
f"/api/v1/images/{image_id}/tags",
|
||||
json={"tags": ["new-tag", "another"]},
|
||||
headers=headers,
|
||||
)
|
||||
assert patch.status_code == 200
|
||||
body = patch.json()
|
||||
@@ -85,17 +95,21 @@ async def test_patch_replaces_tag_set(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_patch_invalid_tag_returns_422(client):
|
||||
async def test_patch_invalid_tag_returns_422(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _minimal_png()
|
||||
r1 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("invalid-tag-test.png", io.BytesIO(data), "image/png")},
|
||||
headers=headers,
|
||||
)
|
||||
image_id = r1.json()["id"]
|
||||
|
||||
patch = await client.patch(
|
||||
f"/api/v1/images/{image_id}/tags",
|
||||
json={"tags": ["valid", "INVALID TAG WITH SPACES!"]},
|
||||
headers=headers,
|
||||
)
|
||||
assert patch.status_code == 422
|
||||
body = patch.json()
|
||||
@@ -103,12 +117,14 @@ async def test_patch_invalid_tag_returns_422(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_tags_alphabetical_with_counts(client):
|
||||
async def test_list_tags_alphabetical_with_counts(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_png()
|
||||
await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("tag-list-test.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "zebra,apple"},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
response = await client.get("/api/v1/tags")
|
||||
assert response.status_code == 200
|
||||
@@ -121,12 +137,14 @@ async def test_list_tags_alphabetical_with_counts(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_tags_prefix_filter(client):
|
||||
async def test_list_tags_prefix_filter(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_png()
|
||||
await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("prefix-test.png", io.BytesIO(data), "image/png")},
|
||||
data={"tags": "cat,catfish,caterpillar,dog"},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
response = await client.get("/api/v1/tags?q=cat")
|
||||
assert response.status_code == 200
|
||||
@@ -155,13 +173,16 @@ def _unique_png(seed: int) -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_tags_sort_count_desc(client):
|
||||
async def test_list_tags_sort_count_desc(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
# popular-sort-tag appears on 2 images, rare-sort-tag on 1 — verify count_desc ordering
|
||||
for seed in (100, 101):
|
||||
await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": (f"sort-{seed}.png", io.BytesIO(_unique_png(seed)), "image/png")},
|
||||
data={"tags": "popular-sort-tag,rare-sort-tag" if seed == 100 else "popular-sort-tag"},
|
||||
headers=headers,
|
||||
)
|
||||
response = await client.get("/api/v1/tags?sort=count_desc")
|
||||
assert response.status_code == 200
|
||||
@@ -177,13 +198,16 @@ async def test_list_tags_sort_count_desc(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_list_tags_min_count_excludes_below_threshold(client):
|
||||
async def test_list_tags_min_count_excludes_below_threshold(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
# common-min-tag appears on 2 images, uncommon-min-tag on 1
|
||||
for seed in (200, 201):
|
||||
await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": (f"min-{seed}.png", io.BytesIO(_unique_png(seed)), "image/png")},
|
||||
data={"tags": "common-min-tag,uncommon-min-tag" if seed == 200 else "common-min-tag"},
|
||||
headers=headers,
|
||||
)
|
||||
# min_count=2 should exclude uncommon-min-tag (count=1) but keep common-min-tag (count=2)
|
||||
response = await client.get("/api/v1/tags?min_count=2")
|
||||
|
||||
@@ -6,6 +6,7 @@ T029 — file > MAX_UPLOAD_BYTES → 422 file_too_large
|
||||
T079 — GET /api/v1/images/{id} 404 → error envelope shape
|
||||
"""
|
||||
import io
|
||||
import uuid
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
@@ -27,11 +28,13 @@ def _minimal_jpeg() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_new_image_returns_201(client):
|
||||
async def test_upload_new_image_returns_201(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_jpeg()
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 201
|
||||
body = response.json()
|
||||
@@ -44,12 +47,15 @@ async def test_upload_new_image_returns_201(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_duplicate_returns_200_with_flag(client):
|
||||
async def test_upload_duplicate_returns_200_with_flag(authed_client):
|
||||
client, token = authed_client
|
||||
data = _minimal_jpeg()
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
# First upload
|
||||
r1 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert r1.status_code in (200, 201)
|
||||
|
||||
@@ -57,6 +63,7 @@ async def test_upload_duplicate_returns_200_with_flag(client):
|
||||
r2 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert r2.status_code == 200
|
||||
body = r2.json()
|
||||
@@ -65,10 +72,12 @@ async def test_upload_duplicate_returns_200_with_flag(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_invalid_mime_type_returns_422(client):
|
||||
async def test_upload_invalid_mime_type_returns_422(authed_client):
|
||||
client, token = authed_client
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("doc.pdf", io.BytesIO(b"%PDF-1.4"), "application/pdf")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
body = response.json()
|
||||
@@ -77,11 +86,12 @@ async def test_upload_invalid_mime_type_returns_422(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_oversized_file_returns_422(client):
|
||||
async def test_upload_oversized_file_returns_422(authed_client):
|
||||
import os
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
client, token = authed_client
|
||||
os.environ["MAX_UPLOAD_BYTES"] = "10"
|
||||
get_settings.cache_clear()
|
||||
|
||||
@@ -89,6 +99,7 @@ async def test_upload_oversized_file_returns_422(client):
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("big.jpg", io.BytesIO(b"x" * 11), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 422
|
||||
body = response.json()
|
||||
@@ -100,7 +111,6 @@ async def test_upload_oversized_file_returns_422(client):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_unknown_image_returns_404_with_envelope(client):
|
||||
import uuid
|
||||
response = await client.get(f"/api/v1/images/{uuid.uuid4()}")
|
||||
assert response.status_code == 404
|
||||
body = response.json()
|
||||
@@ -109,11 +119,13 @@ async def test_get_unknown_image_returns_404_with_envelope(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_returns_thumbnail_key(client):
|
||||
async def test_upload_returns_thumbnail_key(authed_client):
|
||||
client, token = authed_client
|
||||
data = _real_jpeg(color=(100, 150, 200))
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("thumb_test.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code == 201
|
||||
body = response.json()
|
||||
@@ -123,17 +135,21 @@ async def test_upload_returns_thumbnail_key(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_duplicate_upload_reuses_thumbnail_key(client):
|
||||
async def test_duplicate_upload_reuses_thumbnail_key(authed_client):
|
||||
client, token = authed_client
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
data = _real_jpeg(color=(200, 100, 50))
|
||||
r1 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("dup.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert r1.status_code in (200, 201)
|
||||
|
||||
r2 = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("dup.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers=headers,
|
||||
)
|
||||
assert r2.status_code == 200
|
||||
|
||||
@@ -144,12 +160,14 @@ async def test_duplicate_upload_reuses_thumbnail_key(client):
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_upload_succeeds_when_thumbnail_fails(client):
|
||||
async def test_upload_succeeds_when_thumbnail_fails(authed_client):
|
||||
client, token = authed_client
|
||||
data = _real_jpeg(color=(50, 200, 150))
|
||||
with patch("app.routers.images.generate_thumbnail", side_effect=RuntimeError("simulated")):
|
||||
response = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("no_thumb.jpg", io.BytesIO(data), "image/jpeg")},
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
)
|
||||
assert response.status_code in (200, 201)
|
||||
body = response.json()
|
||||
|
||||
@@ -59,3 +59,39 @@ def test_settings_jwt_expiry_override(monkeypatch):
|
||||
|
||||
s = config_module.Settings()
|
||||
assert s.jwt_expiry_seconds == 3600
|
||||
|
||||
|
||||
def test_api_docs_enabled_default(monkeypatch):
|
||||
_apply_env(monkeypatch)
|
||||
|
||||
import importlib
|
||||
|
||||
import app.config as config_module
|
||||
importlib.reload(config_module)
|
||||
|
||||
s = config_module.Settings()
|
||||
assert s.api_docs_enabled is True
|
||||
|
||||
|
||||
def test_api_docs_enabled_false(monkeypatch):
|
||||
_apply_env(monkeypatch, {"API_DOCS_ENABLED": "false"})
|
||||
|
||||
import importlib
|
||||
|
||||
import app.config as config_module
|
||||
importlib.reload(config_module)
|
||||
|
||||
s = config_module.Settings()
|
||||
assert s.api_docs_enabled is False
|
||||
|
||||
|
||||
def test_api_docs_invalid_value_defaults_to_enabled(monkeypatch):
|
||||
_apply_env(monkeypatch, {"API_DOCS_ENABLED": "not-a-bool"})
|
||||
|
||||
import importlib
|
||||
|
||||
import app.config as config_module
|
||||
importlib.reload(config_module)
|
||||
|
||||
s = config_module.Settings()
|
||||
assert s.api_docs_enabled is True
|
||||
|
||||
98
api/tests/unit/test_rate_limiter.py
Normal file
98
api/tests/unit/test_rate_limiter.py
Normal file
@@ -0,0 +1,98 @@
|
||||
import ipaddress
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from starlette.requests import Request
|
||||
|
||||
from app.auth.rate_limiter import LoginRateLimiter, get_client_ip
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LoginRateLimiter tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def make_limiter():
|
||||
return LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300)
|
||||
|
||||
|
||||
def test_not_blocked_initially():
|
||||
assert make_limiter().is_blocked("1.2.3.4") is False
|
||||
|
||||
|
||||
def test_blocked_after_threshold():
|
||||
limiter = make_limiter()
|
||||
for _ in range(3):
|
||||
limiter.record_failure("1.2.3.4")
|
||||
assert limiter.is_blocked("1.2.3.4") is True
|
||||
|
||||
|
||||
def test_success_clears_failures():
|
||||
limiter = make_limiter()
|
||||
limiter.record_failure("1.2.3.4")
|
||||
limiter.record_failure("1.2.3.4")
|
||||
limiter.record_success("1.2.3.4")
|
||||
assert limiter.is_blocked("1.2.3.4") is False
|
||||
|
||||
|
||||
def test_ips_are_isolated():
|
||||
limiter = make_limiter()
|
||||
for _ in range(3):
|
||||
limiter.record_failure("1.1.1.1")
|
||||
assert limiter.is_blocked("2.2.2.2") is False
|
||||
|
||||
|
||||
def test_window_resets_after_expiry():
|
||||
import time
|
||||
|
||||
limiter = LoginRateLimiter(max_failures=3, window_seconds=0, cooldown_seconds=300)
|
||||
limiter.record_failure("1.2.3.4")
|
||||
limiter.record_failure("1.2.3.4")
|
||||
time.sleep(0.01)
|
||||
limiter.record_failure("1.2.3.4")
|
||||
# window expired — counter reset on third call, so failures = 1, not 3
|
||||
assert limiter.is_blocked("1.2.3.4") is False
|
||||
|
||||
|
||||
def test_log_warning_on_lockout(caplog):
|
||||
import logging
|
||||
|
||||
limiter = make_limiter()
|
||||
with caplog.at_level(logging.WARNING, logger="app.auth.rate_limiter"):
|
||||
for _ in range(3):
|
||||
limiter.record_failure("5.6.7.8")
|
||||
assert "Login blocked" in caplog.text
|
||||
assert "5.6.7.8" in caplog.text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_client_ip tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def make_request(peer: str, headers: dict) -> MagicMock:
|
||||
req = MagicMock(spec=Request)
|
||||
req.client.host = peer
|
||||
req.headers = headers
|
||||
return req
|
||||
|
||||
|
||||
def test_get_client_ip_no_trusted_networks_returns_peer():
|
||||
req = make_request("203.0.113.1", {"X-Forwarded-For": "10.0.0.1"})
|
||||
assert get_client_ip(req, []) == "203.0.113.1"
|
||||
|
||||
|
||||
def test_get_client_ip_trusted_peer_uses_xff():
|
||||
req = make_request("10.0.0.1", {"X-Forwarded-For": "203.0.113.5"})
|
||||
nets = [ipaddress.ip_network("10.0.0.0/8")]
|
||||
assert get_client_ip(req, nets) == "203.0.113.5"
|
||||
|
||||
|
||||
def test_get_client_ip_untrusted_peer_ignores_xff():
|
||||
req = make_request("8.8.8.8", {"X-Forwarded-For": "203.0.113.5"})
|
||||
nets = [ipaddress.ip_network("10.0.0.0/8")]
|
||||
assert get_client_ip(req, nets) == "8.8.8.8"
|
||||
|
||||
|
||||
def test_get_client_ip_trusted_peer_falls_back_to_real_ip():
|
||||
req = make_request("10.0.0.1", {"X-Real-IP": "203.0.113.9"})
|
||||
nets = [ipaddress.ip_network("10.0.0.0/8")]
|
||||
assert get_client_ip(req, nets) == "203.0.113.9"
|
||||
67
docker-compose.test.yml
Normal file
67
docker-compose.test.yml
Normal file
@@ -0,0 +1,67 @@
|
||||
services:
|
||||
postgres-test:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: reactbin
|
||||
POSTGRES_PASSWORD: reactbin
|
||||
POSTGRES_DB: reactbin_test
|
||||
ports:
|
||||
- "5433:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U reactbin"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
minio-test:
|
||||
image: minio/minio:latest
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9002:9000"
|
||||
- "9003:9001"
|
||||
healthcheck:
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
minio-init-test:
|
||||
image: minio/mc:latest
|
||||
depends_on:
|
||||
minio-test:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
mc alias set local http://minio-test:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD &&
|
||||
mc mb --ignore-existing local/reactbin-test
|
||||
"
|
||||
|
||||
api-test:
|
||||
build:
|
||||
context: ./api
|
||||
environment:
|
||||
TEST_DATABASE_URL: postgresql+asyncpg://reactbin:reactbin@postgres-test:5432/reactbin_test
|
||||
DATABASE_URL: postgresql+asyncpg://reactbin:reactbin@postgres-test:5432/reactbin_test
|
||||
S3_ENDPOINT_URL: http://minio-test:9000
|
||||
S3_BUCKET_NAME: reactbin-test
|
||||
S3_ACCESS_KEY_ID: minioadmin
|
||||
S3_SECRET_ACCESS_KEY: minioadmin
|
||||
S3_REGION: us-east-1
|
||||
JWT_SECRET_KEY: test-secret-key-for-testing-only
|
||||
OWNER_USERNAME: testowner
|
||||
OWNER_PASSWORD: testpassword
|
||||
API_BASE_URL: http://localhost:8000
|
||||
MAX_UPLOAD_BYTES: "52428800"
|
||||
depends_on:
|
||||
postgres-test:
|
||||
condition: service_healthy
|
||||
minio-init-test:
|
||||
condition: service_completed_successfully
|
||||
command: ["python", "-m", "pytest", "tests/", "-v"]
|
||||
working_dir: /app
|
||||
53
k8s/api/deployment.yaml
Normal file
53
k8s/api/deployment.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: api
|
||||
namespace: reactbin
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: api
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: api
|
||||
spec:
|
||||
initContainers:
|
||||
- name: migrate
|
||||
imagePullPolicy: Always
|
||||
image: git.juggalol.com/juggalol/reactbin-api:v1.0.0
|
||||
command: ["alembic", "upgrade", "head"]
|
||||
workingDir: /app
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
containers:
|
||||
- name: api
|
||||
image: git.juggalol.com/juggalol/reactbin-api:v1.0.0
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
env:
|
||||
- name: API_DOCS_ENABLED
|
||||
value: "false"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /api/v1/health
|
||||
port: 8000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /api/v1/health
|
||||
port: 8000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
13
k8s/api/service.yaml
Normal file
13
k8s/api/service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: api
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: api
|
||||
ports:
|
||||
- name: http
|
||||
port: 8000
|
||||
targetPort: 8000
|
||||
34
k8s/ingress.yaml
Normal file
34
k8s/ingress.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: reactbin
|
||||
namespace: reactbin
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
kubernetes.io/tls-acme: "true"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "52m"
|
||||
spec:
|
||||
ingressClassName: nginx-public
|
||||
tls:
|
||||
- hosts:
|
||||
- reactbin.juggalol.com
|
||||
secretName: reactbin-tls
|
||||
rules:
|
||||
- host: reactbin.juggalol.com
|
||||
http:
|
||||
paths:
|
||||
- path: /api/
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api
|
||||
port:
|
||||
number: 8000
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: ui
|
||||
port:
|
||||
number: 8080
|
||||
24
k8s/minio/init-job.yaml
Normal file
24
k8s/minio/init-job.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: minio-init
|
||||
namespace: reactbin
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: mc
|
||||
image: minio/mc:latest
|
||||
# mc runs as root by default; FR-013 exception documented in spec
|
||||
securityContext:
|
||||
runAsNonRoot: false
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
mc alias set local http://minio:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD"
|
||||
mc mb --ignore-existing local/reactbin
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: minio-credentials
|
||||
16
k8s/minio/service.yaml
Normal file
16
k8s/minio/service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: minio
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: minio
|
||||
ports:
|
||||
- name: api
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
- name: console
|
||||
port: 9001
|
||||
targetPort: 9001
|
||||
59
k8s/minio/statefulset.yaml
Normal file
59
k8s/minio/statefulset.yaml
Normal file
@@ -0,0 +1,59 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: minio
|
||||
namespace: reactbin
|
||||
spec:
|
||||
serviceName: minio
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: minio
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: minio
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
fsGroup: 1000
|
||||
containers:
|
||||
- name: minio
|
||||
image: minio/minio:latest
|
||||
args:
|
||||
- server
|
||||
- /data
|
||||
- --console-address
|
||||
- ":9001"
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
- containerPort: 9001
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: minio-credentials
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/live
|
||||
port: 9000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/ready
|
||||
port: 9000
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
4
k8s/namespace.yaml
Normal file
4
k8s/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: reactbin
|
||||
30
k8s/ui/deployment.yaml
Normal file
30
k8s/ui/deployment.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: ui
|
||||
namespace: reactbin
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ui
|
||||
spec:
|
||||
containers:
|
||||
- name: ui
|
||||
imagePullPolicy: Always
|
||||
image: git.juggalol.com/juggalol/reactbin-ui:v1.0.0
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 101 # nginxinc/nginx-unprivileged default UID
|
||||
13
k8s/ui/service.yaml
Normal file
13
k8s/ui/service.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ui
|
||||
namespace: reactbin
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: ui
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
18
k8s/vault/api-secret.yaml
Normal file
18
k8s/vault/api-secret.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: api-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-vault-auth
|
||||
mount: kv
|
||||
type: kv-v2
|
||||
# Required Vault keys at this path:
|
||||
# DATABASE_URL, JWT_SECRET_KEY, OWNER_USERNAME, OWNER_PASSWORD,
|
||||
# S3_ENDPOINT_URL, S3_BUCKET_NAME, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY,
|
||||
# API_BASE_URL
|
||||
path: reactbin/api/config
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: api-env
|
||||
create: true
|
||||
16
k8s/vault/minio-secret.yaml
Normal file
16
k8s/vault/minio-secret.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: minio-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-vault-auth
|
||||
mount: kv
|
||||
type: kv-v2
|
||||
# Required Vault keys at this path:
|
||||
# MINIO_ROOT_USER, MINIO_ROOT_PASSWORD
|
||||
path: reactbin/minio/credentials
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: minio-credentials
|
||||
create: true
|
||||
22
k8s/vault/vault-auth.yaml
Normal file
22
k8s/vault/vault-auth.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: vso-reactbin
|
||||
namespace: reactbin
|
||||
---
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultAuth
|
||||
metadata:
|
||||
name: reactbin-vault-auth
|
||||
namespace: reactbin
|
||||
spec:
|
||||
method: kubernetes
|
||||
mount: kubernetes
|
||||
kubernetes:
|
||||
# The operator must create this role in Vault and bind it to the
|
||||
# default service account in the reactbin namespace with read access
|
||||
# to both reactbin/api/config and reactbin/minio/credentials.
|
||||
role: vso-reactbin
|
||||
serviceAccount: vso-reactbin
|
||||
audiences:
|
||||
- vault
|
||||
236
specs/008-postgres-integration-tests/plan.md
Normal file
236
specs/008-postgres-integration-tests/plan.md
Normal file
@@ -0,0 +1,236 @@
|
||||
# Implementation Plan: PostgreSQL Integration Test Infrastructure
|
||||
|
||||
**Branch**: `master` | **Date**: 2026-05-06 | **Spec**: specs/008-postgres-integration-tests/spec.md
|
||||
**Input**: Feature specification from `specs/008-postgres-integration-tests/spec.md`
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Enforce the constitution's PostgreSQL mandate (§2.5, §5.2 v1.3.0) for integration tests. Three concrete deliverables: (1) a fast-fail guard in `conftest.py` that rejects non-PostgreSQL URLs before any test collects, (2) a `docker-compose.test.yml` that provides isolated `postgres-test` and `minio-test` services and an `api-test` runner, and (3) a `Makefile` + `.env.test.example` that document the canonical test commands.
|
||||
|
||||
---
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12, Docker Compose v2
|
||||
**Primary Dependencies**: pytest, pytest-asyncio, asyncpg, SQLAlchemy 2.x (all already in `pyproject.toml [dev]`)
|
||||
**Storage**: PostgreSQL 16-alpine (test instance), MinIO (test instance)
|
||||
**Testing**: pytest — this feature *is* the test infrastructure change
|
||||
**Target Platform**: Developer workstation (Linux/macOS) with Docker
|
||||
**Project Type**: Infrastructure / developer-experience
|
||||
**Performance Goals**: Guard exits in < 2 s; full integration suite continues to run in < 60 s
|
||||
**Constraints**: Must not break the existing dev compose stack; no changes to application source code
|
||||
**Scale/Scope**: One guard, one compose file, one Makefile, one env example
|
||||
|
||||
---
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Status | Notes |
|
||||
|-----------|--------|-------|
|
||||
| §2.5 Database abstraction — no alternative DB in integration tests | ✅ ENFORCED | This feature implements the enforcement |
|
||||
| §5.1 TDD — failing test before implementation | ✅ | Guard itself is tested by running with a bad URL before adding the guard |
|
||||
| §5.2 Test pyramid — integration tests use real PostgreSQL | ✅ ENFORCED | docker-compose.test.yml provides the real instance |
|
||||
| §5.4 CI must pass before task is done | ✅ | Verified by running the full suite via compose |
|
||||
| §6 Tech stack — asyncpg driver, Docker Compose | ✅ | No new technologies introduced |
|
||||
| §7.1 One-command local start | ✅ | `docker compose -f docker-compose.test.yml run --rm api-test` |
|
||||
| §7.2 Environment config via env vars | ✅ | .env.test.example documents all vars |
|
||||
| §7.3 Linting not optional | ✅ | ruff will run as part of task validation |
|
||||
|
||||
No violations.
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/008-postgres-integration-tests/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← decisions made above
|
||||
├── spec.md ← feature specification
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source changes
|
||||
|
||||
```text
|
||||
# New files
|
||||
docker-compose.test.yml ← isolated test services + api-test runner
|
||||
.env.test.example ← documents test environment variables
|
||||
Makefile ← test-unit / test-integration targets
|
||||
|
||||
# Modified files
|
||||
api/tests/integration/conftest.py ← add postgresql+asyncpg:// dialect guard
|
||||
```
|
||||
|
||||
No application source files (`api/app/`) are modified. No UI files are touched.
|
||||
|
||||
---
|
||||
|
||||
## Detailed Design
|
||||
|
||||
### 1. conftest.py — dialect guard
|
||||
|
||||
Add a module-level `pytest_configure` hook at the top of `api/tests/integration/conftest.py`. It resolves the database URL (same logic as the `engine` fixture: prefer `TEST_DATABASE_URL`, fall back to `settings.database_url`) and calls `pytest.exit()` if the scheme is not `postgresql+asyncpg`:
|
||||
|
||||
```python
|
||||
def pytest_configure(config):
|
||||
import os
|
||||
db_url = os.getenv("TEST_DATABASE_URL") or os.getenv("DATABASE_URL", "")
|
||||
if not db_url.startswith("postgresql+asyncpg://"):
|
||||
pytest.exit(
|
||||
"Integration tests require a PostgreSQL database "
|
||||
"(postgresql+asyncpg://...). "
|
||||
"Set TEST_DATABASE_URL or DATABASE_URL accordingly. "
|
||||
f"Got: {db_url!r}",
|
||||
returncode=1,
|
||||
)
|
||||
```
|
||||
|
||||
The hook runs before any fixture or collection, giving an immediate, unambiguous error.
|
||||
|
||||
**Note**: This guard goes in `api/tests/integration/conftest.py` only, not in `api/tests/conftest.py`, so that unit tests (which use no database) are unaffected.
|
||||
|
||||
### 2. docker-compose.test.yml
|
||||
|
||||
```yaml
|
||||
services:
|
||||
postgres-test:
|
||||
image: postgres:16-alpine
|
||||
environment:
|
||||
POSTGRES_USER: reactbin
|
||||
POSTGRES_PASSWORD: reactbin
|
||||
POSTGRES_DB: reactbin_test
|
||||
ports:
|
||||
- "5433:5432"
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U reactbin"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
minio-test:
|
||||
image: minio/minio:latest
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
ports:
|
||||
- "9002:9000"
|
||||
- "9003:9001"
|
||||
healthcheck:
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
minio-init-test:
|
||||
image: minio/mc:latest
|
||||
depends_on:
|
||||
minio-test:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
MINIO_ROOT_USER: minioadmin
|
||||
MINIO_ROOT_PASSWORD: minioadmin
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
mc alias set local http://minio-test:9000 $$MINIO_ROOT_USER $$MINIO_ROOT_PASSWORD &&
|
||||
mc mb --ignore-existing local/reactbin-test
|
||||
"
|
||||
|
||||
api-test:
|
||||
build:
|
||||
context: ./api
|
||||
environment:
|
||||
TEST_DATABASE_URL: postgresql+asyncpg://reactbin:reactbin@postgres-test:5432/reactbin_test
|
||||
DATABASE_URL: postgresql+asyncpg://reactbin:reactbin@postgres-test:5432/reactbin_test
|
||||
S3_ENDPOINT_URL: http://minio-test:9000
|
||||
S3_BUCKET_NAME: reactbin-test
|
||||
S3_ACCESS_KEY_ID: minioadmin
|
||||
S3_SECRET_ACCESS_KEY: minioadmin
|
||||
S3_REGION: us-east-1
|
||||
JWT_SECRET_KEY: test-secret-key-for-testing-only
|
||||
OWNER_USERNAME: testowner
|
||||
OWNER_PASSWORD: testpassword
|
||||
API_BASE_URL: http://localhost:8000
|
||||
MAX_UPLOAD_BYTES: "52428800"
|
||||
depends_on:
|
||||
postgres-test:
|
||||
condition: service_healthy
|
||||
minio-init-test:
|
||||
condition: service_completed_successfully
|
||||
command: ["python", "-m", "pytest", "tests/", "-v"]
|
||||
working_dir: /app
|
||||
```
|
||||
|
||||
### 3. .env.test.example
|
||||
|
||||
Documents the variables needed to run integration tests from the host (with postgres-test and minio-test already running via compose):
|
||||
|
||||
```bash
|
||||
# Integration test environment — used when running pytest directly on the host
|
||||
# Start test services first: docker compose -f docker-compose.test.yml up -d postgres-test minio-test minio-init-test
|
||||
|
||||
TEST_DATABASE_URL=postgresql+asyncpg://reactbin:reactbin@localhost:5433/reactbin_test
|
||||
DATABASE_URL=postgresql+asyncpg://reactbin:reactbin@localhost:5433/reactbin_test
|
||||
S3_ENDPOINT_URL=http://localhost:9002
|
||||
S3_BUCKET_NAME=reactbin-test
|
||||
S3_ACCESS_KEY_ID=minioadmin
|
||||
S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
JWT_SECRET_KEY=test-secret-key-for-testing-only
|
||||
OWNER_USERNAME=testowner
|
||||
OWNER_PASSWORD=testpassword
|
||||
API_BASE_URL=http://localhost:8000
|
||||
MAX_UPLOAD_BYTES=52428800
|
||||
```
|
||||
|
||||
### 4. Makefile
|
||||
|
||||
```makefile
|
||||
.PHONY: test-unit test-integration
|
||||
|
||||
test-unit:
|
||||
cd api && python -m pytest tests/unit/ -v
|
||||
|
||||
test-integration:
|
||||
docker compose -f docker-compose.test.yml run --rm api-test
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Phase Breakdown
|
||||
|
||||
### Phase 1: Guard (FR-001) — US1
|
||||
|
||||
- Write a failing test: run `pytest api/tests/integration/` with `TEST_DATABASE_URL=sqlite+aiosqlite:///test.db` — confirm it does NOT exit early (test that the guard is absent)
|
||||
- Add `pytest_configure` guard to `api/tests/integration/conftest.py`
|
||||
- Verify: running with SQLite URL now exits immediately with the correct message
|
||||
- Verify: running with a PostgreSQL URL proceeds normally
|
||||
|
||||
### Phase 2: Docker Compose test stack (FR-002, FR-003) — US2
|
||||
|
||||
- Write `docker-compose.test.yml` with `postgres-test`, `minio-test`, `minio-init-test`, `api-test`
|
||||
- Run `docker compose -f docker-compose.test.yml run --rm api-test` — all tests pass
|
||||
- Confirm dev stack (port 5432, 9000) is unaffected
|
||||
|
||||
### Phase 3: Documentation (FR-004, FR-005) — US3
|
||||
|
||||
- Write `.env.test.example`
|
||||
- Write `Makefile` with `test-unit` and `test-integration`
|
||||
- Verify `make test-unit` runs unit tests without Docker
|
||||
- Verify `make test-integration` invokes the compose command
|
||||
|
||||
### Phase 4: Polish
|
||||
|
||||
- `ruff check api/app/ api/tests/` — zero violations
|
||||
- `ng lint` is unaffected (no UI changes)
|
||||
|
||||
---
|
||||
|
||||
## No data model or API contracts
|
||||
|
||||
This feature touches only developer tooling. No new API endpoints, database schema changes, or UI components.
|
||||
38
specs/008-postgres-integration-tests/quickstart.md
Normal file
38
specs/008-postgres-integration-tests/quickstart.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# Quickstart: Integration Test Infrastructure
|
||||
|
||||
## Run the full integration test suite (Docker, recommended)
|
||||
|
||||
```bash
|
||||
docker compose -f docker-compose.test.yml run --rm api-test
|
||||
```
|
||||
|
||||
Test services start automatically. The command exits with pytest's return code.
|
||||
|
||||
## Run unit tests only (no Docker required)
|
||||
|
||||
```bash
|
||||
make test-unit
|
||||
# or directly:
|
||||
cd api && python -m pytest tests/unit/ -v
|
||||
```
|
||||
|
||||
## Run integration tests from the host (test services must be running)
|
||||
|
||||
```bash
|
||||
# Start test services
|
||||
docker compose -f docker-compose.test.yml up -d postgres-test minio-test minio-init-test
|
||||
|
||||
# Copy and source test env vars
|
||||
cp .env.test.example .env.test
|
||||
export $(cat .env.test | xargs)
|
||||
|
||||
# Run tests
|
||||
cd api && python -m pytest tests/integration/ -v
|
||||
```
|
||||
|
||||
## Validate the guard works
|
||||
|
||||
```bash
|
||||
TEST_DATABASE_URL=sqlite+aiosqlite:///test.db python -m pytest api/tests/integration/
|
||||
# Expected: exits immediately with "Integration tests require postgresql+asyncpg://"
|
||||
```
|
||||
55
specs/008-postgres-integration-tests/research.md
Normal file
55
specs/008-postgres-integration-tests/research.md
Normal file
@@ -0,0 +1,55 @@
|
||||
# Research: PostgreSQL Integration Test Infrastructure
|
||||
|
||||
## Decision 1: How to enforce the PostgreSQL dialect in conftest.py
|
||||
|
||||
**Decision**: Add a `pytest_configure` hook (or a module-level guard in `conftest.py`) that calls `pytest.exit()` if the resolved database URL does not start with `postgresql+asyncpg://`.
|
||||
|
||||
**Rationale**: `pytest_configure` runs before collection, giving the clearest possible failure signal. A module-level assertion would also work but produces a less readable traceback. `pytest.exit()` with a human-readable message is the idiomatic approach.
|
||||
|
||||
**Alternatives considered**:
|
||||
- A custom pytest plugin in a separate file — unnecessary complexity for a one-liner guard.
|
||||
- Raising an exception in the `engine` fixture — runs too late (after collection); developers see confusing fixture errors instead of a clear message.
|
||||
|
||||
---
|
||||
|
||||
## Decision 2: Separate docker-compose.test.yml vs profiles in docker-compose.yml
|
||||
|
||||
**Decision**: Use a standalone `docker-compose.test.yml` at the repo root.
|
||||
|
||||
**Rationale**: Docker Compose profiles require the developer to remember `--profile test` on every command. A separate file is explicit and self-contained. The test file can define its own service names and ports without touching the dev compose file at all.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `docker-compose.yml` with a `test` profile — profile discovery is non-obvious; modifying the dev file risks breaking the dev stack.
|
||||
- A `docker-compose.override.yml` — override files apply automatically to `docker compose up`, which is the opposite of what we want for tests.
|
||||
|
||||
---
|
||||
|
||||
## Decision 3: Port assignments for test services
|
||||
|
||||
**Decision**:
|
||||
- `postgres-test`: host port 5433 (standard offset from dev 5432)
|
||||
- `minio-test` API: host port 9002 (offset from dev 9000)
|
||||
- `minio-test` console: host port 9003 (offset from dev 9001)
|
||||
|
||||
**Rationale**: Predictable offsets make it easy to remember. Developers running both stacks simultaneously won't hit port conflicts.
|
||||
|
||||
---
|
||||
|
||||
## Decision 4: S3 isolation strategy for tests
|
||||
|
||||
**Decision**: The `api-test` service sets `S3_BUCKET_NAME=reactbin-test` pointing to the dedicated `minio-test` instance. The `minio-init-test` sidecar creates that bucket before tests run.
|
||||
|
||||
**Rationale**: The existing conftest already manages database isolation via `create_all` / `drop_all`. MinIO requires bucket pre-creation (same as dev). A dedicated test bucket on a dedicated test MinIO instance gives full isolation. No changes to application storage code are needed.
|
||||
|
||||
---
|
||||
|
||||
## Decision 5: Makefile vs shell scripts
|
||||
|
||||
**Decision**: A `Makefile` at the repo root with `test-unit` and `test-integration` targets.
|
||||
|
||||
**Rationale**: `make` is universally available on Linux/macOS developer machines. The targets are short wrappers that document the canonical test invocation. No build logic; just convenience aliases.
|
||||
|
||||
**Alternatives considered**:
|
||||
- Shell scripts (`scripts/test.sh`) — no discoverability; `make help` is more ergonomic.
|
||||
- `package.json` scripts — wrong tool for a Python/Docker project.
|
||||
- `justfile` — not universally installed.
|
||||
95
specs/008-postgres-integration-tests/spec.md
Normal file
95
specs/008-postgres-integration-tests/spec.md
Normal file
@@ -0,0 +1,95 @@
|
||||
# Feature Specification: PostgreSQL Integration Test Infrastructure
|
||||
|
||||
**Feature Branch**: `008-postgres-integration-tests`
|
||||
**Created**: 2026-05-06
|
||||
**Status**: Draft
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Integration tests currently permit any SQLAlchemy-compatible database URL, including SQLite. This allowed a real production bug (incorrect `HAVING` without `GROUP BY`) to ship undetected because SQLite's permissive dialect did not reject it. The project constitution (§2.5, §5.2 v1.3.0) now explicitly mandates PostgreSQL for integration tests. This feature enforces that mandate with infrastructure and guardrails.
|
||||
|
||||
---
|
||||
|
||||
## User Scenarios & Testing
|
||||
|
||||
### User Story 1 — Integration tests are enforced to run against PostgreSQL (Priority: P1)
|
||||
|
||||
A developer running `pytest` against a non-PostgreSQL database URL receives an immediate, descriptive failure before any test runs.
|
||||
|
||||
**Why this priority**: Directly addresses the production bug that prompted this feature. Without this, the constitution mandate has no teeth.
|
||||
|
||||
**Independent Test**: Set `TEST_DATABASE_URL=sqlite+aiosqlite:///test.db` and run `pytest api/tests/integration/`. Confirm pytest exits immediately with a message identifying the dialect problem and naming the required scheme.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** `TEST_DATABASE_URL` is set to a SQLite URL, **When** `pytest api/tests/integration/` is invoked, **Then** pytest exits before collecting any test with an error: `Integration tests require postgresql+asyncpg://`.
|
||||
2. **Given** `DATABASE_URL` is unset and `TEST_DATABASE_URL` is unset, **When** pytest is invoked, **Then** pytest exits with a clear message about the missing database URL.
|
||||
3. **Given** `TEST_DATABASE_URL` is a valid `postgresql+asyncpg://` URL, **When** pytest is invoked, **Then** tests collect and run normally.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 — One-command integration test run against isolated services (Priority: P1)
|
||||
|
||||
A developer can run the entire integration test suite against dedicated, isolated PostgreSQL and MinIO instances with a single command.
|
||||
|
||||
**Why this priority**: Without this, the PostgreSQL requirement is mandated but impractical — developers have no easy way to satisfy it.
|
||||
|
||||
**Independent Test**: From the repo root with Docker available, run `docker compose -f docker-compose.test.yml run --rm api-test`. Confirm all integration tests pass, test containers start and stop cleanly, and dev database/bucket are untouched.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** Docker is running and dev services are stopped, **When** the test command is run, **Then** isolated `postgres-test` and `minio-test` services start, all tests run against them, and the command exits with pytest's return code.
|
||||
2. **Given** dev services are running on their normal ports, **When** the test command is run, **Then** test services use different ports (5433, 9002/9003) and do not interfere with the dev stack.
|
||||
3. **Given** any test data is written during the run, **When** the test run completes, **Then** all test schema is dropped (conftest teardown is unchanged).
|
||||
|
||||
---
|
||||
|
||||
### User Story 3 — Test infrastructure is documented (Priority: P2)
|
||||
|
||||
A developer new to the project can understand how to run unit tests vs integration tests without reading the source code.
|
||||
|
||||
**Independent Test**: Read `.env.test.example` and `Makefile`. Confirm all required environment variables are documented and `make test-unit` / `make test-integration` targets are present.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** a fresh clone, **When** the developer reads `.env.test.example`, **Then** they see every variable needed to run integration tests outside Docker, with example values.
|
||||
2. **Given** the Makefile, **When** the developer runs `make test-unit`, **Then** the pytest unit suite runs without requiring Docker.
|
||||
3. **Given** the Makefile, **When** the developer runs `make test-integration`, **Then** the Docker Compose test command runs.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What if `TEST_DATABASE_URL` is set but malformed? — The guard should still catch a non-PostgreSQL scheme; asyncpg will raise its own error for a malformed URL.
|
||||
- What if Docker is not available? — `make test-integration` fails at the Docker level with Docker's own error; the Makefile does not need to guard for this.
|
||||
- What if the test PostgreSQL port (5433) is already in use? — Standard Docker port conflict error; no special handling needed.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: `conftest.py` MUST assert the resolved database URL starts with `postgresql+asyncpg://` and call `pytest.exit()` with a descriptive message before any test collects.
|
||||
- **FR-002**: A `docker-compose.test.yml` MUST define isolated `postgres-test` (port 5433) and `minio-test` (ports 9002/9003) services and an `api-test` runner service.
|
||||
- **FR-003**: The `api-test` service MUST set `TEST_DATABASE_URL` pointing to `postgres-test` and all S3 env vars pointing to `minio-test`.
|
||||
- **FR-004**: A `.env.test.example` MUST document all environment variables required to run integration tests outside Docker.
|
||||
- **FR-005**: A `Makefile` MUST provide `test-unit` and `test-integration` targets.
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- **SC-001**: Running `pytest api/tests/integration/` with a SQLite URL exits in under 2 seconds with a clear error message — no tests run.
|
||||
- **SC-002**: `docker compose -f docker-compose.test.yml run --rm api-test` completes successfully with all integration tests passing.
|
||||
- **SC-003**: Dev services (postgres on 5432, minio on 9000) are unaffected when the test command runs.
|
||||
|
||||
---
|
||||
|
||||
## Assumptions
|
||||
|
||||
- Docker Compose v2 (`docker compose`) is available in the developer environment.
|
||||
- The existing `conftest.py` `engine` fixture (session-scoped `create_all` / `drop_all`) continues to handle schema lifecycle; no per-test transaction rollback mechanism is introduced.
|
||||
- CI/CD pipeline configuration is out of scope for this feature.
|
||||
113
specs/008-postgres-integration-tests/tasks.md
Normal file
113
specs/008-postgres-integration-tests/tasks.md
Normal file
@@ -0,0 +1,113 @@
|
||||
# Tasks: PostgreSQL Integration Test Infrastructure
|
||||
|
||||
**Input**: Design documents from `specs/008-postgres-integration-tests/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). For infrastructure tasks the "failing test" is a verification step that confirms the thing being built is absent before building it, then confirms it works after. Every user story has an explicit TDD red step before its implementation task.
|
||||
|
||||
**Organization**: No foundational blocking phase — all three user stories touch independent files and can proceed in order.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
No new project structure required. The existing layout accommodates all changes.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: User Story 1 — Dialect guard in conftest (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: `pytest api/tests/integration/` exits immediately with a clear message if the database URL is not `postgresql+asyncpg://`.
|
||||
|
||||
**Independent Test**: Run `TEST_DATABASE_URL=sqlite+aiosqlite:///test.db python -m pytest api/tests/integration/ -q` — command exits in < 2 s with the error message `Integration tests require postgresql+asyncpg://` and no tests are collected.
|
||||
|
||||
- [X] T001 [US1] Confirm guard is absent (TDD red): from `api/`, run `TEST_DATABASE_URL=sqlite+aiosqlite:///test.db python -m pytest tests/integration/ -q --co 2>&1 | head -20` — observe that tests ARE collected and note the count (guard not yet in place)
|
||||
- [X] T002 [US1] Add `pytest_configure` hook to `api/tests/integration/conftest.py` — resolve URL via `os.getenv("TEST_DATABASE_URL") or os.getenv("DATABASE_URL", "")`, call `pytest.exit("Integration tests require postgresql+asyncpg://...", returncode=1)` if URL does not start with `postgresql+asyncpg://`; place hook before any imports that depend on the database URL
|
||||
- [X] T003 [US1] Verify guard works (TDD green): run `TEST_DATABASE_URL=sqlite+aiosqlite:///test.db python -m pytest api/tests/integration/ -q` — confirm immediate exit with the correct error message and zero tests collected; also confirm a valid `postgresql+asyncpg://` URL does not trigger the guard
|
||||
|
||||
**Checkpoint**: Dialect-mismatched test runs are blocked before any test collects.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 2 — Docker Compose test stack (Priority: P1)
|
||||
|
||||
**Goal**: `docker compose -f docker-compose.test.yml run --rm api-test` runs the full integration suite against isolated PostgreSQL and MinIO services on different ports than the dev stack.
|
||||
|
||||
**Independent Test**: Run `docker compose -f docker-compose.test.yml run --rm api-test` from the repo root — all tests pass; verify `docker compose ps` shows dev services (if running) are unaffected on their original ports.
|
||||
|
||||
- [X] T004 [US2] Confirm compose file is absent (TDD red): run `test -f docker-compose.test.yml && echo EXISTS || echo ABSENT` — confirm output is `ABSENT`
|
||||
- [X] T005 [US2] Create `docker-compose.test.yml` at the repo root with four services: `postgres-test` (image `postgres:16-alpine`, host port 5433, db `reactbin_test`), `minio-test` (image `minio/minio:latest`, host ports 9002/9003), `minio-init-test` (creates bucket `reactbin-test`, depends on `minio-test` healthy), and `api-test` (builds from `./api`, runs `python -m pytest tests/ -v`, depends on `postgres-test` healthy and `minio-init-test` completed, environment sets `TEST_DATABASE_URL=postgresql+asyncpg://reactbin:reactbin@postgres-test:5432/reactbin_test`, `DATABASE_URL` to same value, and all S3 vars pointing to `minio-test:9000` with bucket `reactbin-test`) — follow exact design in `specs/008-postgres-integration-tests/plan.md`
|
||||
- [X] T006 [US2] Verify compose stack (TDD green): run `docker compose -f docker-compose.test.yml run --rm api-test` — confirm all integration tests pass; confirm no errors about missing env vars or connection failures
|
||||
|
||||
**Checkpoint**: Full integration suite runs against real PostgreSQL via one command.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 3 — Test documentation (Priority: P2)
|
||||
|
||||
**Goal**: `.env.test.example` and `Makefile` document how to run both test tiers.
|
||||
|
||||
**Independent Test**: Read `.env.test.example` — all variables needed for integration tests are present with example values. Run `make test-unit` — pytest unit suite runs without Docker and passes.
|
||||
|
||||
- [X] T007 [P] [US3] Create `.env.test.example` at the repo root documenting all variables required to run integration tests outside Docker: `TEST_DATABASE_URL`, `DATABASE_URL`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, `S3_REGION`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `API_BASE_URL`, `MAX_UPLOAD_BYTES` — with example values pointing to `localhost:5433` and `localhost:9002` (test service ports); include a comment explaining how to start test services first — follow exact design in `specs/008-postgres-integration-tests/plan.md`
|
||||
- [X] T008 [P] [US3] Create `Makefile` at the repo root with `.PHONY: test-unit test-integration`, `test-unit` target running `cd api && python -m pytest tests/unit/ -v`, and `test-integration` target running `docker compose -f docker-compose.test.yml run --rm api-test`
|
||||
- [X] T009 [US3] Verify `make test-unit` — unit tests pass without Docker (validates the Makefile target and confirms unit tests have no Docker dependency)
|
||||
- [X] T010 Verify `make test-integration` — Docker integration suite passes end-to-end (cross-story verification: exercises the US2 compose stack via the US3 Makefile target)
|
||||
|
||||
**Checkpoint**: All three user stories independently functional.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T011 Run `ruff check api/app/ api/tests/` — zero violations (conftest change must pass ruff; fix any issues)
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
### Phase Dependencies
|
||||
|
||||
- **Phase 2 (US1)**: No external dependencies — can start immediately
|
||||
- **Phase 3 (US2)**: Depends on Phase 2 (guard must be in place so the compose stack run exercises it)
|
||||
- **Phase 4 (US3)**: T007 and T008 are independent file writes (can run in parallel with each other after Phase 3); T009 requires T008; T010 requires T008 and T006
|
||||
- **Phase 5 (Polish)**: Depends on all prior phases
|
||||
|
||||
### Within Phase 4
|
||||
|
||||
- T007 ∥ T008 (different files, no dependency)
|
||||
- T009 after T008 (Makefile must exist)
|
||||
- T010 after T008 and T006 (requires both Makefile and compose stack)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001, T002, T003 (sequential — TDD for guard)
|
||||
Step 2: T004, T005, T006 (sequential — TDD for compose stack)
|
||||
Step 3 (parallel): T007, T008
|
||||
Step 4: T009 (after T008), T010 (after T008 + T006)
|
||||
Step 5: T011
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 — the guard)
|
||||
|
||||
1. Complete T001–T003
|
||||
2. **Validate**: SQLite URL is blocked; PostgreSQL URL proceeds
|
||||
3. US2 and US3 add the infrastructure to make the mandate practical
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
- After Phase 2: Dialect bugs are caught immediately — core safety net is in place
|
||||
- After Phase 3: Full integration suite runs against PostgreSQL via one Docker command
|
||||
- After Phase 4: Both test tiers are documented and accessible via `make`
|
||||
- After Phase 5: Lint clean, ready for merge
|
||||
34
specs/009-login-rate-limiting/checklists/requirements.md
Normal file
34
specs/009-login-rate-limiting/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: Login Brute-Force Protection
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-06
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [X] No implementation details (languages, frameworks, APIs)
|
||||
- [X] Focused on user value and business needs
|
||||
- [X] Written for non-technical stakeholders
|
||||
- [X] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [X] No [NEEDS CLARIFICATION] markers remain
|
||||
- [X] Requirements are testable and unambiguous
|
||||
- [X] Success criteria are measurable
|
||||
- [X] Success criteria are technology-agnostic (no implementation details)
|
||||
- [X] All acceptance scenarios are defined
|
||||
- [X] Edge cases are identified
|
||||
- [X] Scope is clearly bounded
|
||||
- [X] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [X] All functional requirements have clear acceptance criteria
|
||||
- [X] User scenarios cover primary flows
|
||||
- [X] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [X] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- All items pass. Spec is ready for `/speckit-plan`.
|
||||
85
specs/009-login-rate-limiting/contracts/auth.md
Normal file
85
specs/009-login-rate-limiting/contracts/auth.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# API Contract: Authentication
|
||||
|
||||
## POST /api/v1/auth/token
|
||||
|
||||
Authenticates the owner and returns a JWT access token.
|
||||
|
||||
**This endpoint is modified by feature 009** to enforce brute-force protection.
|
||||
All previous behaviour is preserved. One new response code (429) is added.
|
||||
|
||||
### Request
|
||||
|
||||
```
|
||||
POST /api/v1/auth/token
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"username": "string",
|
||||
"password": "string"
|
||||
}
|
||||
```
|
||||
|
||||
### Responses
|
||||
|
||||
#### 200 OK — Credentials accepted
|
||||
|
||||
```json
|
||||
{
|
||||
"access_token": "<jwt>",
|
||||
"token_type": "bearer",
|
||||
"expires_in": 86400
|
||||
}
|
||||
```
|
||||
|
||||
Side effect: resets the failure counter for the caller's IP address.
|
||||
|
||||
---
|
||||
|
||||
#### 401 Unauthorized — Credentials rejected
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "Invalid credentials",
|
||||
"code": "invalid_credentials"
|
||||
}
|
||||
```
|
||||
|
||||
Side effect: increments the failure counter for the caller's IP address. If the
|
||||
counter reaches `LOGIN_MAX_FAILURES`, subsequent requests from this IP will receive
|
||||
429 until the cooldown expires.
|
||||
|
||||
---
|
||||
|
||||
#### 429 Too Many Requests — Source blocked after repeated failures
|
||||
|
||||
**This response is new in feature 009.**
|
||||
|
||||
```
|
||||
HTTP/1.1 429 Too Many Requests
|
||||
Retry-After: 900
|
||||
Content-Type: application/json
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"detail": "Too many failed login attempts. Please try again later.",
|
||||
"code": "login_rate_limited"
|
||||
}
|
||||
```
|
||||
|
||||
The `Retry-After` header value is the configured cooldown duration in seconds (default: 900).
|
||||
It reflects the maximum possible wait, not the exact remaining lockout time.
|
||||
|
||||
No credentials are verified when this response is returned — the request is
|
||||
rejected before authentication is attempted.
|
||||
|
||||
---
|
||||
|
||||
### Notes
|
||||
|
||||
- The failure counter is per source IP address (TCP peer, not forwarded headers).
|
||||
- Threshold values (`LOGIN_MAX_FAILURES`, `LOGIN_WINDOW_SECONDS`, `LOGIN_COOLDOWN_SECONDS`)
|
||||
are not disclosed in any response.
|
||||
- Counters are in-memory and reset on process restart.
|
||||
53
specs/009-login-rate-limiting/data-model.md
Normal file
53
specs/009-login-rate-limiting/data-model.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# Data Model: Login Brute-Force Protection
|
||||
|
||||
## Overview
|
||||
|
||||
This feature introduces no new database tables. The only data entity is a transient,
|
||||
in-memory rate-limit record that does not survive process restarts. This is intentional
|
||||
(see research.md Decision 3).
|
||||
|
||||
---
|
||||
|
||||
## Entity: Rate-Limit Record (in-memory only)
|
||||
|
||||
| Field | Type | Description |
|
||||
|----------------|---------|-----------------------------------------------------------------------------|
|
||||
| `failures` | int | Count of consecutive failed login attempts in the current window |
|
||||
| `window_start` | float | Unix timestamp marking when the current counting window began |
|
||||
| `blocked_until`| float | Unix timestamp after which the source is no longer blocked; 0.0 if not blocked |
|
||||
|
||||
**Keyed by**: resolved client IP address string (e.g., `"192.168.1.1"`); see `get_client_ip()` in `rate_limiter.py` for resolution logic
|
||||
|
||||
**Lifecycle**:
|
||||
1. Record is created on the first failed login from a source.
|
||||
2. `failures` increments on each subsequent failure within the window.
|
||||
3. When `failures >= LOGIN_MAX_FAILURES`, `blocked_until` is set to `now + LOGIN_COOLDOWN_SECONDS`.
|
||||
4. When `blocked_until` has passed, the record is deleted on the next request from that source.
|
||||
5. A successful login deletes the record immediately (failure counter reset).
|
||||
6. If `now - window_start > LOGIN_WINDOW_SECONDS` without triggering lockout, the counter resets within the existing record.
|
||||
|
||||
**State machine**:
|
||||
|
||||
```
|
||||
[no record]
|
||||
│ first failure
|
||||
▼
|
||||
[tracking] ──── failure N ≥ max ────► [blocked]
|
||||
│ │
|
||||
│ success / window expires │ cooldown expires
|
||||
▼ ▼
|
||||
[no record] ◄─────────────────────── [no record]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration Entity: Rate-Limit Settings
|
||||
|
||||
Stored as environment variables; loaded via `app.config.Settings`:
|
||||
|
||||
| Env Var | Default | Description |
|
||||
|----------------------------|---------|----------------------------------------------------------|
|
||||
| `LOGIN_MAX_FAILURES` | `5` | Failures within window before lockout |
|
||||
| `LOGIN_WINDOW_SECONDS` | `300` | Rolling window duration in seconds (5 minutes) |
|
||||
| `LOGIN_COOLDOWN_SECONDS` | `900` | Lockout duration in seconds after threshold exceeded (15 minutes) |
|
||||
| `LOGIN_TRUSTED_PROXY_IPS` | `""` | Comma-separated IPs/CIDRs of trusted upstream proxies (e.g., `10.0.0.0/8`); empty = disabled |
|
||||
388
specs/009-login-rate-limiting/plan.md
Normal file
388
specs/009-login-rate-limiting/plan.md
Normal file
@@ -0,0 +1,388 @@
|
||||
# Implementation Plan: Login Brute-Force Protection
|
||||
|
||||
**Branch**: `009-login-rate-limiting` | **Date**: 2026-05-06 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/009-login-rate-limiting/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Add failure-counting brute-force protection to the login endpoint (`POST /api/v1/auth/token`).
|
||||
After a configurable number of consecutive failed attempts from the same resolved client IP,
|
||||
the endpoint returns HTTP 429 with a `Retry-After` header for a configurable cooldown period.
|
||||
A successful login resets the counter. All thresholds are configurable via environment variables.
|
||||
When deployed behind a reverse proxy (nginx, Kubernetes ingress), a `LOGIN_TRUSTED_PROXY_IPS`
|
||||
setting enables extraction of the real client IP from `X-Forwarded-For`. No new infrastructure
|
||||
(no Redis, no new DB table) — counters live in process memory.
|
||||
|
||||
---
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12+
|
||||
**Primary Dependencies**: FastAPI, pydantic-settings (already in use); no new dependencies added
|
||||
**Storage**: In-memory `dict` (no persistence across restarts — intentional)
|
||||
**Testing**: pytest + pytest-asyncio (existing test infrastructure)
|
||||
**Target Platform**: Linux server (Docker)
|
||||
**Project Type**: Web service (API only — this feature has no UI surface)
|
||||
**Performance Goals**: Rate limiter adds negligible overhead (dict lookup + lock acquisition; sub-millisecond)
|
||||
**Constraints**: Must not add new runtime service dependencies; must not change any auth behaviour for non-blocked sources
|
||||
**Scale/Scope**: Single process, single user; in-memory store is sufficient
|
||||
|
||||
---
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Status | Notes |
|
||||
|-----------|--------|-------|
|
||||
| §2.4 Auth abstraction (AuthProvider interface) | ✅ Pass | Rate limiter is a guard *before* `JWTAuthProvider.verify_credentials()`, not a bypass of the interface |
|
||||
| §2.5 DB abstraction (repository layer) | ✅ Pass | No database access; in-memory only |
|
||||
| §2.6 No speculative abstraction | ✅ Pass | Concrete `LoginRateLimiter` class, no interface; only one implementation planned |
|
||||
| §3.3 Error envelope (`detail` + `code`) | ✅ Pass | 429 response uses `{"detail": "...", "code": "login_rate_limited"}` |
|
||||
| §5.1 TDD | ✅ Required | Tasks follow red → green order |
|
||||
| §5.2 Integration tests against PostgreSQL | ✅ Pass | Integration test for the login endpoint will run against the Docker PostgreSQL stack |
|
||||
| §7.2 Environment configuration | ✅ Pass | `LOGIN_MAX_FAILURES`, `LOGIN_WINDOW_SECONDS`, `LOGIN_COOLDOWN_SECONDS`, `LOGIN_TRUSTED_PROXY_IPS` from env vars |
|
||||
| §7.3 Linting (ruff) | ✅ Required | All new files must pass `ruff check` |
|
||||
|
||||
**Gate result**: No violations. Cleared to proceed.
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/009-login-rate-limiting/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← decisions on approach
|
||||
├── data-model.md ← rate-limit record entity
|
||||
├── quickstart.md ← curl runbook
|
||||
├── contracts/
|
||||
│ └── auth.md ← updated POST /api/v1/auth/token with 429
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
api/
|
||||
├── app/
|
||||
│ ├── auth/
|
||||
│ │ ├── rate_limiter.py ← NEW: LoginRateLimiter class
|
||||
│ │ ├── jwt_provider.py (unchanged)
|
||||
│ │ ├── noop.py (unchanged)
|
||||
│ │ └── provider.py (unchanged)
|
||||
│ ├── config.py ← add login_max_failures, login_window_seconds, login_cooldown_seconds, login_trusted_proxy_ips
|
||||
│ ├── main.py ← init LoginRateLimiter in lifespan, attach to app.state
|
||||
│ └── routers/
|
||||
│ └── auth.py ← check rate limit before auth, record outcome
|
||||
└── tests/
|
||||
├── unit/
|
||||
│ └── test_rate_limiter.py ← NEW: unit tests for LoginRateLimiter logic
|
||||
└── integration/
|
||||
└── test_login_rate_limit.py ← NEW: integration tests for 429 behaviour via HTTP
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Detail
|
||||
|
||||
### `api/app/auth/rate_limiter.py`
|
||||
|
||||
```python
|
||||
import ipaddress
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from ipaddress import IPv4Network, IPv6Network
|
||||
from threading import Lock
|
||||
|
||||
from starlette.requests import Request
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_client_ip(
|
||||
request: Request,
|
||||
trusted_networks: list[IPv4Network | IPv6Network],
|
||||
) -> str:
|
||||
"""Return the resolved client IP, honouring X-Forwarded-For when the
|
||||
TCP peer is a trusted upstream proxy. Falls back to the TCP peer address
|
||||
when no trusted networks are configured or the peer is not in the list."""
|
||||
peer = request.client.host if request.client else "unknown"
|
||||
if trusted_networks and peer != "unknown":
|
||||
try:
|
||||
peer_addr = ipaddress.ip_address(peer)
|
||||
if any(peer_addr in net for net in trusted_networks):
|
||||
xff = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
|
||||
if xff:
|
||||
return xff
|
||||
real_ip = request.headers.get("X-Real-IP", "").strip()
|
||||
if real_ip:
|
||||
return real_ip
|
||||
except ValueError:
|
||||
pass
|
||||
return peer
|
||||
|
||||
|
||||
@dataclass
|
||||
class _Record:
|
||||
failures: int = 0
|
||||
window_start: float = field(default_factory=time.time)
|
||||
blocked_until: float = 0.0
|
||||
|
||||
|
||||
class LoginRateLimiter:
|
||||
def __init__(
|
||||
self,
|
||||
max_failures: int = 5,
|
||||
window_seconds: int = 300,
|
||||
cooldown_seconds: int = 900,
|
||||
) -> None:
|
||||
self._max = max_failures
|
||||
self._window = window_seconds
|
||||
self._cooldown = cooldown_seconds
|
||||
self._store: dict[str, _Record] = {}
|
||||
self._lock = Lock()
|
||||
|
||||
@property
|
||||
def cooldown_seconds(self) -> int:
|
||||
return self._cooldown
|
||||
|
||||
def is_blocked(self, ip: str) -> bool:
|
||||
now = time.time()
|
||||
with self._lock:
|
||||
rec = self._store.get(ip)
|
||||
if rec is None:
|
||||
return False
|
||||
if rec.blocked_until > now:
|
||||
return True
|
||||
if rec.blocked_until > 0:
|
||||
del self._store[ip]
|
||||
return False
|
||||
|
||||
def record_failure(self, ip: str) -> None:
|
||||
now = time.time()
|
||||
with self._lock:
|
||||
rec = self._store.get(ip)
|
||||
if rec is None:
|
||||
rec = _Record(window_start=now)
|
||||
self._store[ip] = rec
|
||||
if now - rec.window_start > self._window:
|
||||
rec.failures = 0
|
||||
rec.window_start = now
|
||||
rec.failures += 1
|
||||
if rec.failures >= self._max:
|
||||
rec.blocked_until = now + self._cooldown
|
||||
logger.warning(
|
||||
"Login blocked for %s after %d failures", ip, rec.failures
|
||||
)
|
||||
|
||||
def record_success(self, ip: str) -> None:
|
||||
with self._lock:
|
||||
self._store.pop(ip, None)
|
||||
```
|
||||
|
||||
### `api/app/config.py` additions
|
||||
|
||||
```python
|
||||
login_max_failures: int = 5
|
||||
login_window_seconds: int = 300
|
||||
login_cooldown_seconds: int = 900
|
||||
login_trusted_proxy_ips: str = "" # comma-separated IPs/CIDRs; empty = disabled
|
||||
```
|
||||
|
||||
### `api/app/main.py` lifespan update
|
||||
|
||||
```python
|
||||
import ipaddress
|
||||
|
||||
from app.auth.rate_limiter import LoginRateLimiter
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(application: FastAPI):
|
||||
settings = get_settings()
|
||||
application.state.login_rate_limiter = LoginRateLimiter(
|
||||
max_failures=settings.login_max_failures,
|
||||
window_seconds=settings.login_window_seconds,
|
||||
cooldown_seconds=settings.login_cooldown_seconds,
|
||||
)
|
||||
trusted_networks = []
|
||||
for part in settings.login_trusted_proxy_ips.split(","):
|
||||
part = part.strip()
|
||||
if part:
|
||||
try:
|
||||
trusted_networks.append(ipaddress.ip_network(part, strict=False))
|
||||
except ValueError:
|
||||
pass # invalid entry — skip silently
|
||||
application.state.login_trusted_networks = trusted_networks
|
||||
# ... existing DB setup unchanged
|
||||
engine = get_engine()
|
||||
async with engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
yield
|
||||
await engine.dispose()
|
||||
```
|
||||
|
||||
### `api/app/routers/auth.py` update
|
||||
|
||||
```python
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.auth.jwt_provider import JWTAuthProvider
|
||||
from app.auth.rate_limiter import LoginRateLimiter, get_client_ip
|
||||
from app.dependencies import get_jwt_auth
|
||||
|
||||
router = APIRouter(tags=["auth"])
|
||||
|
||||
|
||||
class LoginRequest(BaseModel):
|
||||
username: str
|
||||
password: str
|
||||
|
||||
|
||||
class TokenResponse(BaseModel):
|
||||
access_token: str
|
||||
token_type: str = "bearer"
|
||||
expires_in: int
|
||||
|
||||
|
||||
@router.post("/auth/token", response_model=TokenResponse)
|
||||
async def login(
|
||||
request: Request,
|
||||
body: LoginRequest,
|
||||
auth: JWTAuthProvider = Depends(get_jwt_auth),
|
||||
):
|
||||
limiter: LoginRateLimiter = request.app.state.login_rate_limiter
|
||||
ip: str = get_client_ip(request, request.app.state.login_trusted_networks)
|
||||
|
||||
if limiter.is_blocked(ip):
|
||||
return JSONResponse(
|
||||
status_code=429,
|
||||
content={
|
||||
"detail": "Too many failed login attempts. Please try again later.",
|
||||
"code": "login_rate_limited",
|
||||
},
|
||||
headers={"Retry-After": str(limiter.cooldown_seconds)},
|
||||
)
|
||||
|
||||
if not auth.verify_credentials(body.username, body.password):
|
||||
limiter.record_failure(ip)
|
||||
raise HTTPException(
|
||||
status_code=401,
|
||||
detail={"detail": "Invalid credentials", "code": "invalid_credentials"},
|
||||
)
|
||||
|
||||
limiter.record_success(ip)
|
||||
token = auth.create_token()
|
||||
return TokenResponse(
|
||||
access_token=token,
|
||||
token_type="bearer",
|
||||
expires_in=auth._expiry_seconds,
|
||||
)
|
||||
```
|
||||
|
||||
### `api/tests/unit/test_rate_limiter.py` (representative cases)
|
||||
|
||||
```python
|
||||
import time
|
||||
import pytest
|
||||
from app.auth.rate_limiter import LoginRateLimiter
|
||||
|
||||
|
||||
def test_not_blocked_initially():
|
||||
limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300)
|
||||
assert limiter.is_blocked("1.2.3.4") is False
|
||||
|
||||
|
||||
def test_blocked_after_threshold():
|
||||
limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300)
|
||||
for _ in range(3):
|
||||
limiter.record_failure("1.2.3.4")
|
||||
assert limiter.is_blocked("1.2.3.4") is True
|
||||
|
||||
|
||||
def test_success_clears_failures():
|
||||
limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300)
|
||||
limiter.record_failure("1.2.3.4")
|
||||
limiter.record_failure("1.2.3.4")
|
||||
limiter.record_success("1.2.3.4")
|
||||
assert limiter.is_blocked("1.2.3.4") is False
|
||||
|
||||
|
||||
def test_ips_are_isolated():
|
||||
limiter = LoginRateLimiter(max_failures=2, window_seconds=60, cooldown_seconds=300)
|
||||
limiter.record_failure("1.1.1.1")
|
||||
limiter.record_failure("1.1.1.1")
|
||||
assert limiter.is_blocked("2.2.2.2") is False
|
||||
```
|
||||
|
||||
### `api/tests/integration/test_login_rate_limit.py` (representative cases)
|
||||
|
||||
```python
|
||||
import pytest
|
||||
from httpx import AsyncClient
|
||||
|
||||
# Uses the 'client' fixture (NoOpAuthProvider) from conftest — sufficient for this
|
||||
# endpoint since we're testing the rate-limit layer, not auth correctness.
|
||||
# The login endpoint instantiates its own limiter via app.state, so we need
|
||||
# the full ASGI app.
|
||||
|
||||
BAD_CREDS = {"username": "attacker", "password": "wrong"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_repeated_failures_trigger_429(client: AsyncClient):
|
||||
# Use a custom limiter with low threshold to avoid slow tests
|
||||
# (the app.state.login_rate_limiter is set in lifespan; override for test)
|
||||
from app.auth.rate_limiter import LoginRateLimiter
|
||||
from app.main import app
|
||||
original = app.state.login_rate_limiter
|
||||
app.state.login_rate_limiter = LoginRateLimiter(
|
||||
max_failures=3, window_seconds=60, cooldown_seconds=30
|
||||
)
|
||||
try:
|
||||
for _ in range(3):
|
||||
await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
resp = await client.post("/api/v1/auth/token", json=BAD_CREDS)
|
||||
assert resp.status_code == 429
|
||||
assert resp.json()["code"] == "login_rate_limited"
|
||||
assert "Retry-After" in resp.headers
|
||||
finally:
|
||||
app.state.login_rate_limiter = original
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Phases
|
||||
|
||||
### Phase 1 (MVP — P1): Blocking after repeated failures
|
||||
|
||||
1. Add `login_max_failures`, `login_window_seconds`, `login_cooldown_seconds`, `login_trusted_proxy_ips` to `api/app/config.py`
|
||||
2. Create `api/app/auth/rate_limiter.py` with `LoginRateLimiter` and `get_client_ip()`
|
||||
3. Initialize rate limiter and parse trusted networks in `api/app/main.py` lifespan; attach both to `app.state`
|
||||
4. Update `api/app/routers/auth.py` to resolve client IP via `get_client_ip()`, then check + record outcomes
|
||||
5. Unit tests: `api/tests/unit/test_rate_limiter.py`
|
||||
6. Integration tests: `api/tests/integration/test_login_rate_limit.py`
|
||||
|
||||
### Phase 2 (US2 — observability): Logging and response hints
|
||||
|
||||
Delivered as part of Phase 1 (the `logger.warning(...)` call and `Retry-After` header
|
||||
are embedded in the same implementation). No separate phase needed.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables to Add to `.env.example`
|
||||
|
||||
```dotenv
|
||||
# Login brute-force protection
|
||||
LOGIN_MAX_FAILURES=5
|
||||
LOGIN_WINDOW_SECONDS=300
|
||||
LOGIN_COOLDOWN_SECONDS=900
|
||||
# Comma-separated IPs/CIDRs of trusted upstream proxies (e.g. nginx ingress pod CIDR).
|
||||
# Leave empty when not behind a reverse proxy.
|
||||
LOGIN_TRUSTED_PROXY_IPS=
|
||||
```
|
||||
|
||||
These are optional (have defaults) so existing `.env` files without them continue working.
|
||||
112
specs/009-login-rate-limiting/quickstart.md
Normal file
112
specs/009-login-rate-limiting/quickstart.md
Normal file
@@ -0,0 +1,112 @@
|
||||
# Quickstart: Login Brute-Force Protection
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- API running (via `docker compose up` or locally with `.env` set)
|
||||
- `curl` available
|
||||
|
||||
---
|
||||
|
||||
## Scenario 1: Trigger the rate limiter
|
||||
|
||||
Send 6 consecutive failed login attempts (default threshold is 5):
|
||||
|
||||
```bash
|
||||
for i in $(seq 1 6); do
|
||||
echo "Attempt $i:"
|
||||
curl -s -o /dev/null -w "%{http_code}\n" \
|
||||
-X POST http://localhost:8000/api/v1/auth/token \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "wrong", "password": "wrong"}'
|
||||
done
|
||||
```
|
||||
|
||||
Expected output:
|
||||
```
|
||||
Attempt 1: 401
|
||||
Attempt 2: 401
|
||||
Attempt 3: 401
|
||||
Attempt 4: 401
|
||||
Attempt 5: 401
|
||||
Attempt 6: 429
|
||||
```
|
||||
|
||||
The 6th attempt returns 429. Inspect the headers:
|
||||
|
||||
```bash
|
||||
curl -i -X POST http://localhost:8000/api/v1/auth/token \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "wrong", "password": "wrong"}'
|
||||
```
|
||||
|
||||
Expected headers include:
|
||||
```
|
||||
HTTP/1.1 429 Too Many Requests
|
||||
Retry-After: 900
|
||||
```
|
||||
|
||||
Expected body:
|
||||
```json
|
||||
{"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Scenario 2: Successful login resets the counter
|
||||
|
||||
Make some failed attempts, then log in with valid credentials:
|
||||
|
||||
```bash
|
||||
# Fail twice
|
||||
for i in 1 2; do
|
||||
curl -s -o /dev/null -w "fail $i: %{http_code}\n" \
|
||||
-X POST http://localhost:8000/api/v1/auth/token \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "wrong", "password": "wrong"}'
|
||||
done
|
||||
|
||||
# Succeed — resets counter
|
||||
curl -s -o /dev/null -w "success: %{http_code}\n" \
|
||||
-X POST http://localhost:8000/api/v1/auth/token \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "'"$OWNER_USERNAME"'", "password": "'"$OWNER_PASSWORD"'"}'
|
||||
|
||||
# Now fail 5 more times — counter was reset, so no 429 yet
|
||||
for i in $(seq 1 5); do
|
||||
curl -s -o /dev/null -w "fail after reset $i: %{http_code}\n" \
|
||||
-X POST http://localhost:8000/api/v1/auth/token \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "wrong", "password": "wrong"}'
|
||||
done
|
||||
```
|
||||
|
||||
Expected: all "fail after reset" lines return 401 (not 429), confirming the counter was reset.
|
||||
|
||||
---
|
||||
|
||||
## Scenario 3: Observe log output
|
||||
|
||||
While triggering the rate limiter (Scenario 1), watch API logs:
|
||||
|
||||
```bash
|
||||
docker compose logs -f api
|
||||
```
|
||||
|
||||
After the threshold is crossed you should see a line like:
|
||||
|
||||
```
|
||||
WARNING app.auth.rate_limiter:rate_limiter.py:NN Login blocked for 172.18.0.1 after 5 failures
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Environment variable overrides
|
||||
|
||||
To test with a lower threshold without code changes:
|
||||
|
||||
```bash
|
||||
LOGIN_MAX_FAILURES=2 LOGIN_WINDOW_SECONDS=60 LOGIN_COOLDOWN_SECONDS=30 \
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
Then only 2 failures trigger the lockout, and it clears after 30 seconds.
|
||||
67
specs/009-login-rate-limiting/research.md
Normal file
67
specs/009-login-rate-limiting/research.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# Research: Login Brute-Force Protection
|
||||
|
||||
## Decision 1: Library vs. custom implementation
|
||||
|
||||
**Decision**: Custom in-memory failure tracker (no new library dependency)
|
||||
|
||||
**Rationale**: The requirement is to count *failed* login attempts specifically and reset on success — not to rate-limit all requests regardless of outcome. Popular libraries like `slowapi` count all requests to a route, which would break FR-004 (reset on success) without significant workarounds. A purpose-built 60-line class is simpler, more auditable, and has no dependency footprint.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `slowapi` (built on `limits`): Counts all requests, not failures. Requires patching the exception handler to decrement on success — fragile and non-obvious.
|
||||
- `slowapi` with a custom key function: Could be done, but the library's storage model doesn't expose a "reset this key" API in a clean way.
|
||||
- Redis-backed counter: Overkill for a single-user personal app with one instance. No new infrastructure justified.
|
||||
|
||||
---
|
||||
|
||||
## Decision 2: Fixed window vs. sliding window
|
||||
|
||||
**Decision**: Fixed window with per-source reset on successful login
|
||||
|
||||
**Rationale**: Fixed window is simpler to implement correctly and sufficient for this use case. The main attack — rapid sequential guessing — is fully addressed. The known "burst at window boundary" weakness is irrelevant here because: (a) the cooldown period is separate from the counting window, and (b) a successful login resets the counter entirely.
|
||||
|
||||
**Alternatives considered**:
|
||||
- Sliding window: More accurate, but adds complexity (requires storing timestamps of each request). The marginal security benefit doesn't justify the implementation cost for a personal single-user app.
|
||||
|
||||
---
|
||||
|
||||
## Decision 3: In-memory backing store
|
||||
|
||||
**Decision**: Python `dict` keyed by source IP, protected by a threading `Lock`
|
||||
|
||||
**Rationale**: The application runs as a single process. In-memory storage means counters reset on restart — this is acceptable and matches the "fail open" assumption in the spec. No new infrastructure (Redis, database table) is required.
|
||||
|
||||
**Alternatives considered**:
|
||||
- Database-backed counters: Persistent across restarts, but adds a DB round-trip to every login request (including successful ones). Not justified.
|
||||
- Redis: Distributed-safe and persistent, but requires a new service dependency. Out of scope for a personal single-instance app.
|
||||
|
||||
---
|
||||
|
||||
## Decision 4: Source identifier
|
||||
|
||||
**Decision**: `request.client.host` (the TCP peer address)
|
||||
|
||||
**Rationale**: The spec explicitly states not to trust `X-Forwarded-For` headers unless the app is known to be behind a trusted proxy. `request.client.host` in Starlette/FastAPI is the actual TCP peer IP — it cannot be spoofed by an attacker sending arbitrary headers.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `X-Forwarded-For` first value: Spoofable if the app is not behind a trusted proxy (attacker can set arbitrary header values).
|
||||
- `X-Real-IP`: Same spoofing concern.
|
||||
|
||||
---
|
||||
|
||||
## Decision 5: 429 response and Retry-After header
|
||||
|
||||
**Decision**: Return HTTP 429 with `{"detail": "...", "code": "login_rate_limited"}` and a `Retry-After` header set to the configured cooldown duration in seconds
|
||||
|
||||
**Rationale**: HTTP 429 is the standard "Too Many Requests" status. The `Retry-After` header is explicitly mentioned in the spec (US2 acceptance scenario) and is required by RFC 6585 for rate-limit responses. Setting it to the *configured* cooldown (not the exact remaining time) satisfies FR-005: it doesn't reveal precise expiry, just the maximum wait. The response body follows §3.3 of the constitution (error envelope with `detail` and `code`).
|
||||
|
||||
---
|
||||
|
||||
## Decision 6: Default threshold values
|
||||
|
||||
**Decision**: `LOGIN_MAX_FAILURES=5`, `LOGIN_WINDOW_SECONDS=300` (5 min), `LOGIN_COOLDOWN_SECONDS=900` (15 min)
|
||||
|
||||
**Rationale**: Industry standard for web apps. 5 attempts is enough for legitimate typos but makes brute-force infeasible at human scale. A 5-minute counting window matches typical "I fat-fingered my password" retry patterns. A 15-minute cooldown is a meaningful deterrent without locking out a legitimate owner indefinitely.
|
||||
|
||||
**Alternatives considered**:
|
||||
- 3 failures / 60 s window / 300 s cooldown: More aggressive, but too likely to lock out the legitimate owner on a bad day.
|
||||
- 10 failures: Too permissive for a brute-force defense.
|
||||
84
specs/009-login-rate-limiting/spec.md
Normal file
84
specs/009-login-rate-limiting/spec.md
Normal file
@@ -0,0 +1,84 @@
|
||||
# Feature Specification: Login Brute-Force Protection
|
||||
|
||||
**Feature Branch**: `009-login-rate-limiting`
|
||||
**Created**: 2026-05-06
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Login API endpoints should be rate limited or otherwise protected against brute force attacks"
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 - Repeated failed logins are blocked (Priority: P1)
|
||||
|
||||
An attacker (or misconfigured client) sending many rapid login attempts with the wrong password is slowed or blocked before they can exhaustively guess credentials. After a threshold number of consecutive failures from the same source, the system refuses further attempts for a cooldown period and returns a clear, non-leaking error.
|
||||
|
||||
**Why this priority**: Directly prevents credential-stuffing and brute-force attacks against the sole privileged account. Without this, the owner account is exposed to automated password guessing with no friction.
|
||||
|
||||
**Independent Test**: Send more than the allowed number of failed login requests in quick succession and confirm that subsequent attempts are rejected with a rate-limit or lockout response — without knowing or changing the real password.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** an attacker sends N+1 failed login attempts within the configured window, **When** the (N+1)th request arrives, **Then** the system returns an error response indicating the request is blocked (not the normal "invalid credentials" error) and does not process the login attempt.
|
||||
2. **Given** a legitimate user has been temporarily blocked after too many failures, **When** the cooldown period elapses and they retry with the correct password, **Then** they are logged in successfully.
|
||||
3. **Given** a legitimate user makes a few failed attempts and then waits beyond the cooldown window, **When** they retry within the next window, **Then** their failure counter resets and they are not blocked.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 - Operators can observe and reason about blocking activity (Priority: P2)
|
||||
|
||||
When the protection triggers, the system produces enough observable signal (log entries, response metadata) that an operator can confirm the feature is working, diagnose false positives, and tune thresholds — without exposing sensitive details to the client.
|
||||
|
||||
**Why this priority**: Invisible security controls are unmanageable. Operators need to know the system is doing what it claims, and blocked legitimate users need a clear (but not exploitable) explanation.
|
||||
|
||||
**Independent Test**: Trigger the rate limiter and confirm that: (a) the response body or headers communicate that the request was blocked and when the client may retry; (b) the server logs an entry identifying the blocked source and the reason.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** a source is blocked, **When** they receive the rejection response, **Then** the response indicates they should wait before retrying (e.g., a `Retry-After` hint) without disclosing the exact threshold values.
|
||||
2. **Given** the rate limiter fires, **When** an operator inspects server logs, **Then** there is a log entry at WARNING level or above recording the blocked source and timestamp.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens when a distributed attacker rotates IPs to avoid per-IP limits?
|
||||
- How does the system behave if the backing store for rate-limit counters is temporarily unavailable — does it fail open (allow all) or fail closed (block all)?
|
||||
- Are IPv6 addresses and IPv4-mapped-IPv6 addresses treated consistently?
|
||||
- Does a successful login reset the failure counter for that source?
|
||||
- What happens if many legitimate users share a NAT/proxy IP (e.g., corporate network)?
|
||||
- What if `TRUSTED_PROXY_IPS` is configured to include an IP that an external attacker controls? (An attacker could then spoof `X-Forwarded-For` and rotate fake source IPs to bypass the rate limiter — operators must only list genuinely trusted upstream infrastructure.)
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The system MUST enforce a maximum number of failed login attempts per source identifier (the resolved client IP address) within a rolling time window before blocking further attempts.
|
||||
- **FR-002**: Once a source exceeds the failure threshold, the system MUST reject subsequent login requests for a configurable cooldown period, returning a distinct response (not the normal invalid-credentials response).
|
||||
- **FR-003**: After the cooldown period expires, the system MUST permit the source to attempt login again, resetting its failure count.
|
||||
- **FR-004**: A successful login MUST reset the failure counter for that source, preventing accumulation of old failures from blocking future legitimate access.
|
||||
- **FR-005**: The rejection response MUST NOT reveal the specific threshold values or remaining lockout duration in a way that aids an attacker in timing their attempts, but MUST provide enough information (e.g., "try again later") for a legitimate user to understand the situation.
|
||||
- **FR-006**: The system MUST log a structured warning event whenever a source is blocked, including the source identifier and timestamp.
|
||||
- **FR-007**: Rate-limit thresholds (maximum attempts, window duration, cooldown duration) MUST be configurable without code changes.
|
||||
- **FR-008**: The system MUST support a configurable list of trusted upstream proxy IP addresses and CIDR ranges. When the TCP peer address matches a trusted proxy, the resolved client IP MUST be extracted from the `X-Forwarded-For` request header (first entry) or, if absent, `X-Real-IP`. When no trusted proxies are configured, the TCP peer address MUST be used directly and forwarded-IP headers MUST be ignored.
|
||||
|
||||
### Key Entities
|
||||
|
||||
- **Rate-limit record**: Tracks the number of consecutive failures and the window start time for a given source identifier; expires automatically after the cooldown period.
|
||||
- **Source identifier**: The resolved client IP address used to key rate-limit records. When `LOGIN_TRUSTED_PROXY_IPS` is empty (default), this is the TCP peer address. When one or more proxy IPs/CIDRs are configured and the TCP peer matches, the first `X-Forwarded-For` entry (or `X-Real-IP`) is used instead.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: An automated script sending 100 consecutive failed login requests completes with at least 90 of those requests rejected after the threshold is crossed — verified in a controlled test environment.
|
||||
- **SC-002**: A legitimate user who has been temporarily blocked can successfully log in within 5 minutes of the cooldown period expiring without any manual intervention.
|
||||
- **SC-003**: Zero information about threshold values or exact lockout expiry is present in blocked response bodies or headers.
|
||||
- **SC-004**: Every blocking event produces a corresponding log entry; 100% of triggered blocking events are observable in logs during testing.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The application has a single login endpoint used by all clients (the owner login introduced in feature 004).
|
||||
- Source identification uses the resolved client IP address. By default (when `LOGIN_TRUSTED_PROXY_IPS` is empty) this is the TCP peer address. When one or more proxy IPs/CIDRs are configured, the first entry of `X-Forwarded-For` (or `X-Real-IP`) is used instead — but only when the TCP peer is in the trusted list, preventing header spoofing by external clients.
|
||||
- If the rate-limit backing store is unavailable, the system fails open (allows the attempt through) rather than blocking all logins — this preserves the owner's access, which is critical for a single-user admin application.
|
||||
- No CAPTCHA or multi-factor step is in scope; protection is purely count/time-based.
|
||||
- The feature targets the login endpoint only; other endpoints are out of scope.
|
||||
- The single-user nature of the app means IP-based identification is sufficient — there is no need for per-username lockout, and using IP (rather than username) avoids contributing to username enumeration risk.
|
||||
120
specs/009-login-rate-limiting/tasks.md
Normal file
120
specs/009-login-rate-limiting/tasks.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# Tasks: Login Brute-Force Protection
|
||||
|
||||
**Input**: Design documents from `specs/009-login-rate-limiting/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/auth.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). Every test task appears before the implementation task it covers. For each red step, run the test and confirm it fails before proceeding to the implementation.
|
||||
|
||||
**Organization**: Phase 1 adds env vars; Phase 2 adds config fields (shared by both stories); Phase 3 implements the core blocking behaviour (US1 MVP); Phase 4 adds observability-specific test coverage (US2); Phase 5 is polish.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
- [X] T001 Add a `# Login brute-force protection` comment block with `LOGIN_MAX_FAILURES=5`, `LOGIN_WINDOW_SECONDS=300`, `LOGIN_COOLDOWN_SECONDS=900`, and `LOGIN_TRUSTED_PROXY_IPS=` (empty by default, with an inline comment explaining it accepts comma-separated IPs/CIDRs) to both `.env.example` and `.env.test.example` at the repo root
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational
|
||||
|
||||
**Purpose**: Add the three new settings fields — required before any story implementation.
|
||||
|
||||
- [X] T002 Add `login_max_failures: int = 5`, `login_window_seconds: int = 300`, `login_cooldown_seconds: int = 900`, `login_trusted_proxy_ips: str = ""` to the `Settings` class in `api/app/config.py` (append after `owner_password`)
|
||||
|
||||
**Checkpoint**: `api/app/config.py` accepts all three new env vars with defaults.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — Repeated failed logins are blocked (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: After `LOGIN_MAX_FAILURES` consecutive failed login attempts from the same source IP within `LOGIN_WINDOW_SECONDS`, `POST /api/v1/auth/token` returns HTTP 429 for `LOGIN_COOLDOWN_SECONDS`. A successful login resets the counter.
|
||||
|
||||
**Independent Test**: `cd api && python -m pytest tests/unit/test_rate_limiter.py tests/integration/test_login_rate_limit.py::test_repeated_failures_trigger_429 tests/integration/test_login_rate_limit.py::test_success_resets_counter tests/integration/test_login_rate_limit.py::test_429_has_retry_after_header tests/integration/test_login_rate_limit.py::test_xff_header_ignored_when_no_trusted_networks -v` — all pass.
|
||||
|
||||
### Tests for User Story 1 (TDD red — write first, confirm failure before T005)
|
||||
|
||||
- [X] T003 [P] [US1] Create `api/tests/unit/test_rate_limiter.py` with ten failing unit tests — import `LoginRateLimiter` and `get_client_ip` from `app.auth.rate_limiter`; for `LoginRateLimiter` (instantiate with `max_failures=3, window_seconds=60, cooldown_seconds=300`): `test_not_blocked_initially`, `test_blocked_after_threshold`, `test_success_clears_failures`, `test_ips_are_isolated`, `test_window_resets_after_expiry`, `test_log_warning_on_lockout` (caplog at WARNING level: call `record_failure()` until threshold, assert `"Login blocked" in caplog.text` and IP in log output); for `get_client_ip` (construct a mock using `from unittest.mock import MagicMock` and `from starlette.requests import Request`: `req = MagicMock(spec=Request); req.client.host = "10.0.0.1"; req.headers = {"X-Forwarded-For": "203.0.113.5"}`): `test_get_client_ip_no_trusted_networks_returns_peer` (empty `trusted_networks=[]` → returns `req.client.host`), `test_get_client_ip_trusted_peer_uses_xff` (peer `"10.0.0.1"` in trusted CIDR `"10.0.0.0/8"` → returns `"203.0.113.5"`), `test_get_client_ip_untrusted_peer_ignores_xff` (peer `"8.8.8.8"` not in trusted CIDR `"10.0.0.0/8"` → returns `"8.8.8.8"` despite XFF), `test_get_client_ip_trusted_peer_falls_back_to_real_ip` (peer trusted, no XFF header, `X-Real-IP: "203.0.113.9"` → returns `"203.0.113.9"`); run `python -m pytest tests/unit/test_rate_limiter.py -v` and confirm `ImportError` or `ModuleNotFoundError` (red)
|
||||
- [X] T004 [P] [US1] Create `api/tests/integration/test_login_rate_limit.py` with four failing integration tests; each must override both `app.state.login_rate_limiter` (fresh `LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30)`) and `app.state.login_trusted_networks` (set to `[]` for all four tests — the `ASGITransport` peer is `"testclient"`, not a valid IP, so trusted-network matching can't be exercised here; proxy extraction is fully covered by T003 unit tests) via try/finally: (1) `test_repeated_failures_trigger_429` — POST three bad-credential requests then assert fourth returns 429 with `resp.json()["code"] == "login_rate_limited"`; (2) `test_success_resets_counter` — two failures → one valid login using `{"username": os.environ["OWNER_USERNAME"], "password": os.environ["OWNER_PASSWORD"]}` (matching conftest.py defaults: `testowner`/`testpassword`) → three more failures → assert all three return 401, not 429; (3) `test_429_has_retry_after_header` — trigger lockout (three failures), then assert `"Retry-After" in resp.headers` and `int(resp.headers["Retry-After"]) > 0`; (4) `test_xff_header_ignored_when_no_trusted_networks` — send three bad-cred requests with `headers={"X-Forwarded-For": "1.2.3.4"}` then a fourth with `headers={"X-Forwarded-For": "9.9.9.9"}` — assert the fourth returns 429 (not 401), proving the limiter tracked the real peer `"testclient"` for all requests and XFF was ignored; run `python -m pytest tests/integration/test_login_rate_limit.py -v` and confirm failure (red)
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [X] T005 [US1] Create `api/app/auth/rate_limiter.py` with two exports: (1) `get_client_ip(request: Request, trusted_networks: list[IPv4Network | IPv6Network]) -> str` — imports `ipaddress`, `from ipaddress import IPv4Network, IPv6Network`, `from starlette.requests import Request`; extracts `peer = request.client.host if request.client else "unknown"`; if `trusted_networks` is non-empty and peer is parseable as an IP address and falls within any trusted network, returns first `X-Forwarded-For` entry (strip whitespace) or `X-Real-IP` value, otherwise returns `peer`; wraps `ipaddress.ip_address(peer)` in `try/except ValueError` and falls back to `peer` on error; (2) `LoginRateLimiter` class: `__init__(self, max_failures: int = 5, window_seconds: int = 300, cooldown_seconds: int = 900)` storing params as `_max`, `_window`, `_cooldown`; `_store: dict[str, _Record]` and `_lock: threading.Lock`; `@dataclass _Record` with `failures: int = 0`, `window_start: float = field(default_factory=time.time)`, `blocked_until: float = 0.0`; `is_blocked(ip: str) -> bool`, `record_failure(ip: str) -> None` (logs WARNING on lockout), `record_success(ip: str) -> None`, `cooldown_seconds` property; stdlib imports: `import ipaddress, logging, time`, `from dataclasses import dataclass, field`, `from threading import Lock`
|
||||
- [X] T006 [US1] Update `api/app/main.py` lifespan: add `import ipaddress` at top; import `LoginRateLimiter` from `app.auth.rate_limiter`; inside `lifespan` before `engine = get_engine()`, consolidate to `settings = get_settings()` (remove the existing bare `get_settings()` call), then set `application.state.login_rate_limiter = LoginRateLimiter(max_failures=settings.login_max_failures, window_seconds=settings.login_window_seconds, cooldown_seconds=settings.login_cooldown_seconds)`; then parse `settings.login_trusted_proxy_ips` — split on `","`, strip each part, skip empty strings, call `ipaddress.ip_network(part, strict=False)` inside a `try/except ValueError` (skip invalid entries silently), collect results into `trusted_networks: list`; set `application.state.login_trusted_networks = trusted_networks`
|
||||
- [X] T007 [US1] Update `api/app/routers/auth.py` login endpoint: add `Request` to FastAPI imports and add `from fastapi.responses import JSONResponse`; add `from app.auth.rate_limiter import LoginRateLimiter, get_client_ip`; add `request: Request` as first parameter to `login()`; extract `limiter: LoginRateLimiter = request.app.state.login_rate_limiter` and `ip: str = get_client_ip(request, request.app.state.login_trusted_networks)`; add guard block — if `limiter.is_blocked(ip)`: return `JSONResponse(status_code=429, content={"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"}, headers={"Retry-After": str(limiter.cooldown_seconds)})`; after `verify_credentials` returns False: call `limiter.record_failure(ip)` before the existing `HTTPException`; after `auth.create_token()`: call `limiter.record_success(ip)` before returning `TokenResponse`
|
||||
- [X] T008 [US1] Verify TDD green: run `cd api && python -m pytest tests/unit/test_rate_limiter.py -v` — all 10 pass; run `make test-integration` — all tests pass including `test_repeated_failures_trigger_429`, `test_success_resets_counter`, `test_429_has_retry_after_header`, and `test_xff_header_ignored_when_no_trusted_networks`
|
||||
|
||||
**Checkpoint**: Brute-force blocking is live. Automated repeated failures are stopped after threshold; the owner can still log in after cooldown; unit and integration tests pass.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Operators can observe blocking activity (Priority: P2)
|
||||
|
||||
**Goal**: The 429 response includes a `Retry-After` header with a positive integer; the response body `code` is `"login_rate_limited"` and contains no threshold numeric values; server logs a WARNING when blocking triggers.
|
||||
|
||||
**Independent Test**: Trigger the rate limiter (already works from Phase 3) and assert `Retry-After` header is present in the response and `code` field is `"login_rate_limited"`.
|
||||
|
||||
### Tests for User Story 2 (TDD red — extend existing file)
|
||||
|
||||
- [X] T009 [US2] Add one test to `api/tests/integration/test_login_rate_limit.py` targeting observability properties not yet covered: `test_429_body_shape` — override `app.state.login_rate_limiter` with a fresh `LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30)` via try/finally (same isolation pattern as T004), trigger lockout (three failures), then assert `resp.json() == {"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"}` (exact match — confirms no threshold values leak and shape is correct); confirm this test is green immediately against the US1 implementation (T007 already returns this exact body)
|
||||
|
||||
**Checkpoint**: US2 observability properties are explicitly exercised by integration tests; a future regression in the Retry-After header or code field will be caught.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T010 Run `cd api && ruff check app/auth/rate_limiter.py app/routers/auth.py app/config.py app/main.py tests/unit/test_rate_limiter.py tests/integration/test_login_rate_limit.py` — fix any violations
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
### Phase Dependencies
|
||||
|
||||
- **Phase 1 (Setup)**: No external dependencies — can start immediately
|
||||
- **Phase 2 (Foundational)**: No external dependencies — can start immediately (parallel with Phase 1)
|
||||
- **Phase 3 (US1)**: Depends on Phase 2 (T002 must exist before T006 can use `settings.login_max_failures`)
|
||||
- **Phase 4 (US2)**: Depends on Phase 3 (tests verify behaviour implemented in T007)
|
||||
- **Phase 5 (Polish)**: Depends on all prior phases
|
||||
|
||||
### Within Phase 3
|
||||
|
||||
- T003 ∥ T004 (different files, no dependency — write tests in parallel)
|
||||
- T005 after T003, T004 (implement after tests confirm they fail)
|
||||
- T006 ∥ T007 after T005 (both import from `rate_limiter.py`; write to different files — `main.py` and `auth.py`; T006 sets `app.state.login_trusted_networks` which T007's router reads)
|
||||
- T008 after T005, T006, T007 (verify all pass)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 (setup + foundational — parallel, different files)
|
||||
Step 2: T003 ∥ T004 (write failing tests — parallel)
|
||||
Step 3: T005 (implement LoginRateLimiter — after red tests confirmed)
|
||||
Step 4: T006 ∥ T007 (wire limiter into app — parallel, different files)
|
||||
Step 5: T008 (verify green)
|
||||
Step 6: T009 (US2 observability tests — verify green immediately)
|
||||
Step 7: T010 (ruff clean)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 — the blocker)
|
||||
|
||||
1. Complete T001–T002 (config setup)
|
||||
2. Complete T003–T008 (core blocking)
|
||||
3. **Validate**: Run `make test-integration` — all 88 existing tests still pass; 2 new rate-limit tests pass
|
||||
4. US2 adds verification coverage for already-implemented observability features
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
- After Phase 3: Brute-force attacks on the login endpoint are blocked — core security net is in place
|
||||
- After Phase 4: Observability properties are explicitly tested — regressions in headers/logs will be caught
|
||||
- After Phase 5: Lint clean, ready for merge
|
||||
34
specs/010-api-prod-dockerfile/checklists/requirements.md
Normal file
34
specs/010-api-prod-dockerfile/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: Production-Grade API Container Image
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [X] No implementation details (languages, frameworks, APIs)
|
||||
- [X] Focused on user value and business needs
|
||||
- [X] Written for non-technical stakeholders
|
||||
- [X] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [X] No [NEEDS CLARIFICATION] markers remain
|
||||
- [X] Requirements are testable and unambiguous
|
||||
- [X] Success criteria are measurable
|
||||
- [X] Success criteria are technology-agnostic (no implementation details)
|
||||
- [X] All acceptance scenarios are defined
|
||||
- [X] Edge cases are identified
|
||||
- [X] Scope is clearly bounded
|
||||
- [X] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [X] All functional requirements have clear acceptance criteria
|
||||
- [X] User scenarios cover primary flows
|
||||
- [X] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [X] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- All items pass. Ready for `/speckit-plan`.
|
||||
122
specs/010-api-prod-dockerfile/contracts/container.md
Normal file
122
specs/010-api-prod-dockerfile/contracts/container.md
Normal file
@@ -0,0 +1,122 @@
|
||||
# Contract: Production API Container Image
|
||||
|
||||
This document defines the observable interface of the `reactbin-api-prod` container image. Any orchestration layer (Kubernetes manifests, Docker Compose, CI pipeline) MUST be written against this contract.
|
||||
|
||||
---
|
||||
|
||||
## Network Interface
|
||||
|
||||
| Property | Value |
|
||||
|----------|-------|
|
||||
| Protocol | HTTP/1.1 |
|
||||
| Port | 8000 (TCP) |
|
||||
| Bind address | `0.0.0.0` (all interfaces inside the container) |
|
||||
|
||||
---
|
||||
|
||||
## Health Check
|
||||
|
||||
The container exposes a health check at the existing API health endpoint:
|
||||
|
||||
```
|
||||
GET /api/v1/health
|
||||
```
|
||||
|
||||
**Success response** (`200 OK`):
|
||||
```json
|
||||
{ "status": "ok" }
|
||||
```
|
||||
|
||||
The container declares a built-in `HEALTHCHECK` with the following defaults:
|
||||
|
||||
| Parameter | Value |
|
||||
|-----------|-------|
|
||||
| Interval | 30s |
|
||||
| Timeout | 5s |
|
||||
| Start period | 10s |
|
||||
| Retries | 3 |
|
||||
|
||||
Orchestrators that define their own probes (e.g. Kubernetes `livenessProbe` / `readinessProbe`) SHOULD use this same endpoint.
|
||||
|
||||
---
|
||||
|
||||
## Required Environment Variables
|
||||
|
||||
All configuration is supplied at runtime via environment variables. The image contains no defaults for secret or environment-specific values.
|
||||
|
||||
| Variable | Description | Example |
|
||||
|----------|-------------|---------|
|
||||
| `JWT_SECRET_KEY` | HS256 signing key for bearer tokens | `change-me-long-random-string` |
|
||||
| `OWNER_USERNAME` | Username of the single owner account | `owner` |
|
||||
| `OWNER_PASSWORD` | Password of the single owner account | `change-me` |
|
||||
| `DATABASE_URL` | PostgreSQL connection URL (asyncpg scheme) | `postgresql+asyncpg://user:pass@host:5432/db` |
|
||||
| `S3_ENDPOINT_URL` | S3-compatible object storage endpoint | `https://s3.amazonaws.com` |
|
||||
| `S3_BUCKET_NAME` | Storage bucket name | `reactbin-prod` |
|
||||
| `S3_ACCESS_KEY_ID` | Storage access key | `AKIAIOSFODNN7EXAMPLE` |
|
||||
| `S3_SECRET_ACCESS_KEY` | Storage secret key | `wJalrXUtnFEMI/K7MDENG` |
|
||||
| `S3_REGION` | Storage region | `us-east-1` |
|
||||
|
||||
**Optional environment variables** (safe defaults apply):
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `JWT_EXPIRY_SECONDS` | `86400` | Token lifetime in seconds |
|
||||
| `MAX_UPLOAD_BYTES` | `52428800` | Maximum upload file size (50 MB) |
|
||||
| `LOGIN_MAX_FAILURES` | `5` | Brute-force lock threshold |
|
||||
| `LOGIN_WINDOW_SECONDS` | `300` | Failure counting window |
|
||||
| `LOGIN_COOLDOWN_SECONDS` | `900` | Lock duration after threshold |
|
||||
| `LOGIN_TRUSTED_PROXY_IPS` | `` | Comma-separated trusted proxy CIDRs |
|
||||
| `API_BASE_URL` | _(not required at runtime)_ | Used only by client tooling |
|
||||
|
||||
**Startup failure behaviour**: If a required variable is absent, the application exits with a non-zero code before accepting any requests. The error is logged to stderr identifying the missing variable.
|
||||
|
||||
---
|
||||
|
||||
## Signal Handling
|
||||
|
||||
| Signal | Behaviour |
|
||||
|--------|-----------|
|
||||
| `SIGTERM` | Stop accepting new connections; drain in-flight requests; exit 0 within 30s |
|
||||
| `SIGKILL` | Immediate termination (OS-level; no graceful drain possible) |
|
||||
|
||||
Kubernetes should configure `terminationGracePeriodSeconds ≥ 30` to allow the full drain window.
|
||||
|
||||
---
|
||||
|
||||
## Process Identity
|
||||
|
||||
| Property | Value |
|
||||
|----------|-------|
|
||||
| User | `appuser` |
|
||||
| UID | `1001` |
|
||||
| GID | `1001` |
|
||||
| Root privileges | None |
|
||||
|
||||
The container MUST NOT be run with `--privileged` or as UID 0.
|
||||
|
||||
---
|
||||
|
||||
## Filesystem
|
||||
|
||||
- **Working directory**: `/app`
|
||||
- **Application source**: `/app/app/`
|
||||
- **Virtual environment**: `/app/.venv/`
|
||||
- **No writable state**: The container requires no persistent local storage. All state is in PostgreSQL and S3.
|
||||
- **Read-only root**: The container is compatible with `--read-only` (no writes to the filesystem at runtime).
|
||||
|
||||
---
|
||||
|
||||
## Logging
|
||||
|
||||
All log output is written to **stdout** (info/debug) and **stderr** (warnings/errors). No log files are written inside the container. The container runtime log driver captures all output without additional configuration.
|
||||
|
||||
---
|
||||
|
||||
## Image Tags
|
||||
|
||||
| Tag pattern | Meaning |
|
||||
|-------------|---------|
|
||||
| `reactbin-api-prod:latest` | Latest build from `master` |
|
||||
| `reactbin-api-prod:<git-sha>` | Immutable build for a specific commit |
|
||||
|
||||
Deployments SHOULD pin to a specific git SHA tag, not `latest`.
|
||||
242
specs/010-api-prod-dockerfile/plan.md
Normal file
242
specs/010-api-prod-dockerfile/plan.md
Normal file
@@ -0,0 +1,242 @@
|
||||
# Implementation Plan: Production-Grade API Container Image
|
||||
|
||||
**Branch**: `010-api-prod-dockerfile` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/010-api-prod-dockerfile/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Produce a production-ready `api/Dockerfile.prod` using a two-stage build: a uv builder stage that installs lockfile-pinned, production-only dependencies into a virtual environment, and a lean `python:3.12-slim` runtime stage that contains only the venv, application source, and `curl` for health checks. The runtime process runs as a non-root user (UID 1001), handles SIGTERM gracefully via uvicorn's built-in drain, and logs exclusively to stdout/stderr. Behavioral verification is automated via a shell script (`api/tests/build/verify_production_image.sh`) written before the Dockerfile (§5.1 TDD).
|
||||
|
||||
---
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12 (existing API), Docker multi-stage build
|
||||
**Build tool**: uv (lockfile: `api/uv.lock`, already committed)
|
||||
**Base images**: `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` (builder), `python:3.12-slim` (runtime)
|
||||
**Testing**: Shell verification script (`verify_production_image.sh`) + `make verify-prod` target
|
||||
**Target Platform**: linux/amd64 container (Kubernetes or Docker host)
|
||||
**Performance Goals**: Container starts and passes health check within 30s; rebuild from warm cache in under 60s
|
||||
**Constraints**: No root process, no hardcoded secrets, no dev deps in final image, compatible with `--read-only` filesystem
|
||||
**Scale/Scope**: Single-file addition (`Dockerfile.prod`) + shell test + two Makefile targets; zero changes to existing source code
|
||||
|
||||
---
|
||||
|
||||
## Constitution Check
|
||||
|
||||
*GATE: Must pass before Phase 0 research. Re-checked post-design below.*
|
||||
|
||||
| Principle | Status | Notes |
|
||||
|-----------|--------|-------|
|
||||
| §5.1 TDD non-negotiable | **COMPLIANT** | `verify_production_image.sh` written before `Dockerfile.prod`; script fails (red) because the build file is absent, then passes (green) after |
|
||||
| §5.2 Test pyramid | **COMPLIANT** | Shell verification script is the integration-level test for this build artefact; no unit tests applicable (no Python business logic added) |
|
||||
| §5.4 CI must pass | **COMPLIANT** | `make verify-prod` target is runnable in host CI (requires Docker on the runner, which the existing `make test-integration` already requires) |
|
||||
| §6 Tech Stack — Docker | **COMPLIANT** | Docker + Docker Compose are mandated; this adds a production Docker file within that constraint |
|
||||
| §7.1 One-command local start | **COMPLIANT** | `api/Dockerfile` (dev stack) is unchanged; `docker compose up` is unaffected |
|
||||
| §7.2 Environment configuration | **COMPLIANT** | `Dockerfile.prod` contains zero hardcoded env values; all config is injected at runtime |
|
||||
| §7.3 Ruff/lint | **COMPLIANT** | No new Python files; shell script linted with `shellcheck` |
|
||||
| §2.6 No speculative abstraction | **COMPLIANT** | Single Dockerfile, no plugin system or generics |
|
||||
| §8 Scope boundaries | **COMPLIANT** | Purely infrastructure; no new API routes, data model, or UI changes |
|
||||
|
||||
**Post-design re-check**: All gates remain green. No violations.
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/010-api-prod-dockerfile/
|
||||
├── plan.md # This file
|
||||
├── research.md # Phase 0 decisions
|
||||
├── contracts/
|
||||
│ └── container.md # Container interface contract (port, env vars, signals, user)
|
||||
├── quickstart.md # Build and verification scenarios
|
||||
└── tasks.md # Generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
api/
|
||||
├── Dockerfile # Existing dev/test image — UNCHANGED
|
||||
├── Dockerfile.prod # NEW: production multi-stage image
|
||||
├── .dockerignore # Existing — verify test files are excluded from build context
|
||||
└── tests/
|
||||
└── build/
|
||||
└── verify_production_image.sh # NEW: TDD verification script (written first)
|
||||
|
||||
Makefile # Root Makefile — add build-prod and verify-prod targets
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dockerfile.prod — Annotated Reference
|
||||
|
||||
```dockerfile
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Build stage: install production deps via uv
|
||||
# ════════════════════════════════════════════════
|
||||
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Pre-compile bytecode; use copy mode for cross-layer compatibility
|
||||
ENV UV_COMPILE_BYTECODE=1 \
|
||||
UV_LINK_MODE=copy \
|
||||
UV_PYTHON_DOWNLOADS=never
|
||||
|
||||
# ── Layer cache split: deps only (changes rarely) ──
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||
uv sync --frozen --no-dev --no-install-project
|
||||
|
||||
# ── Layer cache split: source (changes often) ──
|
||||
COPY app/ ./app/
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Runtime stage: lean image with venv + source
|
||||
# ════════════════════════════════════════════════
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# curl for HEALTHCHECK — only tool added beyond base Python
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root system user (UID/GID 1001)
|
||||
RUN groupadd --system --gid 1001 appgroup \
|
||||
&& useradd --system --uid 1001 --gid 1001 --no-create-home appuser
|
||||
|
||||
# Copy venv from builder; copy source directly from build context
|
||||
COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv
|
||||
COPY --chown=appuser:appgroup app/ ./app/
|
||||
|
||||
USER appuser
|
||||
|
||||
# Activate the venv by prepending its bin to PATH
|
||||
ENV PATH="/app/.venv/bin:$PATH"
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||
CMD curl -f http://localhost:8000/api/v1/health || exit 1
|
||||
|
||||
# uvicorn handles SIGTERM; --timeout-graceful-shutdown gives 30s to drain requests
|
||||
CMD ["uvicorn", "app.main:app", \
|
||||
"--host", "0.0.0.0", \
|
||||
"--port", "8000", \
|
||||
"--timeout-graceful-shutdown", "30"]
|
||||
```
|
||||
|
||||
> **Note on COPY paths**: Build context is `api/` (as set by the Makefile target). `COPY app/ ./app/` in both stages refers to `api/app/`. The runtime stage copies source directly from the build context, not from the builder stage — this is simpler and avoids an extra intermediate layer.
|
||||
|
||||
---
|
||||
|
||||
## verify_production_image.sh — Structure
|
||||
|
||||
```sh
|
||||
#!/usr/bin/env bash
|
||||
# TDD verification script for api/Dockerfile.prod
|
||||
# Fails (red) if Dockerfile.prod does not exist or any check fails.
|
||||
set -euo pipefail
|
||||
|
||||
IMAGE="reactbin-api-prod:verify-$$"
|
||||
|
||||
cleanup() { docker rm -f "$CONTAINER" 2>/dev/null || true; docker rmi "$IMAGE" 2>/dev/null || true; }
|
||||
trap cleanup EXIT
|
||||
|
||||
# Step 1: Build — fails red if Dockerfile.prod is absent
|
||||
docker build -f api/Dockerfile.prod api/ -t "$IMAGE"
|
||||
|
||||
# Step 2: Start container with minimal env vars
|
||||
CONTAINER=$(docker run -d -p 18000:8000 \
|
||||
-e JWT_SECRET_KEY=verify-test-key \
|
||||
-e OWNER_USERNAME=testowner \
|
||||
-e OWNER_PASSWORD=testpassword \
|
||||
-e DATABASE_URL=postgresql+asyncpg://noop:noop@noop/noop \
|
||||
-e S3_ENDPOINT_URL=http://noop:9000 \
|
||||
-e S3_BUCKET_NAME=noop \
|
||||
-e S3_ACCESS_KEY_ID=noop \
|
||||
-e S3_SECRET_ACCESS_KEY=noop \
|
||||
-e S3_REGION=us-east-1 \
|
||||
"$IMAGE")
|
||||
|
||||
# Step 3: Poll health endpoint (app will fail to connect to DB, but /health is pre-DB)
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost:18000/api/v1/health > /dev/null; then break; fi
|
||||
sleep 1
|
||||
[[ $i -eq 30 ]] && { echo "FAIL: health check timed out"; exit 1; }
|
||||
done
|
||||
|
||||
# Step 4: Assert non-root user
|
||||
UID_IN_CONTAINER=$(docker exec "$CONTAINER" id -u)
|
||||
[[ "$UID_IN_CONTAINER" -ne 0 ]] || { echo "FAIL: process running as root"; exit 1; }
|
||||
|
||||
# Step 5: Graceful shutdown
|
||||
docker stop "$CONTAINER" # sends SIGTERM
|
||||
EXIT_CODE=$(docker wait "$CONTAINER")
|
||||
[[ "$EXIT_CODE" -eq 0 ]] || { echo "FAIL: non-zero exit code $EXIT_CODE"; exit 1; }
|
||||
|
||||
# Step 6: Dev deps absent
|
||||
if docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null; then
|
||||
echo "FAIL: pytest importable in production image (dev deps present)"; exit 1
|
||||
fi
|
||||
|
||||
echo "All production image checks passed."
|
||||
```
|
||||
|
||||
> **Note on health check feasibility**: `/api/v1/health` is a simple JSON response that does not require a database connection (confirmed in `api/app/main.py`). The verification script can therefore pass even without a real PostgreSQL instance.
|
||||
|
||||
---
|
||||
|
||||
## Makefile Targets
|
||||
|
||||
Add to root `Makefile`:
|
||||
|
||||
```makefile
|
||||
.PHONY: build-prod verify-prod
|
||||
|
||||
build-prod:
|
||||
docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest
|
||||
|
||||
verify-prod:
|
||||
bash api/tests/build/verify_production_image.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## `.dockerignore` Review
|
||||
|
||||
The existing `api/.dockerignore` already excludes `.venv/`, `__pycache__/`, `.env`, etc. Two additions improve the production build context:
|
||||
|
||||
```
|
||||
tests/
|
||||
*.egg-info/
|
||||
alembic/
|
||||
alembic.ini
|
||||
```
|
||||
|
||||
`tests/` and `alembic/` are not needed in the production image (we `COPY app/ ./app/` explicitly). Excluding them from the build context reduces the data sent to the Docker daemon.
|
||||
|
||||
> `*.egg-info/` is already present in the existing `.dockerignore`.
|
||||
|
||||
---
|
||||
|
||||
## Implementation Order
|
||||
|
||||
Tasks are generated by `/speckit-tasks`, but the logical dependency order is:
|
||||
|
||||
1. **Write `verify_production_image.sh`** (TDD red — build fails because `Dockerfile.prod` absent)
|
||||
2. **Add `Makefile` targets** (`build-prod`, `verify-prod`) — references the script
|
||||
3. **Write `api/Dockerfile.prod`** (implement to make TDD pass)
|
||||
4. **Update `api/.dockerignore`** (exclude `tests/`, `alembic/` from build context)
|
||||
5. **Run `make verify-prod`** (TDD green — all 6 checks pass)
|
||||
6. **Run `shellcheck`** on `verify_production_image.sh`
|
||||
|
||||
No existing tests are modified. `make test-integration` continues to use `api/Dockerfile` unchanged.
|
||||
138
specs/010-api-prod-dockerfile/quickstart.md
Normal file
138
specs/010-api-prod-dockerfile/quickstart.md
Normal file
@@ -0,0 +1,138 @@
|
||||
# Quickstart: Production API Container Image
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker 24+ installed and running on the host
|
||||
- `make` available
|
||||
- A copy of `.env` (or the env vars from `.env.example`) for smoke-testing
|
||||
|
||||
---
|
||||
|
||||
## Build the Production Image
|
||||
|
||||
```sh
|
||||
make build-prod
|
||||
# Equivalent: docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest
|
||||
```
|
||||
|
||||
On a warm cache (deps unchanged), the build should complete in under 60 seconds because the dependency layer is reused.
|
||||
|
||||
---
|
||||
|
||||
## Verify the Production Image (TDD Smoke Test)
|
||||
|
||||
```sh
|
||||
make verify-prod
|
||||
```
|
||||
|
||||
This runs `api/tests/build/verify_production_image.sh`, which:
|
||||
1. Builds the image (fails fast if `Dockerfile.prod` is missing — the **red** TDD state)
|
||||
2. Starts the container with test env vars
|
||||
3. Polls `/api/v1/health` until it returns 200 (or times out after 30s)
|
||||
4. Asserts the API process is running as a non-root user (UID ≠ 0)
|
||||
5. Sends SIGTERM and asserts the container exits with code 0 within 30s
|
||||
6. Asserts `pytest` is NOT importable inside the container (dev deps excluded)
|
||||
|
||||
**Expected output (green)**:
|
||||
```
|
||||
[verify] Building reactbin-api-prod:test ...
|
||||
[verify] Build OK
|
||||
[verify] Starting container ...
|
||||
[verify] Health check passed (GET /api/v1/health → 200)
|
||||
[verify] Process user: 1001 (non-root ✓)
|
||||
[verify] Sending SIGTERM ...
|
||||
[verify] Container exited with code 0 (graceful shutdown ✓)
|
||||
[verify] Dev deps absent ✓
|
||||
[verify] All checks passed.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## User Story Integration Scenarios
|
||||
|
||||
### US1 — API Runs Reliably in Production
|
||||
|
||||
```sh
|
||||
# Start container with real (or test) env vars
|
||||
docker run --rm -d \
|
||||
--name reactbin-test \
|
||||
-p 8000:8000 \
|
||||
-e JWT_SECRET_KEY=my-secret \
|
||||
-e OWNER_USERNAME=owner \
|
||||
-e OWNER_PASSWORD=changeme \
|
||||
-e DATABASE_URL=postgresql+asyncpg://user:pass@host:5432/db \
|
||||
-e S3_ENDPOINT_URL=http://minio:9000 \
|
||||
-e S3_BUCKET_NAME=reactbin \
|
||||
-e S3_ACCESS_KEY_ID=minioadmin \
|
||||
-e S3_SECRET_ACCESS_KEY=minioadmin \
|
||||
-e S3_REGION=us-east-1 \
|
||||
reactbin-api-prod:latest
|
||||
|
||||
# Check health
|
||||
curl http://localhost:8000/api/v1/health
|
||||
# → {"status":"ok"}
|
||||
|
||||
# Graceful shutdown
|
||||
docker stop reactbin-test # sends SIGTERM
|
||||
docker wait reactbin-test # → exit code 0
|
||||
```
|
||||
|
||||
### US2 — Minimal, Secure Container
|
||||
|
||||
```sh
|
||||
# Verify non-root user
|
||||
docker inspect --format='{{.Config.User}}' reactbin-api-prod:latest
|
||||
# → appuser (or 1001)
|
||||
|
||||
# Verify no dev packages (pytest should not be importable)
|
||||
docker run --rm reactbin-api-prod:latest \
|
||||
/app/.venv/bin/python -c "import pytest" 2>&1
|
||||
# → ModuleNotFoundError: No module named 'pytest'
|
||||
|
||||
# Verify no source control or test files in image
|
||||
docker run --rm reactbin-api-prod:latest ls /app
|
||||
# → app .venv (no tests/, no alembic/, no .git/)
|
||||
```
|
||||
|
||||
### US3 — Fast, Reproducible Builds
|
||||
|
||||
```sh
|
||||
# First build (cold): installs all deps
|
||||
time docker build --no-cache -f api/Dockerfile.prod api/ -t reactbin-api-prod:cold
|
||||
|
||||
# Touch a source file only (no dep change)
|
||||
touch api/app/main.py
|
||||
|
||||
# Second build: dependency layer served from cache
|
||||
time docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:warm
|
||||
# Expect: warm build < 30s; cold build varies (network-dependent)
|
||||
|
||||
# Confirm same health response from both
|
||||
docker run --rm ... reactbin-api-prod:cold
|
||||
docker run --rm ... reactbin-api-prod:warm
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Missing Env Var Behaviour
|
||||
|
||||
```sh
|
||||
docker run --rm \
|
||||
-e JWT_SECRET_KEY=my-secret \
|
||||
# OWNER_USERNAME intentionally omitted
|
||||
reactbin-api-prod:latest
|
||||
# → Container exits non-zero, stderr logs: "field required: owner_username"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Read-Only Filesystem Compatibility
|
||||
|
||||
```sh
|
||||
docker run --rm --read-only \
|
||||
-e JWT_SECRET_KEY=... [other env vars] \
|
||||
reactbin-api-prod:latest &
|
||||
|
||||
curl http://localhost:8000/api/v1/health
|
||||
# → {"status":"ok"}
|
||||
```
|
||||
94
specs/010-api-prod-dockerfile/research.md
Normal file
94
specs/010-api-prod-dockerfile/research.md
Normal file
@@ -0,0 +1,94 @@
|
||||
# Research: Production API Container Image
|
||||
|
||||
## Decision 1 — Use a Separate `Dockerfile.prod`
|
||||
|
||||
**Decision**: Add `api/Dockerfile.prod` alongside the existing `api/Dockerfile`.
|
||||
|
||||
**Rationale**: The existing `api/Dockerfile` installs dev dependencies (`.[dev]`), mounts source with `--reload`, and is used by the Docker Compose integration test stack. Modifying it would break `make test-integration`. A separate file keeps the two images independent with zero coupling.
|
||||
|
||||
**Alternatives considered**:
|
||||
- Build-arg flag in a single Dockerfile: adds conditional complexity and makes both files harder to read.
|
||||
- Rename existing to `Dockerfile.dev` and make `Dockerfile` the production image: would require updating `docker-compose.test.yml` with an explicit file reference — a wider change than needed for this feature.
|
||||
|
||||
---
|
||||
|
||||
## Decision 2 — Multi-Stage Build: uv Builder + python:3.12-slim Runtime
|
||||
|
||||
**Decision**: Two-stage build. Stage 1 (`builder`) uses `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` to install production dependencies into a virtual environment. Stage 2 (`runtime`) uses `python:3.12-slim` and copies only the `.venv` and application source from the builder. uv is not present in the final image.
|
||||
|
||||
**Rationale**:
|
||||
- uv's official Docker image is the fastest, most correct way to produce a pinned, bytecode-compiled venv from `uv.lock`.
|
||||
- Keeping uv out of the runtime image reduces attack surface and image size.
|
||||
- `python:3.12-slim` is a well-maintained, widely scanned base; using it for the runtime stage aligns with existing project images.
|
||||
|
||||
**Layer caching strategy**:
|
||||
```
|
||||
COPY pyproject.toml uv.lock ./
|
||||
RUN uv sync --frozen --no-dev --no-install-project ← cache hits when only source changes
|
||||
COPY app/ ./app/ ← only reaches here on source changes
|
||||
```
|
||||
`--no-install-project` installs all listed dependencies without the project package itself. The project source is then copied separately. This means a source-only change reuses the dependency layer from cache.
|
||||
|
||||
**Environment variables for optimal builds**:
|
||||
- `UV_COMPILE_BYTECODE=1` — pre-compile `.pyc` files; slightly larger venv but faster cold starts.
|
||||
- `UV_LINK_MODE=copy` — avoids hard-link issues when copying between image layers.
|
||||
- `UV_PYTHON_DOWNLOADS=never` — ensures the builder stage uses the bundled Python, not a downloaded one.
|
||||
|
||||
**Alternatives considered**:
|
||||
- Installing deps into the system Python (`--system`): rejected because it pollutes the base image and makes it harder to copy deps cleanly into the runtime stage.
|
||||
- Using a single `FROM python:3.12-slim` with pip: slower builds, no lockfile pinning, no bytecode compilation step.
|
||||
|
||||
---
|
||||
|
||||
## Decision 3 — Non-Root User (UID 1001, System User)
|
||||
|
||||
**Decision**: Create a system user `appuser` with GID/UID 1001 in the runtime stage. All owned files are `chown`-ed at `COPY` time using `--chown=appuser:appgroup`.
|
||||
|
||||
**Rationale**: Running as root inside a container is a container breakout risk. A numeric UID (rather than a named user that might not exist on the host) is required by some Kubernetes pod security admission policies. UID 1001 avoids collision with UID 1000 (the typical first interactive user on a Linux host) while remaining a predictable, inspectable value.
|
||||
|
||||
**Alternatives considered**:
|
||||
- UID 1000: small risk of collision with host user when bind mounts are involved.
|
||||
- `USER nobody`: `nobody` (UID 65534) works but its name and UID are not consistent across distros.
|
||||
|
||||
---
|
||||
|
||||
## Decision 4 — SIGTERM Graceful Shutdown via uvicorn `--timeout-graceful-shutdown`
|
||||
|
||||
**Decision**: Use `uvicorn`'s built-in `--timeout-graceful-shutdown 30` flag. No process supervisor (tini, s6) is required.
|
||||
|
||||
**Rationale**: uvicorn handles SIGTERM natively when run as PID 1 in single-worker mode (the production Dockerfile runs one worker). On SIGTERM it stops accepting new connections, waits up to `--timeout-graceful-shutdown` seconds for in-flight requests to complete, then exits with code 0. No additional init system is needed.
|
||||
|
||||
**Alternatives considered**:
|
||||
- tini: adds a small init shim that reaps zombies and forwards signals. Not necessary with a single uvicorn worker (no child processes to reap).
|
||||
- Gunicorn + uvicorn workers: more complex; appropriate for multi-worker setups but the deployment platform (Kubernetes) scales horizontally via pod replicas rather than in-process workers.
|
||||
|
||||
---
|
||||
|
||||
## Decision 5 — `curl` for HEALTHCHECK
|
||||
|
||||
**Decision**: Install `curl` (via `apt-get --no-install-recommends`) in the runtime stage and use it in the `HEALTHCHECK` directive.
|
||||
|
||||
**Rationale**: The existing dev Dockerfile already installs `curl` for the same reason. `curl -f` exits non-zero on HTTP errors, making it a reliable single-command health probe. A Python one-liner adds interpreter startup overhead (~100ms) per check; `curl` is ~5ms.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `wget -q --spider`: available on Alpine but not on Debian-slim by default; requires separate install.
|
||||
- Python `urllib.request`: no extra install, but slower and adds noise to the process table during health checks.
|
||||
|
||||
---
|
||||
|
||||
## Decision 6 — TDD Verification via Shell Script
|
||||
|
||||
**Decision**: Write `api/tests/build/verify_production_image.sh` before `Dockerfile.prod`. The script builds the image and runs behavioral checks (health endpoint, non-root user, clean SIGTERM exit). It is the "failing test" per §5.1.
|
||||
|
||||
**Rationale**: The production image is a build artifact, not Python business logic. pytest cannot test a Docker image without Docker-in-Docker, which the current CI stack does not support. A shell script run on the host (via `make verify-prod`) is the appropriate TDD vehicle for this artefact type.
|
||||
|
||||
**Verification steps the script covers**:
|
||||
1. `docker build -f api/Dockerfile.prod api/` → fails (red) until Dockerfile.prod exists.
|
||||
2. Run container with required env vars; wait for health endpoint → `GET /api/v1/health` returns 200.
|
||||
3. Inspect running process user → UID ≠ 0 (non-root).
|
||||
4. Send SIGTERM to container; assert exit code 0 within 30s (graceful shutdown).
|
||||
5. Assert dev packages are absent: `pip show pytest` inside container must return non-zero.
|
||||
|
||||
**Alternatives considered**:
|
||||
- pytest with docker SDK: requires `docker` Python package and DinD in CI; rejected as over-engineered for a single-file build artifact.
|
||||
- Manual verification only: rejected because §5.1 mandates automated failing tests before production code.
|
||||
96
specs/010-api-prod-dockerfile/spec.md
Normal file
96
specs/010-api-prod-dockerfile/spec.md
Normal file
@@ -0,0 +1,96 @@
|
||||
# Feature Specification: Production-Grade API Container Image
|
||||
|
||||
**Feature Branch**: `010-api-prod-dockerfile`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "We need a production-grade Dockerfile for the API to start preparing for a production deployment."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 — API Runs Reliably in Production (Priority: P1)
|
||||
|
||||
An operator builds and runs the API container in a production environment. The container starts successfully, serves requests, and can be health-checked by an orchestrator (e.g., Kubernetes). When the orchestrator signals shutdown, the container drains in-flight requests before exiting cleanly, avoiding dropped connections.
|
||||
|
||||
**Why this priority**: Without a correctly functioning container, no production deployment is possible. This is the baseline that all other stories depend on.
|
||||
|
||||
**Independent Test**: Build the image from source, run the container with required env vars, call the health endpoint, send SIGTERM, and verify the process exits cleanly with code 0. No other stories are required.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** a built container image and all required env vars, **When** the container starts, **Then** it begins serving requests within 30 seconds and the health endpoint returns a success response.
|
||||
2. **Given** a running container, **When** a SIGTERM is received, **Then** the process finishes any in-flight requests and exits with code 0 within 30 seconds.
|
||||
3. **Given** a running container, **When** a required env var is absent, **Then** the process exits immediately with a non-zero code and logs a clear error message identifying the missing variable.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 — Minimal, Secure Container (Priority: P2)
|
||||
|
||||
A security-conscious operator audits the container image before promotion to production. They verify the API process does not run as root, the image contains no development tooling or test artefacts, and no credentials are baked into the image layers.
|
||||
|
||||
**Why this priority**: Running as root or including unnecessary tools increases the blast radius of any container breakout. This is a production-readiness requirement, not optional hardening.
|
||||
|
||||
**Independent Test**: Inspect the built image to confirm the runtime user is non-root, confirm no dev/test files are present in the image layers, and scan the image with a standard vulnerability scanner. Passes independently of any deployment environment.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** a built container image, **When** the running process user is inspected, **Then** the API process runs as a non-root user with a numeric UID.
|
||||
2. **Given** a built container image, **When** the image layers are inspected, **Then** no development dependencies, test files, or local configuration are present.
|
||||
3. **Given** a built container image, **When** the image layers are scanned for hardcoded secrets, **Then** no credentials, API keys, or secret values are found embedded in any layer.
|
||||
|
||||
---
|
||||
|
||||
### User Story 3 — Fast, Reproducible Builds (Priority: P3)
|
||||
|
||||
A developer rebuilds the container image after a code change. The build completes quickly because unchanged layers (dependencies) are cached. Given identical source inputs, the resulting image is functionally equivalent across builds, enabling confident CI/CD promotion.
|
||||
|
||||
**Why this priority**: Slow or non-deterministic builds reduce developer confidence and slow deployment pipelines. Important for velocity, but the container already works (P1, P2) before this is optimised.
|
||||
|
||||
**Independent Test**: Build the image twice from the same source; confirm the second build reuses dependency layers from cache and completes significantly faster than the first.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** an image built once, **When** only application source files change and the image is rebuilt, **Then** the dependency installation step is served from cache and the rebuild completes faster than a clean build.
|
||||
2. **Given** two builds from the same source commit, **When** the images are run, **Then** both produce identical API behaviour.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens when the database is unavailable at container startup?
|
||||
- What happens when the container is sent SIGKILL instead of SIGTERM (hard kill by orchestrator)?
|
||||
- What happens if the container runs out of memory mid-request?
|
||||
- How does the image behave when run read-only filesystem (`--read-only`)?
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The container image MUST start the API service and begin accepting requests without manual intervention after supplying required env vars.
|
||||
- **FR-002**: The container image MUST expose a health check that an orchestrator can poll to determine service readiness.
|
||||
- **FR-003**: The container image MUST handle the SIGTERM signal by completing in-flight requests then exiting cleanly within 30 seconds.
|
||||
- **FR-004**: The container image MUST run the API process as a non-root, non-privileged user.
|
||||
- **FR-005**: The container image MUST NOT contain development dependencies, test files, source control metadata, or local configuration files.
|
||||
- **FR-006**: The container image MUST NOT contain any hardcoded credentials, secrets, or environment-specific values — all configuration MUST be supplied via environment variables at runtime.
|
||||
- **FR-007**: The container image MUST log to standard output and standard error so logs are captured by the container runtime without additional configuration.
|
||||
- **FR-008**: The container image MUST be buildable reproducibly from the same source inputs — a rebuild from the same commit MUST produce a functionally equivalent image.
|
||||
- **FR-009**: Rebuilding the image after a source-only change (no dependency changes) MUST reuse the cached dependency installation layer.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: The container starts and serves its first successful health-check response within 30 seconds of launch with all required env vars present.
|
||||
- **SC-002**: The container exits cleanly (code 0) within 30 seconds of receiving a SIGTERM, with no in-flight requests dropped.
|
||||
- **SC-003**: The API process inside the container runs as a non-root user (inspectable via container runtime tooling).
|
||||
- **SC-004**: A rebuild after a source-only change completes in under 60 seconds on a warm cache (dependency layer reused).
|
||||
- **SC-005**: The image contains zero hardcoded secrets (verifiable by static layer inspection).
|
||||
- **SC-006**: All API logs appear on stdout/stderr and are captured by the container runtime log driver without additional sidecar or configuration.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The existing test Dockerfile (used by the integration test stack) is not suitable for production and will remain separate; this feature produces a distinct production image.
|
||||
- All required runtime configuration (database URL, S3 credentials, JWT secret, etc.) will be injected as environment variables by the deployment platform — the image itself carries no environment-specific values.
|
||||
- The deployment target supports OCI-compatible container images (Kubernetes, Docker, etc.).
|
||||
- No persistent local storage is needed by the API container; all state lives in the database and object storage.
|
||||
- The production image does not need to run database migrations; migrations are applied by a separate step in the deployment pipeline.
|
||||
- A single-architecture image (linux/amd64) is sufficient for the initial production target.
|
||||
158
specs/010-api-prod-dockerfile/tasks.md
Normal file
158
specs/010-api-prod-dockerfile/tasks.md
Normal file
@@ -0,0 +1,158 @@
|
||||
# Tasks: Production-Grade API Container Image
|
||||
|
||||
**Input**: Design documents from `specs/010-api-prod-dockerfile/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/container.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). The "test" for a Docker build artefact is `api/tests/build/verify_production_image.sh`, written before `api/Dockerfile.prod` exists. Running the script immediately fails (red) because the build step cannot find the file; writing `Dockerfile.prod` turns it green.
|
||||
|
||||
**Organization**: Phase 1 sets up Makefile targets and `.dockerignore`; Phase 3 (US1) writes the verification script and the Dockerfile; Phase 4 (US2) extends the script with security checks; Phase 5 (US3) extends it with a cache-hit check; Phase 6 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
- [X] T001 Add `build-prod` and `verify-prod` targets (and their `.PHONY` entries) to the root `Makefile` at `/workspace/Makefile`: `build-prod` runs `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest`; `verify-prod` runs `bash api/tests/build/verify_production_image.sh`
|
||||
|
||||
- [X] T002 Update `api/.dockerignore` at `/workspace/api/.dockerignore`: append three lines — `tests/`, `alembic/`, and `alembic.ini` — so these are excluded from the production build context (the Dockerfile.prod copies only `app/` explicitly, but excluding them from the context keeps the transfer to the Docker daemon fast)
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational
|
||||
|
||||
- [X] T003 Create directory `api/tests/build/` at `/workspace/api/tests/build/` with `mkdir -p` and add a `.gitkeep` so the directory is tracked
|
||||
|
||||
**Checkpoint**: Directory structure is ready; Makefile and .dockerignore are updated.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — API Runs Reliably in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: The container builds, starts, serves the health endpoint, and exits cleanly on SIGTERM.
|
||||
|
||||
**Independent Test**: `make verify-prod` — passes when `Dockerfile.prod` exists and all US1 checks pass.
|
||||
|
||||
### Test for User Story 1 (TDD red — write first, confirm failure before T005)
|
||||
|
||||
- [X] T004 [US1] Create `api/tests/build/verify_production_image.sh` as an executable bash script (`chmod +x`) with `#!/usr/bin/env bash` and `set -euo pipefail`; the script MUST:
|
||||
1. Set `IMAGE="reactbin-api-prod:verify-$$"` and `PG_CONTAINER=""` and `APP_CONTAINER=""`;
|
||||
2. Define a `cleanup()` function that runs `docker rm -f "$APP_CONTAINER" "$PG_CONTAINER" 2>/dev/null || true` and `docker rmi "$IMAGE" 2>/dev/null || true`, and register it with `trap cleanup EXIT`;
|
||||
3. **[US1 check 1 — build]** Run `docker build -f api/Dockerfile.prod api/ -t "$IMAGE"` — this is the line that fails **red** because `api/Dockerfile.prod` does not yet exist; print `[verify] Building $IMAGE...` before and `[verify] Build OK` after;
|
||||
4. **[US1 check 2 — start with real DB]** Launch a throwaway postgres: `PG_CONTAINER=$(docker run -d -e POSTGRES_DB=reactbin_verify -e POSTGRES_USER=verify -e POSTGRES_PASSWORD=verify postgres:16-alpine)`; poll `docker exec "$PG_CONTAINER" pg_isready -U verify` up to 30 × 1s, fail if timeout; capture `PG_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$PG_CONTAINER")`;
|
||||
5. Start the production container: `APP_CONTAINER=$(docker run -d -p 18000:8000 -e JWT_SECRET_KEY=verify-key -e OWNER_USERNAME=testowner -e OWNER_PASSWORD=testpassword -e DATABASE_URL="postgresql+asyncpg://verify:verify@${PG_IP}:5432/reactbin_verify" -e S3_ENDPOINT_URL=http://noop:9000 -e S3_BUCKET_NAME=noop -e S3_ACCESS_KEY_ID=noop -e S3_SECRET_ACCESS_KEY=noop -e S3_REGION=us-east-1 "$IMAGE")`; note — S3 credentials are placeholders; the health endpoint does not require S3;
|
||||
6. **[US1 check 3 — health endpoint]** Poll `curl -sf http://localhost:18000/api/v1/health` up to 30 × 1s, fail with a message if timeout; print `[verify] Health check passed` on success;
|
||||
7. **[US1 check 4 — SIGTERM → exit 0]** Run `docker stop "$APP_CONTAINER"` (sends SIGTERM); capture `EXIT_CODE=$(docker wait "$APP_CONTAINER")`; assert `"$EXIT_CODE" -eq 0`, fail with `FAIL: non-zero exit $EXIT_CODE` otherwise; print `[verify] Graceful shutdown OK (exit $EXIT_CODE)`;
|
||||
8. Print `[verify] US1 checks passed.`
|
||||
9. **[C3 — missing env var → non-zero exit]** Run `docker run --rm -e JWT_SECRET_KEY=verify-key "$IMAGE" 2>&1`; assert the exit code is **non-zero** (OWNER_USERNAME is absent so Pydantic settings validation must fail at startup); print `[verify] Missing-env-var exit check OK`;
|
||||
After writing the script, run `make verify-prod` and confirm it **fails** with a Docker build error (red state — `Dockerfile.prod` does not exist).
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [X] T005 [US1] Create `api/Dockerfile.prod` at `/workspace/api/Dockerfile.prod` — a two-stage multi-stage build:
|
||||
**Stage 1 (builder)**: `FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder`; `WORKDIR /app`; set `ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_PYTHON_DOWNLOADS=never`; `COPY pyproject.toml uv.lock ./`; `RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen --no-dev --no-install-project`; `COPY app/ ./app/`
|
||||
**Stage 2 (runtime)**: `FROM python:3.12-slim`; `WORKDIR /app`; `RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*`; `RUN groupadd --system --gid 1001 appgroup && useradd --system --uid 1001 --gid 1001 --no-create-home appuser`; `COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv`; `COPY --chown=appuser:appgroup app/ ./app/`; `USER appuser`; `ENV PATH="/app/.venv/bin:$PATH"`; `EXPOSE 8000`; `HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 CMD curl -f http://localhost:8000/api/v1/health || exit 1`; `CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"]`
|
||||
|
||||
- [X] T006 [US1] Verify TDD green for US1: run `make verify-prod` and confirm all four US1 checks pass — build OK, health endpoint returns 200, SIGTERM produces exit code 0, and `[verify] US1 checks passed.` is printed.
|
||||
|
||||
**Checkpoint**: US1 is complete. Production container builds, starts, serves traffic, and shuts down gracefully.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Minimal, Secure Container (Priority: P2)
|
||||
|
||||
**Goal**: The production image runs as non-root and contains no dev dependencies or embedded secrets.
|
||||
|
||||
**Independent Test**: US2 checks in `make verify-prod` — the same script extended with non-root and dev-deps-absent assertions.
|
||||
|
||||
### Tests for User Story 2 (TDD extension — add checks, confirm they pass against existing Dockerfile.prod)
|
||||
|
||||
- [X] T007 [US2] Extend `api/tests/build/verify_production_image.sh` with two US2 checks inserted after the SIGTERM check (before the final `US1 checks passed` line):
|
||||
**[US2 check 1 — non-root]** After the container is running (before `docker stop`), run `UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u)`; assert `"$UID_IN_CONTAINER" -ne 0`, fail with `FAIL: process running as root (UID 0)` if violated; print `[verify] Non-root user OK (UID $UID_IN_CONTAINER)`;
|
||||
**[US2 check 2 — dev deps absent]** After cleanup of APP_CONTAINER but still holding the image, run `docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null`; assert the command returns **non-zero** (i.e., pytest is NOT importable); if it returns 0, fail with `FAIL: pytest importable in production image (dev deps present)`; print `[verify] Dev deps absent OK`;
|
||||
**[C1 — stdout log capture]** Run `docker logs "$APP_CONTAINER" 2>&1`; assert the output is non-empty and contains `Started server` or `Application startup complete` (uvicorn startup lines); fail with `FAIL: no startup logs found on stdout/stderr` if absent; print `[verify] Stdout logging OK`; note — insert this check while APP_CONTAINER is still running, before the `docker stop` call;
|
||||
**[C2 — no hardcoded secrets in layers]** Run `docker history --no-trunc "$IMAGE" 2>&1`; pipe through `grep -iE "(password|secret_key|api_key|token)" `; assert zero matching lines; if any match, fail with `FAIL: potential secret found in image history`; print `[verify] No secrets in image layers OK`;
|
||||
Update the final success line to `[verify] All checks passed (US1 + US2).`; confirm `make verify-prod` passes.
|
||||
|
||||
**Checkpoint**: US2 is verified. Image runs as UID 1001 and contains no test tooling.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: User Story 3 — Fast, Reproducible Builds (Priority: P3)
|
||||
|
||||
**Goal**: Rebuilding after a source-only change reuses the dependency layer from cache.
|
||||
|
||||
**Independent Test**: US3 check in `make verify-prod` — a timed second build after touching a source file asserts the dep layer was cached.
|
||||
|
||||
### Tests for User Story 3 (TDD extension)
|
||||
|
||||
- [X] T008 [US3] Extend `api/tests/build/verify_production_image.sh` with a US3 cache check appended after all other checks (before final success line):
|
||||
**[US3 check — dep layer cached on source-only rebuild]** Set `IMAGE2="reactbin-api-prod:verify-cache-$$"`; `touch api/app/main.py`; capture the output of `docker build --progress=plain -f api/Dockerfile.prod api/ -t "$IMAGE2" 2>&1` (the `--progress=plain` flag ensures consistent `CACHED` output regardless of Docker version or TTY settings); assert the output contains the string `CACHED`; if `CACHED` is absent, fail with `FAIL: dependency layer not reused on source-only rebuild`; add `docker rmi "$IMAGE2" 2>/dev/null || true` to the `cleanup()` function; print `[verify] Dep layer cache hit confirmed (US3 OK)`;
|
||||
Update the final success line to `[verify] All checks passed (US1 + US2 + US3).`
|
||||
|
||||
- [X] T009 [US3] Verify TDD green for US3: run `make verify-prod` and confirm the full script passes including the cache check — the build output for the second image must contain `CACHED`, and `[verify] All checks passed (US1 + US2 + US3).` must print.
|
||||
|
||||
**Checkpoint**: All three user stories are verified end-to-end by `make verify-prod`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T010 Run `make test-integration` from `/workspace` and confirm all 102 existing tests still pass — verifies that the `.dockerignore` additions (T002) do not break the existing test Dockerfile build or any integration test (§5.4 regression gate)
|
||||
|
||||
- [X] T011 Run `shellcheck api/tests/build/verify_production_image.sh` and fix any violations (common: unquoted variables, `[ ]` vs `[[ ]]`, missing `--` before arguments)
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
### Phase Dependencies
|
||||
|
||||
- **Phase 1 (Setup)**: No external dependencies — start immediately
|
||||
- **Phase 2 (Foundational)**: No dependencies — start immediately (parallel with Phase 1)
|
||||
- **Phase 3 (US1)**: Depends on Phase 1 (Makefile + .dockerignore must exist before `make verify-prod` can run) and Phase 2 (test directory must exist)
|
||||
- **Phase 4 (US2)**: Depends on Phase 3 (US1 script and Dockerfile must exist to extend)
|
||||
- **Phase 5 (US3)**: Depends on Phase 4 (full US2 script must exist to extend)
|
||||
- **Phase 6 (Polish)**: Depends on all prior phases; T010 (regression test) must precede T011 (shellcheck)
|
||||
|
||||
### Within Phase 3
|
||||
|
||||
- T004 before T005 (write test script before writing the Dockerfile)
|
||||
- T005 after T004 (implement Dockerfile after confirming red state)
|
||||
- T006 after T005 (verify green after implementation)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 ∥ T003 (setup — parallel, different files)
|
||||
Step 2: T004 (write verification script — TDD red)
|
||||
Step 3: T005 (write Dockerfile.prod — implementation)
|
||||
Step 4: T006 (verify US1 green)
|
||||
Step 5: T007 (extend script with US2 checks, verify pass)
|
||||
Step 6: T008 (extend script with US3 check)
|
||||
Step 7: T009 (verify US3 green)
|
||||
Step 8: T010 (make test-integration — regression gate)
|
||||
Step 9: T011 (shellcheck polish)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 — reliable production run)
|
||||
|
||||
1. Complete T001–T003 (setup)
|
||||
2. Complete T004–T006 (core blocking: write script → write Dockerfile → verify green)
|
||||
3. **Validate**: `make verify-prod` passes; `make test-integration` still passes (no regressions)
|
||||
4. US2 and US3 add explicit verification coverage for properties already implemented
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
- After Phase 3: Production image builds, starts, and shuts down gracefully — safe to deploy
|
||||
- After Phase 4: Security properties (non-root, no dev deps) are explicitly verified
|
||||
- After Phase 5: Build efficiency (layer caching) is confirmed by automated check
|
||||
- After Phase 6: Script is lint-clean, ready for CI integration
|
||||
34
specs/011-ui-prod-dockerfile/checklists/requirements.md
Normal file
34
specs/011-ui-prod-dockerfile/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: Production-Grade UI Container Image
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [X] No implementation details (languages, frameworks, APIs)
|
||||
- [X] Focused on user value and business needs
|
||||
- [X] Written for non-technical stakeholders
|
||||
- [X] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [X] No [NEEDS CLARIFICATION] markers remain
|
||||
- [X] Requirements are testable and unambiguous
|
||||
- [X] Success criteria are measurable
|
||||
- [X] Success criteria are technology-agnostic (no implementation details)
|
||||
- [X] All acceptance scenarios are defined
|
||||
- [X] Edge cases are identified
|
||||
- [X] Scope is clearly bounded
|
||||
- [X] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [X] All functional requirements have clear acceptance criteria
|
||||
- [X] User scenarios cover primary flows
|
||||
- [X] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [X] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- All items pass. Spec is ready for `/speckit-plan`.
|
||||
90
specs/011-ui-prod-dockerfile/contracts/container.md
Normal file
90
specs/011-ui-prod-dockerfile/contracts/container.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Container Interface Contract: UI Production Image
|
||||
|
||||
## Image Identity
|
||||
|
||||
| Property | Value |
|
||||
|-------------|------------------------------|
|
||||
| Image name | `reactbin-ui-prod` |
|
||||
| Runtime | nginx-unprivileged (Alpine) |
|
||||
| Listen port | `8080` |
|
||||
| Run user | non-root (UID ≠ 0) |
|
||||
|
||||
## Runtime Inputs
|
||||
|
||||
### Environment Variables
|
||||
|
||||
The UI container is a static file server. It has **no required environment variables at runtime** — all configuration is compiled into the static assets at build time by the Angular build toolchain.
|
||||
|
||||
> Note: The API base URL is baked in at build time via Angular's environment configuration. A future iteration may introduce runtime environment injection via a served `config.json`, but this is out of scope for v1.
|
||||
|
||||
## Runtime Outputs
|
||||
|
||||
### HTTP Interface
|
||||
|
||||
| Route pattern | Behaviour |
|
||||
|--------------------|-------------------------------------------------------------------|
|
||||
| `/` | Returns `index.html` with HTTP 200 |
|
||||
| `/` (any SPA path) | Returns `index.html` with HTTP 200 (SPA fallback via `try_files`)|
|
||||
| `/main.*.js` | Returns fingerprinted JS bundle with long-lived cache headers |
|
||||
| `/styles.*.css` | Returns fingerprinted CSS with long-lived cache headers |
|
||||
| `/assets/*` | Returns static assets |
|
||||
| Any path not found | Returns `index.html` with HTTP 200 (Angular router handles 404) |
|
||||
|
||||
### Cache Headers
|
||||
|
||||
| Asset type | Cache-Control header |
|
||||
|-------------------------------------|-----------------------------------------------|
|
||||
| Fingerprinted bundles (`.js`, `.css`, fonts) | `public, max-age=31536000, immutable` |
|
||||
| `index.html` | `no-store, no-cache, must-revalidate` |
|
||||
|
||||
### Process Exit
|
||||
|
||||
| Signal | Expected exit code | Maximum wait |
|
||||
|----------|--------------------|--------------|
|
||||
| SIGTERM | 0 | 30 seconds |
|
||||
| SIGKILL | non-zero | immediate |
|
||||
|
||||
## Health Check
|
||||
|
||||
| Property | Value |
|
||||
|-----------------|--------------------------------|
|
||||
| Command | `wget -qO- http://localhost:8080/` |
|
||||
| Interval | 30 seconds |
|
||||
| Timeout | 5 seconds |
|
||||
| Start period | 15 seconds |
|
||||
| Retries | 3 |
|
||||
|
||||
The health check passes when nginx responds with any 2xx status on the root path.
|
||||
|
||||
## Image Constraints
|
||||
|
||||
| Constraint | Requirement |
|
||||
|-------------------------|-----------------------------------------------|
|
||||
| Node.js runtime present | MUST NOT be present in runtime image |
|
||||
| `node_modules/` present | MUST NOT be present in runtime image |
|
||||
| Source TypeScript files | MUST NOT be present in runtime image |
|
||||
| Secrets in layer history| MUST NOT appear in any `docker history` layer |
|
||||
| Run as root | MUST NOT — process UID MUST be non-zero |
|
||||
|
||||
## Build Interface
|
||||
|
||||
| Property | Value |
|
||||
|-----------------|----------------------------------------------|
|
||||
| Dockerfile path | `ui/Dockerfile.prod` |
|
||||
| Build context | `ui/` directory |
|
||||
| Build command | `docker build -f ui/Dockerfile.prod ui/ -t reactbin-ui-prod:latest` |
|
||||
|
||||
### Build Context Exclusions (`.dockerignore`)
|
||||
|
||||
The following MUST be excluded from the build context to keep transfers fast and avoid leaking dev state:
|
||||
|
||||
- `node_modules/` — always rebuilt via `npm ci` in the build stage
|
||||
- `dist/` — always rebuilt; must not pollute the build stage
|
||||
- `.git/` — not needed for build
|
||||
- `*.spec.ts` — test files not compiled into production output
|
||||
- `.env*` — dev environment files
|
||||
- `src/**/*.spec.ts` — test specs
|
||||
|
||||
## Verification
|
||||
|
||||
The contract is verified end-to-end by `ui/tests/build/verify_production_image.sh`. Running `make verify-ui-prod` MUST pass all contract checks.
|
||||
152
specs/011-ui-prod-dockerfile/plan.md
Normal file
152
specs/011-ui-prod-dockerfile/plan.md
Normal file
@@ -0,0 +1,152 @@
|
||||
# Implementation Plan: Production-Grade UI Container Image
|
||||
|
||||
**Branch**: `011-ui-prod-dockerfile` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/011-ui-prod-dockerfile/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Build a production-grade multi-stage Docker image for the Angular UI. A `node:22-slim` build stage compiles the Angular app into static assets; an `nginxinc/nginx-unprivileged:alpine` runtime stage serves those assets on port 8080 as a non-root user with SPA fallback routing, long-lived cache headers for fingerprinted bundles, and clean SIGTERM handling. The image is verified by a TDD shell script that covers all three user stories (reliable service, security, build caching) in one `make verify-ui-prod` run.
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Node.js 22 (build stage); no runtime language in the final image
|
||||
**Primary Dependencies**: Angular CLI 19 (`npm run build`); nginx-unprivileged (runtime web server)
|
||||
**Storage**: None — container serves pre-compiled static files
|
||||
**Testing**: `ui/tests/build/verify_production_image.sh` (shell script TDD artefact, same pattern as `api/tests/build/verify_production_image.sh`)
|
||||
**Target Platform**: Linux container (amd64); Docker 23+ with BuildKit enabled (default); `--mount=type=cache` used for npm cache layer
|
||||
**Project Type**: Static file server (SPA)
|
||||
**Performance Goals**: Cold build < 3 minutes; warm (source-only) rebuild < 30 seconds; health check response < 500ms
|
||||
**Constraints**: Non-root process (UID ≠ 0); Node.js absent from runtime image; no secrets in image layers
|
||||
**Scale/Scope**: Single container; no horizontal scaling concerns at this stage
|
||||
|
||||
## Constitution Check
|
||||
|
||||
### Pre-research gates
|
||||
|
||||
| Principle | Requirement | Status |
|
||||
|-----------|-------------|--------|
|
||||
| §5.1 TDD | Failing test (verify script) must exist before `Dockerfile.prod` | ✅ Plan includes TDD-first task ordering |
|
||||
| §5.3 Tests next to code | `ui/tests/build/` mirrors `api/tests/build/` | ✅ Correct location |
|
||||
| §5.4 CI before done | All tasks marked done only after verify passes | ✅ Enforced in task ordering |
|
||||
| §7.1 One-command start | `docker compose up` must still work | ✅ Only adds prod Dockerfile; dev Dockerfile unchanged |
|
||||
| §7.2 Env config | No hardcoded credentials in Dockerfile | ✅ No runtime env vars needed; build-time config via Angular environment files |
|
||||
| §7.3 Linting | shellcheck on verify script | ✅ T011 in task plan |
|
||||
| §8 Scope | Server-side rendering, OIDC, multi-user — not addressed | ✅ Spec scoped to static asset serving only |
|
||||
|
||||
**No violations. All gates pass.**
|
||||
|
||||
### Post-design re-check
|
||||
|
||||
Same gates apply. No design decisions introduced in Phase 1 conflict with the constitution.
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/011-ui-prod-dockerfile/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← technology decisions (10 decisions)
|
||||
├── contracts/
|
||||
│ └── container.md ← container interface contract
|
||||
├── quickstart.md ← build and verify scenarios
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
ui/
|
||||
├── Dockerfile.prod ← NEW (multi-stage production build)
|
||||
├── nginx.conf ← NEW (SPA routing + cache headers)
|
||||
├── .dockerignore ← NEW (does not exist yet; created for production build)
|
||||
└── tests/
|
||||
└── build/
|
||||
├── .gitkeep ← NEW (track directory in git)
|
||||
└── verify_production_image.sh ← NEW (TDD verification script)
|
||||
|
||||
Makefile ← MODIFIED (add build-ui-prod, verify-ui-prod targets)
|
||||
```
|
||||
|
||||
## Dockerfile Design
|
||||
|
||||
### Stage 1 — Builder (`node:22-slim`)
|
||||
|
||||
```
|
||||
COPY package.json package-lock.json ./ # layer: deps (cached until lockfile changes)
|
||||
RUN --mount=type=cache,target=/root/.npm npm ci # reproducible install; npm cache mounted
|
||||
COPY . . # layer: source (invalidated on every change)
|
||||
RUN npm run build # ng build --configuration production
|
||||
```
|
||||
|
||||
Output of `npm run build`: `dist/reactbin-ui/browser/` (confirmed: Angular 19 application builder creates `browser/` subdirectory under `outputPath`).
|
||||
|
||||
### Stage 2 — Runtime (`nginxinc/nginx-unprivileged:alpine`)
|
||||
|
||||
- Runs as non-root by design (no manual `useradd` needed)
|
||||
- Listens on port 8080
|
||||
- `COPY --from=builder /app/dist/reactbin-ui/browser /usr/share/nginx/html`
|
||||
- `COPY nginx.conf /etc/nginx/conf.d/default.conf`
|
||||
- HEALTHCHECK via `wget` (curl not present in Alpine nginx-unprivileged)
|
||||
- No CMD override needed — the base image entrypoint starts nginx
|
||||
|
||||
### nginx.conf
|
||||
|
||||
```nginx
|
||||
server {
|
||||
listen 8080;
|
||||
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
# SPA fallback — unmatched paths return app shell
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# Long-lived cache for fingerprinted assets
|
||||
location ~* \.(js|css|woff2?|ttf|eot|svg|png|jpg|jpeg|gif|ico)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# Never cache the entry point
|
||||
location = /index.html {
|
||||
add_header Cache-Control "no-store, no-cache, must-revalidate";
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Verification Script Design (`ui/tests/build/verify_production_image.sh`)
|
||||
|
||||
Mirrors `api/tests/build/verify_production_image.sh` structure:
|
||||
|
||||
| Check | Story | Description |
|
||||
|-------|-------|-------------|
|
||||
| Build | US1 | `docker build -f ui/Dockerfile.prod ui/` succeeds |
|
||||
| Health endpoint | US1 | `wget -q http://localhost:18080/` returns 200 within 30s |
|
||||
| SPA routing | US1 | `curl http://localhost:18080/library` returns 200 |
|
||||
| Graceful shutdown | US1 | `docker stop` → exit code 0 |
|
||||
| Non-root user | US2 | `docker exec id -u` ≠ 0 |
|
||||
| Node.js absent | US2 | `docker run node --version` exits non-zero |
|
||||
| No secrets in history | US2 | `docker history --no-trunc` contains no secret-like strings |
|
||||
| Dep layer cache hit | US3 | `touch ui/src/app/app.component.ts` + rebuild → output contains `CACHED` |
|
||||
|
||||
## Makefile Additions
|
||||
|
||||
```makefile
|
||||
build-ui-prod:
|
||||
docker build -f ui/Dockerfile.prod ui/ -t reactbin-ui-prod:latest
|
||||
|
||||
verify-ui-prod:
|
||||
bash ui/tests/build/verify_production_image.sh
|
||||
```
|
||||
|
||||
## Dependencies & Risks
|
||||
|
||||
| Item | Risk | Mitigation |
|
||||
|------|------|------------|
|
||||
| `dist/reactbin-ui/browser/` path | If Angular changes the output directory structure in a future version, the COPY path breaks | Path is verified in research; a test build during verify catches drift |
|
||||
| `nginxinc/nginx-unprivileged` UID | UID may vary between image versions | Check is `UID ≠ 0`, not a specific UID value |
|
||||
| `wget` availability | Alpine images may change toolset | HEALTHCHECK is tested as part of US1 verify |
|
||||
| Port 18080 collision | Another process may use 18080 during verify | Acceptable risk for a dev-time test; port is not a system service |
|
||||
100
specs/011-ui-prod-dockerfile/quickstart.md
Normal file
100
specs/011-ui-prod-dockerfile/quickstart.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Quickstart: UI Production Image
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker with BuildKit enabled (default in Docker 23+)
|
||||
- `make` available in the shell
|
||||
|
||||
## Build the Image
|
||||
|
||||
```bash
|
||||
make build-ui-prod
|
||||
# Equivalent: docker build -f ui/Dockerfile.prod ui/ -t reactbin-ui-prod:latest
|
||||
```
|
||||
|
||||
Expected: Build completes in ~2 minutes on first run (npm install), ~15 seconds on subsequent source-only changes.
|
||||
|
||||
## Run the Container
|
||||
|
||||
```bash
|
||||
docker run --rm -p 4200:8080 reactbin-ui-prod:latest
|
||||
```
|
||||
|
||||
Open http://localhost:4200 — the app shell loads. Navigate to `/library` or `/tags` — the page loads (SPA routing returns `index.html`).
|
||||
|
||||
## Verify All Production Checks
|
||||
|
||||
```bash
|
||||
make verify-ui-prod
|
||||
```
|
||||
|
||||
This runs `ui/tests/build/verify_production_image.sh`, which exercises all three user stories:
|
||||
|
||||
```
|
||||
[verify] Building reactbin-ui-prod:verify-<PID>...
|
||||
[verify] Build OK
|
||||
[verify] Polling health endpoint...
|
||||
[verify] Health check passed
|
||||
[verify] SPA routing OK (/library → 200)
|
||||
[verify] Non-root user OK (UID <n>)
|
||||
[verify] Stdout logging OK
|
||||
[verify] Graceful shutdown OK (exit 0)
|
||||
[verify] Node.js absent in runtime image OK
|
||||
[verify] No secrets in image layers OK
|
||||
[verify] Dep layer cache hit confirmed (US3 OK)
|
||||
[verify] All checks passed (US1 + US2 + US3).
|
||||
```
|
||||
|
||||
## Integration Test Scenarios
|
||||
|
||||
### Scenario 1: Initial Build (Cold Cache)
|
||||
|
||||
```bash
|
||||
docker rmi reactbin-ui-prod:latest 2>/dev/null || true
|
||||
make build-ui-prod
|
||||
```
|
||||
|
||||
Expected: `npm ci` runs fully (~30–90s depending on network). All packages installed from lockfile.
|
||||
|
||||
### Scenario 2: Source-Only Rebuild (Warm Cache)
|
||||
|
||||
```bash
|
||||
touch ui/src/app/app.component.ts
|
||||
make build-ui-prod
|
||||
```
|
||||
|
||||
Expected: `npm ci` step is CACHED (skipped). Only the Angular compilation runs (~10–20s).
|
||||
|
||||
### Scenario 3: Dependency Change (Cache Invalidation)
|
||||
|
||||
```bash
|
||||
# Simulate a lockfile change
|
||||
touch ui/package-lock.json
|
||||
make build-ui-prod
|
||||
```
|
||||
|
||||
Expected: `npm ci` runs fresh (cache miss is intentional and correct).
|
||||
|
||||
### Scenario 4: SPA Deep-Link Routing
|
||||
|
||||
```bash
|
||||
docker run --rm -d -p 4200:8080 --name ui-test reactbin-ui-prod:latest
|
||||
curl -sf http://localhost:4200/library # 200 + index.html
|
||||
curl -sf http://localhost:4200/tags # 200 + index.html
|
||||
curl -sf http://localhost:4200/nonexistent # 200 + index.html (Angular handles 404)
|
||||
docker stop ui-test
|
||||
```
|
||||
|
||||
### Scenario 5: Non-Root Assertion
|
||||
|
||||
```bash
|
||||
docker run --rm reactbin-ui-prod:latest id
|
||||
# Must NOT output uid=0(root)
|
||||
```
|
||||
|
||||
### Scenario 6: No Node.js in Runtime Image
|
||||
|
||||
```bash
|
||||
docker run --rm reactbin-ui-prod:latest node --version 2>&1
|
||||
# Must exit non-zero (node not found)
|
||||
```
|
||||
69
specs/011-ui-prod-dockerfile/research.md
Normal file
69
specs/011-ui-prod-dockerfile/research.md
Normal file
@@ -0,0 +1,69 @@
|
||||
# Research: Production-Grade UI Container Image
|
||||
|
||||
## Decision 1: Build-stage base image
|
||||
|
||||
**Decision**: `node:22-slim`
|
||||
**Rationale**: Matches the version in the existing dev `ui/Dockerfile`. Slim variant reduces the builder layer size and attack surface relative to the full Debian image.
|
||||
**Alternatives considered**: `node:22-alpine` — lighter, but can introduce musl/glibc compatibility issues with some native npm packages; `node:22-bookworm-slim` — functionally equivalent to `node:22-slim`, same image.
|
||||
|
||||
## Decision 2: Runtime base image
|
||||
|
||||
**Decision**: `nginxinc/nginx-unprivileged:alpine`
|
||||
**Rationale**: Runs fully as a non-root user on port 8080 out of the box — no manual user creation or privilege workarounds required. Alpine-based keeps the final image small. The official `nginx:alpine` image requires the master process to run as root to bind port 80; `nginx-unprivileged` avoids this by binding to 8080 instead.
|
||||
**Alternatives considered**:
|
||||
- `nginx:alpine` — master process must be root (violates FR-005); workers run as `nginx` user but `id -u` inside container still shows 0 for PID 1.
|
||||
- `caddy:alpine` — also supports non-root but adds Caddy's Go runtime footprint unnecessarily for pure static serving.
|
||||
|
||||
## Decision 3: Container port
|
||||
|
||||
**Decision**: Expose port `8080` in the container; external orchestrators (docker-compose, Kubernetes ingress) map it to port 80 or 4200 as needed.
|
||||
**Rationale**: `nginxinc/nginx-unprivileged` defaults to port 8080; deviating would require overriding nginx config with no benefit. Port remapping is standard practice — containers should not run as root just to bind to a privileged port.
|
||||
**Alternatives considered**: Running nginx on port 80 requires either root or Linux capabilities (`CAP_NET_BIND_SERVICE`), both of which increase the attack surface.
|
||||
|
||||
## Decision 4: Angular build output directory
|
||||
|
||||
**Decision**: COPY `dist/reactbin-ui/browser/` into the nginx document root.
|
||||
**Rationale**: The Angular 19 `@angular-devkit/build-angular:application` builder (esbuild-based) places browser assets in `dist/{projectName}/browser/` — confirmed by inspecting the existing `dist/reactbin-ui/browser/` directory in the repo. The parent `dist/reactbin-ui/` also contains `prerendered-routes.json` and `3rdpartylicenses.txt` which must not be served as the web root.
|
||||
**Alternatives considered**: Serving from `dist/reactbin-ui/` directly — would expose the `3rdpartylicenses.txt` file at the root and include the prerendering metadata file.
|
||||
|
||||
## Decision 5: Dependency install command
|
||||
|
||||
**Decision**: `npm ci` (not `npm install`)
|
||||
**Rationale**: `npm ci` installs exactly what `package-lock.json` specifies — reproducible, faster on CI, and fails loudly on lockfile mismatches. All dependencies (including `devDependencies`) are needed in the build stage because Angular CLI and build tools are `devDependencies`.
|
||||
**Alternatives considered**: `npm install` — non-deterministic across environments; `npm install --omit=dev` — would break the Angular build since `@angular/cli` is a devDependency.
|
||||
|
||||
## Decision 6: Layer cache strategy
|
||||
|
||||
**Decision**: Two COPY layers — lockfiles first, then source.
|
||||
```
|
||||
COPY package.json package-lock.json ./ # invalidated only on dep changes
|
||||
RUN npm ci # expensive step, cached when lockfiles unchanged
|
||||
COPY . . # invalidated on every source change
|
||||
RUN npm run build
|
||||
```
|
||||
**Rationale**: Mirrors the proven pattern used in the API's `Dockerfile.prod`. Dependency installation (30s–2min) is cached independently from source compilation.
|
||||
**Alternatives considered**: Single COPY of all source — trivial source changes would always re-run `npm ci`.
|
||||
|
||||
## Decision 7: SPA routing
|
||||
|
||||
**Decision**: nginx `try_files $uri $uri/ /index.html` fallback in a custom `nginx.conf`.
|
||||
**Rationale**: Angular is a single-page application. All non-asset routes (e.g., `/library`, `/tags`, `/login`) must return `index.html` so Angular's router can handle them client-side. Without this, direct navigation to any deep link returns 404.
|
||||
**Alternatives considered**: Redirect to `/` — would break deep linking; returning 404 — breaks client-side routing entirely.
|
||||
|
||||
## Decision 8: Cache-control headers
|
||||
|
||||
**Decision**: Long-lived `Cache-Control: public, max-age=31536000, immutable` for fingerprinted JS/CSS/font assets; `Cache-Control: no-store` for `index.html`.
|
||||
**Rationale**: Angular's production build fingerprints all bundles (e.g., `main.a1b2c3d4.js`). These are safe to cache indefinitely. `index.html` is never fingerprinted and must always be fresh so users pick up new deployments.
|
||||
**Alternatives considered**: No cache-control headers — acceptable for MVP but fails FR-008.
|
||||
|
||||
## Decision 9: Health check probe
|
||||
|
||||
**Decision**: Use `wget -qO- http://localhost:8080/` as the HEALTHCHECK command (no `curl` in `nginx-unprivileged:alpine`).
|
||||
**Rationale**: The `nginxinc/nginx-unprivileged:alpine` image is minimal and does not include `curl`. `wget` is available in Alpine. The health check tests that nginx is accepting connections and returning the app shell.
|
||||
**Alternatives considered**: Installing `curl` via `apk add` — adds package manager overhead and unnecessary tooling to the runtime image.
|
||||
|
||||
## Decision 10: TDD verification approach
|
||||
|
||||
**Decision**: Shell script `ui/tests/build/verify_production_image.sh` mirrors the approach used for the API in feature 010.
|
||||
**Rationale**: There is no pytest equivalent for Docker build artifacts. A shell script that fails because `Dockerfile.prod` does not exist satisfies §5.1 TDD (the script is the failing test; writing the Dockerfile turns it green).
|
||||
**Alternatives considered**: No TDD — violates §5.1; a Python test with subprocess — overkill when a shell script is simpler and already proven.
|
||||
110
specs/011-ui-prod-dockerfile/spec.md
Normal file
110
specs/011-ui-prod-dockerfile/spec.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# Feature Specification: Production-Grade UI Container Image
|
||||
|
||||
**Feature Branch**: `011-ui-prod-dockerfile`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Production-grade UI container image build"
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 - UI Serves Reliably in Production (Priority: P1)
|
||||
|
||||
A production deployment starts the UI container and it serves the compiled application correctly — returning the app shell for all routes, responding quickly, and shutting down cleanly when the orchestrator stops it.
|
||||
|
||||
**Why this priority**: A container that can't serve traffic is not deployable. All other properties (security, build speed) are meaningless without a running service.
|
||||
|
||||
**Independent Test**: Build the image, start the container, and verify the root path returns a 200 response. Stopping the container produces a clean exit. This alone constitutes a deployable MVP.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** a built production image, **When** the container starts, **Then** it serves the application on port 8080 within 30 seconds.
|
||||
2. **Given** the container is running, **When** a request is made to any client-side route (e.g., `/library`, `/tags`), **Then** the server returns the app shell (200 OK) so client-side routing can take over.
|
||||
3. **Given** the container is running, **When** a static asset is requested, **Then** it is returned with appropriate caching headers.
|
||||
4. **Given** a running container, **When** the orchestrator sends a stop signal, **Then** the container exits with code 0 within a reasonable timeout.
|
||||
5. **Given** the production image, **When** a health probe is issued to a designated endpoint, **Then** the container reports healthy.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 - Minimal, Secure Container (Priority: P2)
|
||||
|
||||
The production image contains only what is needed to serve static files — no build tools, no source code, no `node_modules`. It runs as a non-privileged user.
|
||||
|
||||
**Why this priority**: Shipping build tools and source code in production images increases attack surface and image size. Running as root violates least-privilege principles.
|
||||
|
||||
**Independent Test**: Inspect the running container — confirm the process user is non-root; attempt to import or run a Node.js binary inside the image and confirm it is absent.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the production image, **When** the running process user is inspected, **Then** it is not root (UID ≠ 0).
|
||||
2. **Given** the production image, **When** the image contents are inspected, **Then** `node_modules/`, source TypeScript files, and the Node.js runtime are absent.
|
||||
3. **Given** the production image, **When** image layer history is inspected, **Then** no secrets, API keys, or credentials appear in any layer command.
|
||||
4. **Given** the production image, **When** the image size is measured, **Then** it is substantially smaller than a single-stage image that includes the Node.js toolchain.
|
||||
|
||||
---
|
||||
|
||||
### User Story 3 - Fast, Reproducible Builds (Priority: P3)
|
||||
|
||||
Rebuilding the image after a source-only change (no dependency changes) reuses the dependency installation layer from cache, completing in seconds rather than minutes.
|
||||
|
||||
**Why this priority**: Slow builds impede the development feedback loop and CI pipeline throughput. Dependency installs are the dominant time cost.
|
||||
|
||||
**Independent Test**: Build once, then change a source file and build again — the build output confirms the dependency layer was served from cache.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the image has been built once, **When** only a source file is changed and the image is rebuilt, **Then** the dependency installation step is skipped (cache hit).
|
||||
2. **Given** a dependency file is changed, **When** the image is rebuilt, **Then** the dependency installation step runs fresh (cache miss is correct behaviour).
|
||||
3. **Given** two successive builds with identical inputs, **Then** both produce functionally identical output.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens when the container starts but the built assets are missing or corrupted?
|
||||
- How does the server handle requests for non-existent routes that should fall back to the app shell (SPA routing)?
|
||||
- What happens when the container receives a stop signal while actively serving requests?
|
||||
- What happens if the port is already in use at startup?
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The production image MUST be built via a multi-stage process — a build stage compiles the application into static assets, and a separate runtime stage serves only those assets.
|
||||
- **FR-002**: The runtime stage MUST NOT contain the Node.js runtime, npm, source TypeScript, or `node_modules/`.
|
||||
- **FR-003**: The container MUST serve the application on port 8080. External orchestrators (docker-compose, Kubernetes ingress) map this to port 80 as needed.
|
||||
- **FR-004**: The container MUST handle SPA (single-page application) routing by returning the app shell for any unmatched path, so client-side routing works correctly.
|
||||
- **FR-005**: The container MUST run as a non-root user.
|
||||
- **FR-006**: The container MUST expose a health-check endpoint that returns success when the service is ready to accept traffic.
|
||||
- **FR-007**: The container MUST exit with code 0 when sent a graceful stop signal.
|
||||
- **FR-008**: Static assets MUST be served with cache-control headers that enable client-side caching for fingerprinted assets.
|
||||
- **FR-009**: The Dockerfile MUST structure layers so that dependency installation is cached independently from source code changes.
|
||||
- **FR-010**: The build MUST be reproducible — given the same source and lockfile, successive builds produce equivalent images.
|
||||
- **FR-011**: No credentials, secrets, or API keys MUST appear in any image layer.
|
||||
|
||||
### Key Entities
|
||||
|
||||
- **Build Stage**: The intermediate container that installs dependencies and compiles source into static assets; discarded after build.
|
||||
- **Static Assets**: The compiled output (HTML, JS bundles, CSS, fonts, images) that the runtime stage serves.
|
||||
- **Runtime Stage**: The minimal final image containing only a web server and the compiled static assets.
|
||||
- **Production Image**: The tagged, distributable image produced by the build; used directly in deployment.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: The container serves a 200 response on port 8080 within 30 seconds of starting.
|
||||
- **SC-002**: The production image is substantially smaller than a single-stage image that retains the Node.js toolchain. A manual size comparison after the initial build confirms the multi-stage approach delivers a meaningful reduction (expected: >60% reduction).
|
||||
- **SC-003**: A source-only rebuild completes in under 30 seconds (dependency layer served from cache).
|
||||
- **SC-004**: All 11 functional requirements pass automated verification on every build.
|
||||
- **SC-005**: The running container process has UID ≠ 0, confirmed by automated check.
|
||||
- **SC-006**: No existing integration tests regress after the Dockerfile and supporting files are introduced.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The Angular application is built for production using the standard build toolchain (`ng build --configuration production` or equivalent), producing a `dist/` output directory.
|
||||
- The production web server is responsible for SPA fallback routing (returning the app shell for unmatched paths).
|
||||
- Gzip or Brotli compression at the web server layer is desirable but not mandatory for the initial implementation.
|
||||
- The UI container does not need to proxy API requests — it communicates with the API directly from the browser (the Angular proxy config is only used in local development).
|
||||
- The container listens on port 8080 (non-privileged, enabling non-root operation). External load balancers or ingress controllers map this to port 80. TLS termination occurs upstream.
|
||||
- The build context is the `ui/` directory; files excluded from the build context (source maps in CI, `node_modules/` already present locally) are managed via `.dockerignore`.
|
||||
- The same verification approach used for the API image (a shell script as the TDD artefact) applies here.
|
||||
166
specs/011-ui-prod-dockerfile/tasks.md
Normal file
166
specs/011-ui-prod-dockerfile/tasks.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# Tasks: Production-Grade UI Container Image
|
||||
|
||||
**Input**: Design documents from `specs/011-ui-prod-dockerfile/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/container.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). The "test" for a Docker build artefact is `ui/tests/build/verify_production_image.sh`, written before `ui/Dockerfile.prod` exists. Running the script immediately fails (red) because the build step cannot find the file; writing `Dockerfile.prod` turns it green.
|
||||
|
||||
**Organization**: Phase 1 sets up Makefile targets, `.dockerignore`, and supporting files; Phase 3 (US1) writes the verification script and the Dockerfile; Phase 4 (US2) extends the script with security checks; Phase 5 (US3) extends it with a cache-hit check; Phase 6 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
- [X] T001 Add `build-ui-prod` and `verify-ui-prod` targets (and their `.PHONY` entries) to the root `Makefile` at `/workspace/Makefile`: `build-ui-prod` runs `docker build -f ui/Dockerfile.prod ui/ -t reactbin-ui-prod:latest`; `verify-ui-prod` runs `bash ui/tests/build/verify_production_image.sh`
|
||||
|
||||
- [X] T002 Create `ui/.dockerignore` at `/workspace/ui/.dockerignore` with the following exclusions (the file does not yet exist — create it fresh): `.git/`, `node_modules/`, `dist/`, `.angular/`, `coverage/`, `*.spec.ts`, `.env`, `.env.*`, `!.env.example`, `tests/`; these keep the build context transfer fast and prevent dev state from leaking into the production image
|
||||
|
||||
- [X] T003 Create directory `ui/tests/build/` at `/workspace/ui/tests/build/` with `mkdir -p` and add a `.gitkeep` so the directory is tracked in git
|
||||
|
||||
---
|
||||
|
||||
**Checkpoint**: Directory structure is ready; Makefile and .dockerignore are created.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational
|
||||
|
||||
No blocking foundational prerequisites exist for this feature — the setup tasks in Phase 1 directly enable all user story phases. Phase 2 is intentionally omitted.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — UI Serves Reliably in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: The container builds, starts, serves the health endpoint and SPA routes, and exits cleanly on SIGTERM.
|
||||
|
||||
**Independent Test**: `make verify-ui-prod` — passes when `Dockerfile.prod` and `nginx.conf` exist and all US1 checks pass.
|
||||
|
||||
### Test for User Story 1 (TDD red — write first, confirm failure before T005)
|
||||
|
||||
- [X] T004 [US1] Create `ui/tests/build/verify_production_image.sh` as an executable bash script (`chmod +x`) with `#!/usr/bin/env bash` and `set -euo pipefail`; the script MUST:
|
||||
1. Set `IMAGE="reactbin-ui-prod:verify-$$"` and `IMAGE2="reactbin-ui-prod:verify-cache-$$"` and `APP_CONTAINER=""`;
|
||||
2. Define a `cleanup()` function that runs `docker rm -f "$APP_CONTAINER" 2>/dev/null || true`, `docker rmi "$IMAGE" 2>/dev/null || true`, and `docker rmi "$IMAGE2" 2>/dev/null || true`, then register it with `trap cleanup EXIT`;
|
||||
3. **[US1 check 1 — build]** Run `docker build -f ui/Dockerfile.prod ui/ -t "$IMAGE"` — this is the line that fails **red** because `ui/Dockerfile.prod` does not yet exist; print `[verify] Building $IMAGE...` before and `[verify] Build OK` after;
|
||||
4. **[US1 check 2 — start container]** Start the production container: `APP_CONTAINER=$(docker run -d -p 18080:8080 "$IMAGE")`; print `[verify] Starting production container...`;
|
||||
5. **[US1 check 3 — health endpoint]** Poll `curl -sf http://localhost:18080/` up to 30 × 1s, fail with `FAIL: health check timed out after 30s` if timeout; print `[verify] Health check passed` on success;
|
||||
6. **[US1 check 4 — SPA routing]** Run `curl -sf http://localhost:18080/library > /dev/null`; assert exit code is 0 (200 response); fail with `FAIL: SPA routing check failed (/library did not return 200)` if violated; print `[verify] SPA routing OK (/library → 200)`;
|
||||
7. **[US1 check 5 — SIGTERM → exit 0]** Run `docker stop "$APP_CONTAINER"` (sends SIGTERM); capture `EXIT_CODE=$(docker wait "$APP_CONTAINER")`; assert `"$EXIT_CODE" -eq 0`, fail with `FAIL: non-zero exit code $EXIT_CODE after SIGTERM` otherwise; print `[verify] Graceful shutdown OK (exit $EXIT_CODE)`;
|
||||
8. Print `[verify] US1 checks passed.`
|
||||
After writing the script, run `make verify-ui-prod` and confirm it **fails** with a Docker build error (red state — `ui/Dockerfile.prod` does not exist).
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [X] T005 [US1] Create `ui/nginx.conf` at `/workspace/ui/nginx.conf` — an nginx server block that: listens on port `8080`; sets `root /usr/share/nginx/html` and `index index.html`; adds a `location /` block with `try_files $uri $uri/ /index.html` for SPA fallback routing; adds a `location ~* \.(js|css|woff2?|ttf|eot|svg|png|jpg|jpeg|gif|ico)$` block with `expires 1y` and `add_header Cache-Control "public, immutable"` for fingerprinted assets; adds a `location = /index.html` block with `add_header Cache-Control "no-store, no-cache, must-revalidate"` so the entry point is never cached
|
||||
|
||||
- [X] T006 [US1] Create `ui/Dockerfile.prod` at `/workspace/ui/Dockerfile.prod` — a two-stage multi-stage build:
|
||||
**Stage 1 (builder)**: `FROM node:22-slim AS builder`; `WORKDIR /app`; `COPY package.json package-lock.json ./`; `RUN --mount=type=cache,target=/root/.npm npm ci`; `COPY . .`; `RUN npm run build`
|
||||
**Stage 2 (runtime)**: `FROM nginxinc/nginx-unprivileged:alpine`; `COPY --from=builder /app/dist/reactbin-ui/browser /usr/share/nginx/html`; `COPY nginx.conf /etc/nginx/conf.d/default.conf`; `EXPOSE 8080`; `HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 CMD wget -qO- http://localhost:8080/ || exit 1`
|
||||
|
||||
- [X] T007 [US1] Verify TDD green for US1: run `make verify-ui-prod` and confirm all five US1 checks pass — build OK, health endpoint returns 200, SPA routing returns 200, SIGTERM produces exit code 0, and `[verify] US1 checks passed.` is printed.
|
||||
|
||||
**Checkpoint**: US1 is complete. Production container builds, starts, serves traffic (including SPA routes), and shuts down gracefully.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Minimal, Secure Container (Priority: P2)
|
||||
|
||||
**Goal**: The production image runs as non-root and contains no Node.js runtime, source, or embedded secrets.
|
||||
|
||||
**Independent Test**: US2 checks in `make verify-ui-prod` — the same script extended with non-root, node-absent, and secrets-free assertions.
|
||||
|
||||
### Tests for User Story 2 (TDD extension — add checks, confirm they pass against existing Dockerfile.prod)
|
||||
|
||||
- [X] T008 [US2] Extend `ui/tests/build/verify_production_image.sh` with US2 checks inserted after the health/SPA/SIGTERM checks (before the final `US1 checks passed` line) and update the final success message to `[verify] All checks passed (US1 + US2).`:
|
||||
**[US2 check 1 — non-root]** Before `docker stop`, run `UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u)`; assert `"$UID_IN_CONTAINER" -ne 0`, fail with `FAIL: process running as root (UID 0)` if violated; print `[verify] Non-root user OK (UID $UID_IN_CONTAINER)`;
|
||||
**[C1 — stdout log capture]** Run `LOGS=$(docker logs "$APP_CONTAINER" 2>&1)`; assert `"$LOGS"` is non-empty, fail with `FAIL: no output on stdout/stderr` if empty; print `[verify] Stdout logging OK`; insert this check before `docker stop`;
|
||||
**[US2 check 2 — Node.js absent]** After SIGTERM cleanup, run `docker run --rm "$IMAGE" node --version 2>/dev/null`; assert the exit code is **non-zero** (node not present in runtime image); if it returns 0, fail with `FAIL: node runtime found in production image`; print `[verify] Node.js absent in runtime image OK`;
|
||||
**[C2 — no hardcoded secrets in layers]** Run `docker history --no-trunc "$IMAGE" 2>&1`; pipe through `grep -qiE "(password|secret_key|api_key|token)"`; assert zero matching lines; if any match, fail with `FAIL: potential secret found in image history`; print `[verify] No secrets in image layers OK`;
|
||||
**[FR-008 — cache-control headers on assets]** While APP_CONTAINER is running, find the first JS bundle filename: `JS_FILE=$(docker run --rm "$IMAGE" ls /usr/share/nginx/html | grep -E '\.js$' | head -1)`; run `curl -sI "http://localhost:18080/${JS_FILE}"`; assert the response contains `Cache-Control` with `immutable` or `max-age=31536000`, fail with `FAIL: cache-control header not set on fingerprinted asset` if absent; print `[verify] Cache-Control header OK`;
|
||||
Confirm `make verify-ui-prod` passes with the extended checks.
|
||||
|
||||
**Checkpoint**: US2 is verified. Image runs as a non-root user and contains no Node.js toolchain.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: User Story 3 — Fast, Reproducible Builds (Priority: P3)
|
||||
|
||||
**Goal**: Rebuilding after a source-only change reuses the `npm ci` dependency layer from cache.
|
||||
|
||||
**Independent Test**: US3 check in `make verify-ui-prod` — a second build after touching a source file asserts the dep layer was cached.
|
||||
|
||||
### Tests for User Story 3 (TDD extension)
|
||||
|
||||
- [X] T009 [US3] Extend `ui/tests/build/verify_production_image.sh` with a US3 cache check appended after all other checks (before the final success line):
|
||||
**[US3 check — dep layer cached on source-only rebuild]** Print `[verify] Testing cache hit on source-only rebuild...`; `touch ui/src/app/app.component.ts`; capture `BUILD2_OUTPUT=$(docker build --progress=plain -f ui/Dockerfile.prod ui/ -t "$IMAGE2" 2>&1)` (the `--progress=plain` flag ensures consistent `CACHED` output regardless of Docker version or TTY); assert the output contains the string `CACHED`; if absent, fail with `FAIL: dependency layer not reused on source-only rebuild`; print `[verify] Dep layer cache hit confirmed (US3 OK)`;
|
||||
Update the final success line to `[verify] All checks passed (US1 + US2 + US3).`
|
||||
|
||||
- [X] T010 [US3] Verify TDD green for US3: run `make verify-ui-prod` and confirm the full script passes including the cache check — the build output for the second image must contain `CACHED`, and `[verify] All checks passed (US1 + US2 + US3).` must print.
|
||||
|
||||
**Checkpoint**: All three user stories are verified end-to-end by `make verify-ui-prod`.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T011 Run `make test-integration` from `/workspace` and confirm all 102 existing tests still pass — verifies that the new files (Makefile targets, ui/.dockerignore, ui/tests/build/) do not break the existing test Dockerfile build or any integration test (§5.4 regression gate)
|
||||
|
||||
- [X] T012 Confirm image size reduction (SC-002): run `docker images reactbin-ui-prod:latest --format "{{.Size}}"` and compare against a reference single-stage image built from `FROM node:22-slim` + `npm ci` + `npm run build` to confirm the production image is substantially smaller (expected >60% reduction); document the sizes in a comment or log line
|
||||
|
||||
- [X] T013 Run `shellcheck ui/tests/build/verify_production_image.sh` and fix any violations (common: unquoted variables, `[ ]` vs `[[ ]]`, missing `--` before arguments); also verify `make verify-ui-prod` still passes after any fixes
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
### Phase Dependencies
|
||||
|
||||
- **Phase 1 (Setup)**: No external dependencies — start immediately
|
||||
- **Phase 3 (US1)**: Depends on Phase 1 (Makefile + .dockerignore must exist before `make verify-ui-prod` can run) and directory must exist (T003)
|
||||
- **Phase 4 (US2)**: Depends on Phase 3 (US1 script and Dockerfile must exist to extend)
|
||||
- **Phase 5 (US3)**: Depends on Phase 4 (full US2 script must exist to extend)
|
||||
- **Phase 6 (Polish)**: Depends on all prior phases; T011 before T012
|
||||
|
||||
### Within Phase 3
|
||||
|
||||
- T004 before T005/T006 (write test script before writing the nginx config and Dockerfile)
|
||||
- T005 and T006 can run in parallel (different files, no mutual dependency)
|
||||
- T007 after T005 and T006 (verify green after both implementation files exist)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 ∥ T003 (setup — parallel, different files)
|
||||
Step 2: T004 (write verification script — TDD red)
|
||||
Step 3: T005 ∥ T006 (write nginx.conf and Dockerfile.prod — parallel)
|
||||
Step 4: T007 (verify US1 green)
|
||||
Step 5: T008 (extend script with US2 checks, verify pass)
|
||||
Step 6: T009 (extend script with US3 check)
|
||||
Step 7: T010 (verify US3 green)
|
||||
Step 8: T011 (make test-integration — regression gate)
|
||||
Step 9: T012 (image size comparison — SC-002)
|
||||
Step 10: T013 (shellcheck polish)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 — reliable production run)
|
||||
|
||||
1. Complete T001–T003 (setup)
|
||||
2. Complete T004–T007 (core: write script → write nginx.conf + Dockerfile → verify green)
|
||||
3. **Validate**: `make verify-ui-prod` passes; `make test-integration` still passes
|
||||
4. US2 and US3 add explicit verification coverage for properties already implemented by the two-stage build
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
- After Phase 3: Production image builds, starts, serves traffic with SPA routing — safe to deploy
|
||||
- After Phase 4: Security properties (non-root, no Node.js runtime) are explicitly verified
|
||||
- After Phase 5: Build efficiency (npm ci layer caching) is confirmed by automated check
|
||||
- After Phase 6: Script is lint-clean, ready for CI integration
|
||||
34
specs/012-api-docs-gate/checklists/requirements.md
Normal file
34
specs/012-api-docs-gate/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: API Documentation Visibility Gate
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [X] No implementation details (languages, frameworks, APIs)
|
||||
- [X] Focused on user value and business needs
|
||||
- [X] Written for non-technical stakeholders
|
||||
- [X] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [X] No [NEEDS CLARIFICATION] markers remain
|
||||
- [X] Requirements are testable and unambiguous
|
||||
- [X] Success criteria are measurable
|
||||
- [X] Success criteria are technology-agnostic (no implementation details)
|
||||
- [X] All acceptance scenarios are defined
|
||||
- [X] Edge cases are identified
|
||||
- [X] Scope is clearly bounded
|
||||
- [X] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [X] All functional requirements have clear acceptance criteria
|
||||
- [X] User scenarios cover primary flows
|
||||
- [X] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [X] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- All items pass. Spec is ready for `/speckit-plan`.
|
||||
40
specs/012-api-docs-gate/contracts/docs-endpoints.md
Normal file
40
specs/012-api-docs-gate/contracts/docs-endpoints.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# Contract: API Documentation Endpoints
|
||||
|
||||
These three endpoints exist in FastAPI by default. This feature makes their availability conditional on a runtime configuration flag.
|
||||
|
||||
## Affected Endpoints
|
||||
|
||||
| Endpoint | Default path | Purpose |
|
||||
|----------|-------------|---------|
|
||||
| Swagger UI | `GET /docs` | Interactive browser-based API documentation |
|
||||
| ReDoc UI | `GET /redoc` | Alternative read-only API documentation |
|
||||
| OpenAPI schema | `GET /openapi.json` | Raw JSON schema of the entire API surface |
|
||||
|
||||
## Behaviour by Flag State
|
||||
|
||||
### `API_DOCS_ENABLED=true` (default)
|
||||
|
||||
All three endpoints respond exactly as they did before this feature. No change.
|
||||
|
||||
| Endpoint | Response |
|
||||
|----------|----------|
|
||||
| `GET /docs` | `200 OK` — Swagger UI HTML |
|
||||
| `GET /redoc` | `200 OK` — ReDoc UI HTML |
|
||||
| `GET /openapi.json` | `200 OK` — OpenAPI schema JSON |
|
||||
|
||||
### `API_DOCS_ENABLED=false`
|
||||
|
||||
All three endpoints are unregistered. Requests fall through to the framework's default 404 handler.
|
||||
|
||||
| Endpoint | Response |
|
||||
|----------|----------|
|
||||
| `GET /docs` | `404 Not Found` |
|
||||
| `GET /redoc` | `404 Not Found` |
|
||||
| `GET /openapi.json` | `404 Not Found` |
|
||||
|
||||
## Invariants
|
||||
|
||||
- All other endpoints are unaffected in both flag states.
|
||||
- The `GET /api/v1/health` endpoint always returns `200 OK` regardless of the flag.
|
||||
- Internal OpenAPI schema generation (used for request/response validation) is not disabled — only the HTTP routes serving it are removed.
|
||||
- The flag is read once at application startup. A running process does not respond to live changes; a restart is required.
|
||||
138
specs/012-api-docs-gate/plan.md
Normal file
138
specs/012-api-docs-gate/plan.md
Normal file
@@ -0,0 +1,138 @@
|
||||
# Implementation Plan: API Documentation Visibility Gate
|
||||
|
||||
**Branch**: `012-api-docs-gate` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/012-api-docs-gate/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Add `API_DOCS_ENABLED` (boolean, default `true`) to `app/config.py`. When `false`, pass `docs_url=None`, `redoc_url=None`, `openapi_url=None` to the `FastAPI()` constructor in `app/main.py`, making all three documentation routes return 404. A field validator provides graceful fallback for invalid flag values. Two new integration tests verify both flag states; the existing unit test suite is extended with two settings tests.
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12
|
||||
**Primary Dependencies**: FastAPI (constructor params), pydantic-settings (field validator)
|
||||
**Storage**: None
|
||||
**Testing**: pytest unit (`api/tests/unit/test_config.py`), pytest + ASGI test client (`api/tests/integration/test_docs_gate.py`)
|
||||
**Target Platform**: API container (same as existing)
|
||||
**Project Type**: Web service configuration change
|
||||
**Performance Goals**: No measurable impact — one boolean read at startup
|
||||
**Constraints**: Default must be `true` (backwards compatible); invalid env var value must not crash startup; no other routes affected
|
||||
**Scale/Scope**: Three files changed (`config.py`, `main.py`, `.env.example`); one new test file; one existing test file extended
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Requirement | Status |
|
||||
|-----------|-------------|--------|
|
||||
| §5.1 TDD | Failing tests written before implementation | ✅ Tasks order tests first |
|
||||
| §5.2 Integration tests | New integration tests follow existing pattern | ✅ |
|
||||
| §5.3 Tests next to code | `api/tests/unit/` and `api/tests/integration/` | ✅ |
|
||||
| §5.4 CI before done | All tests pass before task marked done | ✅ |
|
||||
| §7.2 Env config | Flag via environment variable, not hardcoded | ✅ |
|
||||
| §7.3 Linting | `ruff` passes on all changed files | ✅ Enforced in polish task |
|
||||
| §2.6 No speculative abstraction | One boolean field, no plugin system | ✅ |
|
||||
|
||||
**No violations. All gates pass.**
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/012-api-docs-gate/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← 6 decisions
|
||||
├── contracts/
|
||||
│ └── docs-endpoints.md ← behaviour contract for 3 affected endpoints
|
||||
├── quickstart.md ← 4 test scenarios
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
api/
|
||||
├── app/
|
||||
│ ├── config.py ← MODIFIED: add api_docs_enabled field + validator
|
||||
│ └── main.py ← MODIFIED: conditional docs_url/redoc_url/openapi_url
|
||||
├── tests/
|
||||
│ ├── unit/
|
||||
│ │ └── test_config.py ← MODIFIED: 2 new tests for api_docs_enabled
|
||||
│ └── integration/
|
||||
│ └── test_docs_gate.py ← NEW: 2 integration tests (disabled + enabled)
|
||||
|
||||
.env.example ← MODIFIED: document API_DOCS_ENABLED
|
||||
```
|
||||
|
||||
## Implementation Design
|
||||
|
||||
### `app/config.py` — new field with graceful fallback validator
|
||||
|
||||
```python
|
||||
from pydantic import field_validator
|
||||
|
||||
class Settings(BaseSettings):
|
||||
# ... existing fields ...
|
||||
api_docs_enabled: bool = True
|
||||
|
||||
@field_validator('api_docs_enabled', mode='before')
|
||||
@classmethod
|
||||
def coerce_docs_enabled(cls, v):
|
||||
if isinstance(v, bool):
|
||||
return v
|
||||
try:
|
||||
from pydantic import TypeAdapter
|
||||
return TypeAdapter(bool).validate_python(v)
|
||||
except Exception:
|
||||
return True # FR-007: invalid value → safe default (enabled)
|
||||
```
|
||||
|
||||
### `app/main.py` — conditional docs URLs
|
||||
|
||||
```python
|
||||
_settings = get_settings()
|
||||
|
||||
app = FastAPI(
|
||||
title="Reactbin API",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
docs_url="/docs" if _settings.api_docs_enabled else None,
|
||||
redoc_url="/redoc" if _settings.api_docs_enabled else None,
|
||||
openapi_url="/openapi.json" if _settings.api_docs_enabled else None,
|
||||
)
|
||||
```
|
||||
|
||||
### Integration test pattern
|
||||
|
||||
The `app` object is constructed at module import time. Tests reload the module with the env var pre-set:
|
||||
|
||||
```python
|
||||
def test_docs_disabled(monkeypatch, _base_env):
|
||||
monkeypatch.setenv("API_DOCS_ENABLED", "false")
|
||||
from app.config import get_settings
|
||||
get_settings.cache_clear()
|
||||
import importlib, app.main as m
|
||||
importlib.reload(m)
|
||||
client = TestClient(m.app)
|
||||
assert client.get("/docs").status_code == 404
|
||||
assert client.get("/redoc").status_code == 404
|
||||
assert client.get("/openapi.json").status_code == 404
|
||||
assert client.get("/api/v1/health").status_code == 200
|
||||
```
|
||||
|
||||
`get_settings.cache_clear()` is required before the reload so the new env var is picked up.
|
||||
|
||||
### `.env.example` addition
|
||||
|
||||
```bash
|
||||
# API documentation endpoints (Swagger UI, ReDoc, OpenAPI schema)
|
||||
# Set to false in production to avoid exposing the API surface publicly.
|
||||
API_DOCS_ENABLED=true
|
||||
```
|
||||
|
||||
## Dependencies & Risks
|
||||
|
||||
| Item | Risk | Mitigation |
|
||||
|------|------|------------|
|
||||
| `@lru_cache` on `get_settings()` | Tests may pick up cached settings across reloads | Always call `get_settings.cache_clear()` before reloading `app.main` in tests |
|
||||
| Module-level `get_settings()` in `main.py` | Import fails if required settings are absent (pre-existing behaviour) | Not a new risk; same as today |
|
||||
| `openapi_url=None` | Disables HTTP route but not internal schema generation | Intentional; request validation is unaffected |
|
||||
42
specs/012-api-docs-gate/quickstart.md
Normal file
42
specs/012-api-docs-gate/quickstart.md
Normal file
@@ -0,0 +1,42 @@
|
||||
# Quickstart: API Documentation Visibility Gate
|
||||
|
||||
## Verify docs are disabled
|
||||
|
||||
```bash
|
||||
# Start API with docs disabled
|
||||
API_DOCS_ENABLED=false uvicorn app.main:app --reload
|
||||
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/docs # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/redoc # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/openapi.json # → 404
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/api/v1/health # → 200
|
||||
```
|
||||
|
||||
## Verify docs are enabled (default)
|
||||
|
||||
```bash
|
||||
# Start API without the flag (or with it set to true)
|
||||
uvicorn app.main:app --reload
|
||||
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/docs # → 200
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/redoc # → 200
|
||||
curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/openapi.json # → 200
|
||||
```
|
||||
|
||||
## Integration test scenarios
|
||||
|
||||
### Scenario 1: flag disabled — all three docs endpoints return 404
|
||||
|
||||
Start a test client with `API_DOCS_ENABLED=false` injected into settings. Assert each of the three endpoint paths returns 404. Assert `/api/v1/health` returns 200.
|
||||
|
||||
### Scenario 2: flag enabled (default) — docs endpoints return 200
|
||||
|
||||
Start a test client without the flag (or with `API_DOCS_ENABLED=true`). Assert each of the three endpoint paths returns 200.
|
||||
|
||||
### Scenario 3: invalid flag value — app starts, docs enabled
|
||||
|
||||
Set `API_DOCS_ENABLED=not-a-bool`. The app must start without error. Docs must be accessible (safe fallback to enabled).
|
||||
|
||||
### Scenario 4: flag absent — docs enabled (backwards compatibility)
|
||||
|
||||
Start the app with no `API_DOCS_ENABLED` variable set. Assert docs endpoints return 200 — identical to pre-feature behaviour.
|
||||
36
specs/012-api-docs-gate/research.md
Normal file
36
specs/012-api-docs-gate/research.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Research: API Documentation Visibility Gate
|
||||
|
||||
## Decision 1: Env var name
|
||||
|
||||
**Decision**: `API_DOCS_ENABLED` (boolean, default `true`)
|
||||
**Rationale**: Consistent with the existing `API_BASE_URL` naming convention in the project. The positive-phrasing default (`true` = enabled) preserves backwards compatibility — existing deployments that don't set the variable get the same behaviour as today.
|
||||
**Alternatives considered**: `HIDE_API_DOCS=false` (negative phrasing) — inverted booleans are error-prone and confusing in `.env` files; `DOCS_ENABLED` — too generic, could collide with other tools in a multi-service env file.
|
||||
|
||||
## Decision 2: FastAPI docs suppression mechanism
|
||||
|
||||
**Decision**: Pass `docs_url=None`, `redoc_url=None`, `openapi_url=None` to the `FastAPI()` constructor when the flag is disabled.
|
||||
**Rationale**: This is the official FastAPI-supported mechanism. Setting these to `None` causes FastAPI to register no routes for those paths — requests to them fall through to the default 404 handler. The internal OpenAPI schema is still generated in memory (for request validation), but no HTTP route exposes it.
|
||||
**Alternatives considered**: Route-level middleware that intercepts and returns 404 — more complex, not the canonical approach; removing routers at runtime — impossible, routers are registered at import time.
|
||||
|
||||
## Decision 3: Settings read at module level
|
||||
|
||||
**Decision**: Read `get_settings()` once at module import time in `main.py` to configure the `FastAPI()` constructor.
|
||||
**Rationale**: `FastAPI()` is instantiated at module level; the docs URL parameters must be known at that point. `get_settings()` is already `@lru_cache` so calling it at module level is cheap and consistent with calling it again inside `lifespan`. Tests that need to change the flag must reload the module or override `get_settings`.
|
||||
**Alternatives considered**: Lazy initialisation of `app` inside a factory function — would require restructuring `main.py` and all imports; not worth the complexity for this change.
|
||||
|
||||
## Decision 4: Graceful fallback for invalid flag values (FR-007)
|
||||
|
||||
**Decision**: Add a `@field_validator('api_docs_enabled', mode='before')` in `Settings` that wraps Pydantic's bool coercion in a try/except and returns `True` on any `ValueError`.
|
||||
**Rationale**: Pydantic v2 raises `ValidationError` for unrecognised boolean strings (e.g., `API_DOCS_ENABLED=maybe`). FR-007 requires the app to start rather than fail. The validator intercepts the invalid value before Pydantic's own coercion and returns the safe default.
|
||||
**Alternatives considered**: Using `Optional[bool] = True` without a validator — Pydantic would still raise on invalid input; using `str` field with manual parsing — duplicates Pydantic's boolean parsing logic unnecessarily.
|
||||
|
||||
## Decision 5: Integration test approach
|
||||
|
||||
**Decision**: Test both enabled and disabled states by overriding `get_settings` in integration tests using `app.dependency_overrides`, or by constructing a local `FastAPI` app instance with the appropriate `docs_url`/`redoc_url`/`openapi_url` values.
|
||||
**Rationale**: The `app` in `app.main` is created at import time. Since the unit tests already use `monkeypatch` + `importlib.reload` for config changes, the integration tests for docs visibility can follow the same pattern — reload `app.main` with the env var set before importing `app`. Alternatively, test the URL routing behaviour directly by constructing a minimal test app.
|
||||
**Alternatives considered**: Patching `app.docs_url` after import — FastAPI does not re-register routes when these attributes are changed post-construction; no effect on routing.
|
||||
|
||||
## Decision 6: Production documentation
|
||||
|
||||
**Decision**: Update `.env.example` to include `API_DOCS_ENABLED=true` with a comment recommending `false` for production. No changes to `api/Dockerfile.prod` (env vars are supplied by the deployment environment, not the image).
|
||||
**Rationale**: The Dockerfile intentionally contains no runtime secrets or config. The `.env.example` is the canonical documentation for operators. A comment is sufficient; the production Dockerfile.prod already has no docs-related config.
|
||||
80
specs/012-api-docs-gate/spec.md
Normal file
80
specs/012-api-docs-gate/spec.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# Feature Specification: API Documentation Visibility Gate
|
||||
|
||||
**Feature Branch**: `012-api-docs-gate`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Add an environment variable flag to disable the FastAPI Swagger and ReDoc documentation endpoints (and the raw OpenAPI schema) in production. When disabled, all three endpoints return 404. When enabled (the default), behaviour is unchanged. The flag should be off by default in production and on by default in development."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 - Documentation Hidden in Production (Priority: P1)
|
||||
|
||||
An operator deploys the API to a production environment and wants to ensure that the interactive documentation UI and the raw API schema are not publicly reachable. Setting a configuration flag causes all three documentation endpoints to return "not found", as if they do not exist.
|
||||
|
||||
**Why this priority**: Exposing the full API schema and interactive console to anonymous users in production reveals the attack surface of the application. Hiding it is a low-effort, high-value hardening step.
|
||||
|
||||
**Independent Test**: Start the API with the flag set to disabled. Request each of the three documentation endpoints. All three must return 404.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API is started with documentation disabled, **When** a client requests the interactive documentation UI, **Then** the response is 404 Not Found.
|
||||
2. **Given** the API is started with documentation disabled, **When** a client requests the alternative documentation UI, **Then** the response is 404 Not Found.
|
||||
3. **Given** the API is started with documentation disabled, **When** a client requests the raw OpenAPI schema endpoint, **Then** the response is 404 Not Found.
|
||||
4. **Given** the API is started with documentation disabled, **When** a client requests any other API endpoint (e.g., the health check), **Then** the response is unaffected — normal behaviour continues.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 - Documentation Available in Development (Priority: P2)
|
||||
|
||||
A developer runs the API locally without setting the flag. The documentation endpoints remain fully accessible — no change in behaviour from before this feature.
|
||||
|
||||
**Why this priority**: Developer productivity depends on the interactive docs being available during local development. The default must not break existing workflows.
|
||||
|
||||
**Independent Test**: Start the API without the flag set (or with it explicitly enabled). Request each of the three documentation endpoints. All three must respond successfully with their normal content.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API is started without the flag set, **When** a client requests any documentation endpoint, **Then** the response is the same as it was before this feature was introduced.
|
||||
2. **Given** the API is started with the flag explicitly set to enabled, **When** a client requests any documentation endpoint, **Then** the response is the same as it was before this feature was introduced.
|
||||
3. **Given** the flag is changed from enabled to disabled (or vice versa), **When** the API is restarted, **Then** the new state takes effect immediately with no other changes required.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens if the flag is set to an unrecognised value (e.g., a typo)?
|
||||
- What happens if the flag is absent entirely — is the default enabled or disabled?
|
||||
- Does disabling documentation affect any other behaviour (e.g., internal schema generation used for validation)?
|
||||
- If a monitoring tool scrapes the schema endpoint for API drift detection, does disabling break it?
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The system MUST support a configuration flag that controls whether the API documentation endpoints are reachable.
|
||||
- **FR-002**: When the flag is set to disabled, all three documentation endpoints (interactive UI, alternative UI, and raw schema) MUST return 404 Not Found.
|
||||
- **FR-003**: When the flag is set to enabled, the behaviour of all three documentation endpoints MUST be identical to the behaviour before this feature was introduced.
|
||||
- **FR-004**: The flag MUST default to **enabled** when not explicitly set (preserving backwards compatibility for existing deployments).
|
||||
- **FR-005**: Disabling documentation MUST NOT affect any other API endpoint, including the health check, authentication, and all resource endpoints.
|
||||
- **FR-006**: The flag MUST be configurable via an environment variable without requiring a code change or rebuild.
|
||||
- **FR-007**: An unrecognised or missing flag value MUST fall back to the enabled default rather than causing a startup failure.
|
||||
- **FR-008**: The existing `.env.example` file MUST be updated to document the flag and its default value.
|
||||
- **FR-009**: The production environment configuration MUST set the flag to disabled by default.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: With the flag disabled, all three documentation endpoints return 404, confirmed by automated test.
|
||||
- **SC-002**: With the flag enabled (or absent), all three documentation endpoints respond successfully, confirmed by automated test.
|
||||
- **SC-003**: All existing tests continue to pass — zero regressions introduced.
|
||||
- **SC-004**: The flag takes effect on restart with no other intervention required.
|
||||
- **SC-005**: The `.env.example` file documents the flag so any developer setting up the project discovers it without reading source code.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- There are exactly three documentation-related endpoints to gate: the primary interactive UI, the alternative documentation UI, and the raw OpenAPI schema JSON. No other endpoints are affected.
|
||||
- The flag is read once at application startup; a running process does not need to respond to live changes.
|
||||
- Internal schema generation (used by the framework for request validation) is not affected by hiding the documentation endpoints — only the public-facing HTTP routes are removed.
|
||||
- The production Dockerfile (`api/Dockerfile.prod`) does not hardcode the flag; it is supplied via the deployment environment (docker-compose, Kubernetes secret, etc.).
|
||||
- "Off by default in production" means the recommended value for production is disabled, documented in `.env.example` and in the production docker-compose or deployment config; it does not mean the application auto-detects its environment.
|
||||
100
specs/012-api-docs-gate/tasks.md
Normal file
100
specs/012-api-docs-gate/tasks.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Tasks: API Documentation Visibility Gate
|
||||
|
||||
**Input**: Design documents from `specs/012-api-docs-gate/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/docs-endpoints.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: TDD is non-negotiable (§5.1). Failing tests are written before implementation code in each phase.
|
||||
|
||||
**Organization**: No setup or foundational phases — this feature modifies three existing files and adds one new test file. Phase 3 (US1) covers the disable path; Phase 4 (US2) verifies the enable/default path using the same implementation; Phase 5 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — Documentation Hidden in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: When `API_DOCS_ENABLED=false`, all three documentation endpoints (`/docs`, `/redoc`, `/openapi.json`) return 404. All other endpoints are unaffected.
|
||||
|
||||
**Independent Test**: `make test-unit` passes the new settings tests; `make test-integration` passes the new `test_docs_disabled` integration test.
|
||||
|
||||
### Tests for User Story 1 (TDD — write first, confirm failure before T003)
|
||||
|
||||
- [X] T001 [US1] Add three failing unit tests to `api/tests/unit/test_config.py` using the existing `_apply_env`/`_BASE_ENV` pattern:
|
||||
1. `test_api_docs_enabled_default` — call `Settings()` with `_BASE_ENV` only (no `API_DOCS_ENABLED`); assert `s.api_docs_enabled is True`
|
||||
2. `test_api_docs_enabled_false` — call `Settings()` with `_BASE_ENV` + `{"API_DOCS_ENABLED": "false"}`; assert `s.api_docs_enabled is False`
|
||||
3. `test_api_docs_invalid_value_defaults_to_enabled` — call `Settings()` with `_BASE_ENV` + `{"API_DOCS_ENABLED": "not-a-bool"}`; assert `s.api_docs_enabled is True` (graceful fallback, FR-007)
|
||||
All three tests fail before T003 because `api_docs_enabled` does not yet exist on `Settings`.
|
||||
|
||||
- [X] T002 [US1] Create `api/tests/integration/test_docs_gate.py` with two failing integration tests; the file MUST set up a minimal app client using `from starlette.testclient import TestClient` and the `importlib.reload` + `get_settings.cache_clear()` pattern shown in plan.md:
|
||||
1. `test_docs_hidden_when_flag_disabled(monkeypatch)` — set `API_DOCS_ENABLED=false` via monkeypatch + all required env vars (`DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`); call `get_settings.cache_clear()`; `importlib.reload(app.main)`; create `TestClient(app.main.app)`; assert `/docs` → 404, `/redoc` → 404, `/openapi.json` → 404, `/api/v1/health` → 200; after test, call `get_settings.cache_clear()` again as cleanup
|
||||
2. `test_docs_visible_when_flag_enabled(monkeypatch)` — same setup but with `API_DOCS_ENABLED=true` (or omit it); assert `/docs` → 200, `/redoc` → 200, `/openapi.json` → 200
|
||||
Both tests fail before T003/T004 because `api_docs_enabled` does not exist on `Settings`.
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [X] T003 [US1] Add `api_docs_enabled: bool = True` field and a `coerce_docs_enabled` field validator to the `Settings` class in `api/app/config.py`: the validator MUST use `mode='before'`, be a `@classmethod`, and wrap Pydantic bool coercion in a try/except that returns `True` on any exception (implements FR-007); import `field_validator` from `pydantic` at the top of the file; the field goes after the existing `login_trusted_proxy_ips` field.
|
||||
|
||||
- [X] T004 [US1] Update `api/app/main.py`: before the `app = FastAPI(...)` call, add `_settings = get_settings()`; add `docs_url="/docs" if _settings.api_docs_enabled else None`, `redoc_url="/redoc" if _settings.api_docs_enabled else None`, and `openapi_url="/openapi.json" if _settings.api_docs_enabled else None` as keyword arguments to the `FastAPI()` constructor; the existing module-level defaults for `app.state` (after the `app = FastAPI(...)` line) are unchanged.
|
||||
|
||||
- [X] T005 [US1] Verify TDD green for US1: run `cd api && python -m pytest tests/unit/ -v -k "docs"` and confirm all three new unit tests pass; then run `cd api && python -m pytest tests/unit/ -v` to confirm no regressions in the full 102-test unit suite.
|
||||
|
||||
**Checkpoint**: US1 is complete. With `API_DOCS_ENABLED=false` the three docs endpoints return 404; all other endpoints are unaffected.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Documentation Available in Development (Priority: P2)
|
||||
|
||||
**Goal**: Without the flag set (or with it set to `true`), docs endpoints behave identically to before this feature. Default is backwards compatible.
|
||||
|
||||
**Independent Test**: `make test-integration` — the `test_docs_visible_when_flag_enabled` test written in T002 passes, confirming the enabled/default path.
|
||||
|
||||
- [X] T006 [US2] Verify TDD green for US2: run `make test-integration` from `/workspace` and confirm all integration tests pass, including `test_docs_gate.py::test_docs_visible_when_flag_enabled` and the full existing suite (102 tests + 2 new = 104 total).
|
||||
|
||||
**Checkpoint**: Both user stories verified. Flag disabled → 404; flag enabled or absent → unchanged behaviour.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T007 Add documentation for `API_DOCS_ENABLED` to `/workspace/.env.example`: insert a new section after the `LOGIN_TRUSTED_PROXY_IPS` block with a comment and `API_DOCS_ENABLED=true`; the comment MUST note that this should be set to `false` in production to avoid publicly exposing the API schema
|
||||
|
||||
- [X] T008 Run `ruff check api/app/config.py api/app/main.py api/tests/integration/test_docs_gate.py` from `/workspace/api` and fix any lint violations; then run `ruff check api/` to confirm the full API directory is clean
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
- T001 and T002 can run in parallel (different files, both TDD-red before implementation)
|
||||
- T003 must complete before T004 (main.py reads from config.py)
|
||||
- T005 after T003 and T004
|
||||
- T006 after T005
|
||||
- T007 and T008 can run in parallel (different files, after all tests pass)
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 (write failing tests — TDD red)
|
||||
Step 2: T003 (implement config.py — turns T001 green)
|
||||
Step 3: T004 (implement main.py — turns T002 green)
|
||||
Step 4: T005 (verify unit tests green)
|
||||
Step 5: T006 (verify integration tests green — regression gate)
|
||||
Step 6: T007 ∥ T008 (polish — .env.example + ruff)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 + US2 — one implementation covers both)
|
||||
|
||||
1. Write failing tests (T001, T002)
|
||||
2. Add `api_docs_enabled` to `config.py` (T003)
|
||||
3. Update `FastAPI()` constructor in `main.py` (T004)
|
||||
4. Verify all tests green (T005, T006)
|
||||
5. Polish (T007, T008)
|
||||
|
||||
US1 and US2 share the same implementation — the flag controls both paths. There is no separate implementation for US2; the default value of `true` is the entire implementation of US2.
|
||||
35
specs/013-k8s-manifests/checklists/requirements.md
Normal file
35
specs/013-k8s-manifests/checklists/requirements.md
Normal file
@@ -0,0 +1,35 @@
|
||||
# Specification Quality Checklist: Kubernetes Production Manifests
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-07
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [x] No implementation details (languages, frameworks, APIs)
|
||||
- [x] Focused on user value and business needs
|
||||
- [x] Written for non-technical stakeholders
|
||||
- [x] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [x] No [NEEDS CLARIFICATION] markers remain
|
||||
- [x] Requirements are testable and unambiguous
|
||||
- [x] Success criteria are measurable
|
||||
- [x] Success criteria are technology-agnostic (no implementation details)
|
||||
- [x] All acceptance scenarios are defined
|
||||
- [x] Edge cases are identified
|
||||
- [x] Scope is clearly bounded
|
||||
- [x] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [x] All functional requirements have clear acceptance criteria
|
||||
- [x] User scenarios cover primary flows
|
||||
- [x] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [x] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
- FR-014 (migration files in production image) is a prerequisite code change to `Dockerfile.prod`, not a manifest. Included in scope as it is required for the init container to function.
|
||||
- Image tag placeholder strategy is documented in Assumptions; the specifics of tag substitution (kustomize, sed, etc.) are left to planning.
|
||||
59
specs/013-k8s-manifests/contracts/operator-deploy.md
Normal file
59
specs/013-k8s-manifests/contracts/operator-deploy.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Contract: Operator Deployment Interface
|
||||
|
||||
The manifests in `k8s/` define the operator's deployment interface — the inputs required before applying and the observable outputs after applying.
|
||||
|
||||
## Pre-deployment Prerequisites (Operator-supplied)
|
||||
|
||||
| Prerequisite | Details |
|
||||
|---|---|
|
||||
| Vault KV v2 secret at `reactbin/api/config` | Must contain keys: `DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, `API_BASE_URL` |
|
||||
| Vault KV v2 secret at `reactbin/minio/credentials` | Must contain keys: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` |
|
||||
| Vault Kubernetes auth role | A role in the Vault Kubernetes auth mount bound to the `default` service account in the `reactbin` namespace with read access to both paths above |
|
||||
| `VaultConnection` resource | Named `default` in the operator's VSO namespace pointing to the Vault server address |
|
||||
| External PostgreSQL database | A dedicated database and user created; `DATABASE_URL` in Vault reflects the credentials |
|
||||
| DNS | The production domain resolves to the cluster ingress IP |
|
||||
| `ClusterIssuer` | A cert-manager `ClusterIssuer` named `letsencrypt-prod` exists in the cluster |
|
||||
| Image tags | The operator substitutes the `latest` placeholder in `k8s/api/deployment.yaml` and `k8s/ui/deployment.yaml` with the real image tag before applying |
|
||||
|
||||
## Apply Command
|
||||
|
||||
```bash
|
||||
# Substitute image tags
|
||||
sed -i 's|reactbin-api:latest|reactbin-api:<tag>|g' k8s/api/deployment.yaml
|
||||
sed -i 's|reactbin-ui:latest|reactbin-ui:<tag>|g' k8s/ui/deployment.yaml
|
||||
|
||||
# Apply all manifests
|
||||
kubectl apply -f k8s/
|
||||
```
|
||||
|
||||
Applying is idempotent — safe to re-run on every deployment.
|
||||
|
||||
## Observable Outputs (Post-apply)
|
||||
|
||||
| Resource | Expected State |
|
||||
|---|---|
|
||||
| `Namespace/reactbin` | Active |
|
||||
| `Deployment/api` in `reactbin` | 1/1 Ready (init container completes first) |
|
||||
| `Deployment/ui` in `reactbin` | 1/1 Ready |
|
||||
| `StatefulSet/minio` in `reactbin` | 1/1 Ready |
|
||||
| `Job/minio-init-bucket` in `reactbin` | Completed |
|
||||
| `Secret/api-env` in `reactbin` | Created by VSO, populated with all API env keys |
|
||||
| `Secret/minio-credentials` in `reactbin` | Created by VSO, populated with MinIO root keys |
|
||||
| `Certificate/reactbin-tls` in `reactbin` | Issued (may take up to 2 minutes on first apply) |
|
||||
| `Ingress/reactbin` in `reactbin` | Address populated with cluster ingress IP |
|
||||
|
||||
## Verification Commands
|
||||
|
||||
```bash
|
||||
# All pods running
|
||||
kubectl get pods -n reactbin
|
||||
|
||||
# API health
|
||||
curl -sf https://<domain>/api/v1/health
|
||||
|
||||
# UI reachable
|
||||
curl -sf https://<domain>/
|
||||
|
||||
# Docs correctly gated (should return 404)
|
||||
curl -o /dev/null -w "%{http_code}" https://<domain>/docs
|
||||
```
|
||||
238
specs/013-k8s-manifests/plan.md
Normal file
238
specs/013-k8s-manifests/plan.md
Normal file
@@ -0,0 +1,238 @@
|
||||
# Implementation Plan: Kubernetes Production Manifests
|
||||
|
||||
**Branch**: `013-k8s-manifests` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/013-k8s-manifests/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Write Kubernetes manifests deploying Reactbin to k3s: a `Namespace`, API `Deployment` (with Alembic init container) + `Service`, UI `Deployment` + `Service`, a shared `Ingress` with Let's Encrypt TLS, a MinIO `StatefulSet` + `Service` + bucket-init `Job`, and three VSO CRDs (`VaultConnection`, `VaultAuth`, `VaultStaticSecret` × 2) to sync secrets from Vault. A small update to `api/Dockerfile.prod` includes Alembic migration files in the production image so the init container can run them.
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: YAML (Kubernetes manifests); Python 3.12 (Dockerfile.prod touch)
|
||||
**Primary Dependencies**: Kubernetes 1.29+ API, nginx Ingress controller, cert-manager (ClusterIssuer `letsencrypt-prod`), Vault Secrets Operator (`secrets.hashicorp.com/v1beta1`), MinIO
|
||||
**Storage**: MinIO StatefulSet with ReadWriteOnce PVC (cluster default storage class); external PostgreSQL (operator-provisioned)
|
||||
**Testing**: `kubectl apply --dry-run=client` for schema validation; `yamllint` for formatting
|
||||
**Target Platform**: k3s cluster (Kubernetes 1.29+, Linux)
|
||||
**Performance Goals**: No measurable impact — manifests are declarative config, not runtime code
|
||||
**Constraints**: All secrets must come from Vault (no plaintext in manifests); all containers run non-root; MinIO is ClusterIP-only (no external Ingress)
|
||||
**Scale/Scope**: 11 YAML files across `k8s/`; one Dockerfile.prod change; one Makefile target
|
||||
|
||||
## Constitution Check
|
||||
|
||||
| Principle | Requirement | Status |
|
||||
|-----------|-------------|--------|
|
||||
| §5.1 TDD | Failing tests before implementation | ✅ Dry-run validation script written before manifests |
|
||||
| §5.4 CI before done | All tests pass before task marked done | ✅ kubectl dry-run + yamllint gate |
|
||||
| §7.2 Env config | No hardcoded secrets or hostnames | ✅ All secrets via VSO; domain is operator-substituted placeholder |
|
||||
| §7.3 Linting | `ruff` / linting passes | ✅ `yamllint` on all manifests |
|
||||
| §2.6 No speculative abstraction | No Kustomize overlays or Helm chart | ✅ Plain YAML, single environment |
|
||||
| §8 Scope boundaries | No multi-user, no OIDC, no OR/NOT tags | ✅ Not affected |
|
||||
|
||||
**No violations. All gates pass.**
|
||||
|
||||
*Post-design re-check*: The Dockerfile.prod change (FR-014) adds `alembic/` to the runtime stage only — no builder-stage change, no new dependencies, no behaviour change to the running API. Constitution unchanged.
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/013-k8s-manifests/
|
||||
├── plan.md ← this file
|
||||
├── research.md ← 8 decisions
|
||||
├── contracts/
|
||||
│ └── operator-deploy.md ← prerequisites + verification commands
|
||||
├── quickstart.md ← deploy + verify + scenario walkthroughs
|
||||
└── tasks.md ← generated by /speckit-tasks
|
||||
```
|
||||
|
||||
### Source Code Changes
|
||||
|
||||
```text
|
||||
k8s/ ← NEW directory
|
||||
├── namespace.yaml ← Namespace: reactbin
|
||||
├── api/
|
||||
│ ├── deployment.yaml ← Deployment: api (with alembic init container)
|
||||
│ └── service.yaml ← Service: api (ClusterIP, port 8000)
|
||||
├── ui/
|
||||
│ ├── deployment.yaml ← Deployment: ui
|
||||
│ └── service.yaml ← Service: ui (ClusterIP, port 8080)
|
||||
├── ingress.yaml ← Ingress: /api/ → api, / → ui, TLS via cert-manager
|
||||
├── minio/
|
||||
│ ├── statefulset.yaml ← StatefulSet: minio (volumeClaimTemplates)
|
||||
│ ├── service.yaml ← Service: minio (ClusterIP, port 9000)
|
||||
│ └── init-job.yaml ← Job: minio-init-bucket (mc mb --ignore-existing)
|
||||
└── vault/
|
||||
├── vault-auth.yaml ← VaultAuth: kubernetes method, reactbin SA
|
||||
├── api-secret.yaml ← VaultStaticSecret → K8s Secret: api-env
|
||||
└── minio-secret.yaml ← VaultStaticSecret → K8s Secret: minio-credentials
|
||||
|
||||
api/Dockerfile.prod ← MODIFIED: add alembic/ and alembic.ini to runtime stage
|
||||
Makefile ← MODIFIED: add dry-run validation target
|
||||
```
|
||||
|
||||
## Implementation Design
|
||||
|
||||
### `api/Dockerfile.prod` — runtime stage addition
|
||||
|
||||
```dockerfile
|
||||
# In the runtime stage, after copying app/:
|
||||
COPY --chown=appuser:appgroup alembic/ ./alembic/
|
||||
COPY --chown=appuser:appgroup alembic.ini .
|
||||
```
|
||||
|
||||
No builder-stage change. No new base image. The init container uses the same image and `workingDir: /app`.
|
||||
|
||||
### `k8s/namespace.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: reactbin
|
||||
```
|
||||
|
||||
### `k8s/vault/vault-auth.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultAuth
|
||||
metadata:
|
||||
name: reactbin-auth
|
||||
namespace: reactbin
|
||||
spec:
|
||||
method: kubernetes
|
||||
mount: kubernetes
|
||||
kubernetes:
|
||||
role: reactbin
|
||||
serviceAccount: default
|
||||
audiences:
|
||||
- https://kubernetes.default.svc
|
||||
```
|
||||
|
||||
Note: `VaultConnection` is not included in the `k8s/` tree — it lives in the VSO operator's namespace and is operator-managed infrastructure, not application manifests.
|
||||
|
||||
### `k8s/vault/api-secret.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: secrets.hashicorp.com/v1beta1
|
||||
kind: VaultStaticSecret
|
||||
metadata:
|
||||
name: api-secret
|
||||
namespace: reactbin
|
||||
spec:
|
||||
vaultAuthRef: reactbin-auth
|
||||
mount: secret
|
||||
type: kv-v2
|
||||
path: reactbin/api/config
|
||||
refreshAfter: 1h
|
||||
destination:
|
||||
name: api-env
|
||||
create: true
|
||||
```
|
||||
|
||||
The API Deployment then uses `envFrom: [{secretRef: {name: api-env}}]`.
|
||||
|
||||
### `k8s/vault/minio-secret.yaml`
|
||||
|
||||
Same pattern, path `reactbin/minio/credentials`, destination `minio-credentials`.
|
||||
|
||||
### `k8s/api/deployment.yaml` — init container
|
||||
|
||||
```yaml
|
||||
initContainers:
|
||||
- name: alembic-migrate
|
||||
image: reactbin-api:latest # same tag as main container
|
||||
command: ["alembic", "upgrade", "head"]
|
||||
workingDir: /app
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
containers:
|
||||
- name: api
|
||||
image: reactbin-api:latest
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
envFrom:
|
||||
- secretRef:
|
||||
name: api-env
|
||||
env:
|
||||
- name: API_DOCS_ENABLED
|
||||
value: "false"
|
||||
livenessProbe:
|
||||
httpGet: {path: /api/v1/health, port: 8000}
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
readinessProbe:
|
||||
httpGet: {path: /api/v1/health, port: 8000}
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1001
|
||||
```
|
||||
|
||||
### `k8s/ingress.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: reactbin
|
||||
namespace: reactbin
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
spec:
|
||||
ingressClassName: nginx
|
||||
tls:
|
||||
- hosts: [<your-domain>]
|
||||
secretName: reactbin-tls
|
||||
rules:
|
||||
- host: <your-domain>
|
||||
http:
|
||||
paths:
|
||||
- path: /api/
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service: {name: api, port: {number: 8000}}
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service: {name: ui, port: {number: 8080}}
|
||||
```
|
||||
|
||||
`/api/` must be listed before `/`.
|
||||
|
||||
### `k8s/minio/statefulset.yaml` — StatefulSet (not Deployment)
|
||||
|
||||
StatefulSet gives stable pod name `minio-0` and automatic PVC reattachment via `volumeClaimTemplates`. ReadWriteOnce, default storage class.
|
||||
|
||||
Health probes: `GET /minio/health/live:9000` (liveness), `GET /minio/health/ready:9000` (readiness).
|
||||
|
||||
### `k8s/minio/init-job.yaml`
|
||||
|
||||
```yaml
|
||||
command: ["sh", "-c", "mc alias set local http://minio:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD && mc mb --ignore-existing local/reactbin"]
|
||||
```
|
||||
|
||||
`restartPolicy: OnFailure`. `--ignore-existing` makes the job idempotent.
|
||||
|
||||
### Makefile addition
|
||||
|
||||
```makefile
|
||||
validate-k8s:
|
||||
yamllint k8s/
|
||||
kubectl apply --dry-run=client -f k8s/
|
||||
```
|
||||
|
||||
## Dependencies & Risks
|
||||
|
||||
| Item | Risk | Mitigation |
|
||||
|------|------|------------|
|
||||
| `VaultConnection` not in `k8s/` | Operator may not have it pre-created | Documented as prerequisite in contracts/operator-deploy.md |
|
||||
| `letsencrypt-prod` ClusterIssuer name | May differ in operator's cluster | Documented as prerequisite; easy to sed-replace |
|
||||
| Image tag placeholder `latest` | Operator forgets to substitute | `validate-k8s` dry-run will succeed but notes in quickstart.md and task descriptions warn explicitly |
|
||||
| MinIO PVC storage class | Default may be unsuitable (e.g., ephemeral) | Noted in Assumptions; operator can patch `storageClassName` |
|
||||
| `<your-domain>` placeholder in Ingress | `kubectl apply --dry-run=client` validates everything except host value | Noted in quickstart; hostname must be substituted before applying |
|
||||
92
specs/013-k8s-manifests/quickstart.md
Normal file
92
specs/013-k8s-manifests/quickstart.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# Quickstart: Kubernetes Production Deployment
|
||||
|
||||
## Before You Apply
|
||||
|
||||
1. Store API secrets in Vault at `reactbin/api/config` (KV v2):
|
||||
```
|
||||
DATABASE_URL = postgresql+asyncpg://reactbin:<pw>@<host>:5432/reactbin
|
||||
JWT_SECRET_KEY = <long-random-string>
|
||||
OWNER_USERNAME = <your-username>
|
||||
OWNER_PASSWORD = <your-password>
|
||||
S3_ENDPOINT_URL = http://minio.reactbin.svc.cluster.local:9000
|
||||
S3_BUCKET_NAME = reactbin
|
||||
S3_ACCESS_KEY_ID = <same as MINIO_ROOT_USER>
|
||||
S3_SECRET_ACCESS_KEY = <same as MINIO_ROOT_PASSWORD>
|
||||
API_BASE_URL = https://<your-domain>
|
||||
API_DOCS_ENABLED = false
|
||||
```
|
||||
|
||||
2. Store MinIO credentials in Vault at `reactbin/minio/credentials` (KV v2):
|
||||
```
|
||||
MINIO_ROOT_USER = <choose a strong username>
|
||||
MINIO_ROOT_PASSWORD = <choose a strong password>
|
||||
```
|
||||
|
||||
3. Create a Vault Kubernetes auth role bound to the `default` service account in the `reactbin` namespace with read access to both paths above.
|
||||
|
||||
4. Confirm DNS resolves to the cluster ingress IP and the `letsencrypt-prod` ClusterIssuer exists.
|
||||
|
||||
## Deploy
|
||||
|
||||
```bash
|
||||
# Substitute the real image tags
|
||||
sed -i 's|reactbin-api:latest|reactbin-api:v1.0.0|g' k8s/api/deployment.yaml
|
||||
sed -i 's|reactbin-ui:latest|reactbin-ui:v1.0.0|g' k8s/ui/deployment.yaml
|
||||
|
||||
# Apply everything
|
||||
kubectl apply -f k8s/
|
||||
```
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
# Watch pods come up (init container runs first on the API pod)
|
||||
kubectl get pods -n reactbin -w
|
||||
|
||||
# API health
|
||||
curl -sf https://<your-domain>/api/v1/health && echo "API OK"
|
||||
|
||||
# UI reachable
|
||||
curl -sf -o /dev/null -w "%{http_code}\n" https://<your-domain>/
|
||||
|
||||
# Docs correctly gated
|
||||
curl -o /dev/null -w "%{http_code}\n" https://<your-domain>/docs # → 404
|
||||
curl -o /dev/null -w "%{http_code}\n" https://<your-domain>/redoc # → 404
|
||||
|
||||
# Check migration init container ran
|
||||
kubectl logs -n reactbin -l app=api -c alembic-migrate
|
||||
```
|
||||
|
||||
## Scenario: Migration fails on deploy
|
||||
|
||||
```bash
|
||||
# Pod will be stuck in Init state
|
||||
kubectl get pods -n reactbin
|
||||
# NAME READY STATUS RESTARTS
|
||||
# api-xxx-yyy 0/1 Init:CrashLoopBackOff 2
|
||||
|
||||
# See why
|
||||
kubectl logs -n reactbin <pod-name> -c alembic-migrate
|
||||
|
||||
# Fix the issue (e.g. correct DATABASE_URL in Vault, wait for VSO to resync)
|
||||
# Then delete the pod to force a fresh rollout
|
||||
kubectl rollout restart deployment/api -n reactbin
|
||||
```
|
||||
|
||||
## Scenario: Update to a new image version
|
||||
|
||||
```bash
|
||||
kubectl set image deployment/api api=reactbin-api:v1.1.0 -n reactbin
|
||||
kubectl set image deployment/ui ui=reactbin-ui:v1.1.0 -n reactbin
|
||||
# Kubernetes rolls out new pods; init container runs migrations before traffic switches
|
||||
```
|
||||
|
||||
## Scenario: Restore after MinIO pod restart
|
||||
|
||||
MinIO uses a PersistentVolumeClaim. Pod restarts do not affect stored data. Verify:
|
||||
|
||||
```bash
|
||||
kubectl delete pod -n reactbin minio-0
|
||||
kubectl get pods -n reactbin -w # minio-0 restarts, PVC reattaches
|
||||
# Previously uploaded images should still be accessible via the API
|
||||
```
|
||||
63
specs/013-k8s-manifests/research.md
Normal file
63
specs/013-k8s-manifests/research.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Research: Kubernetes Production Manifests
|
||||
|
||||
## Decision 1: VSO CRD chain (VaultConnection → VaultAuth → VaultStaticSecret)
|
||||
|
||||
**Decision**: Use three CRDs — `VaultConnection`, `VaultAuth`, and `VaultStaticSecret` — all under `apiVersion: secrets.hashicorp.com/v1beta1`.
|
||||
**Rationale**: This is the required VSO resource chain. `VaultConnection` points to the Vault server address. `VaultAuth` declares the Kubernetes auth method (role, service account, mount path). `VaultStaticSecret` references a `VaultAuth` via `vaultAuthRef` and declares the Vault KV path and the destination K8s Secret name. VSO syncs all Vault keys to the K8s Secret 1:1 by default — no explicit key mapping needed.
|
||||
**Alternatives considered**: `VaultAuthGlobal` for cross-namespace sharing — not needed; all resources are in the same `reactbin` namespace.
|
||||
|
||||
Key fields:
|
||||
- `VaultStaticSecret.spec.type`: `kv-v2` (standard for modern Vault)
|
||||
- `VaultStaticSecret.spec.refreshAfter`: `1h` (Go duration string)
|
||||
- `VaultStaticSecret.spec.destination.create: true` — VSO creates the K8s Secret if absent
|
||||
- `VaultAuth.spec.kubernetes.role` — a Vault role the operator must pre-create and bind to the `reactbin` namespace service account
|
||||
|
||||
## Decision 2: MinIO as StatefulSet (not Deployment)
|
||||
|
||||
**Decision**: Run MinIO as a `StatefulSet` with `volumeClaimTemplates`.
|
||||
**Rationale**: StatefulSet gives the pod a stable name (`minio-0`) and automatically reattaches its PVC on pod recreation. A Deployment would require a manually-created PVC and is prone to PVC binding issues on reschedule. The marginal complexity of a StatefulSet over a Deployment is acceptable. `ReadWriteOnce` PVC is correct for single-replica MinIO.
|
||||
**Alternatives considered**: Deployment with explicit PVC — works but PVC lifecycle is decoupled from the pod, creating operational risk.
|
||||
|
||||
MinIO health probes:
|
||||
- Liveness: `GET /minio/health/live:9000`
|
||||
- Readiness: `GET /minio/health/ready:9000`
|
||||
|
||||
MinIO env vars: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` (injected from a K8s Secret synced by VSO).
|
||||
|
||||
## Decision 3: Bucket initialisation via Kubernetes Job with `minio/mc`
|
||||
|
||||
**Decision**: A one-off `Job` using `minio/mc:latest` runs `mc mb --ignore-existing` to create the bucket idempotently.
|
||||
**Rationale**: This is the standard in-cluster pattern. `--ignore-existing` makes the job safe to re-apply (exits 0 if bucket already exists). `restartPolicy: OnFailure` retries transient failures (e.g. MinIO not yet ready).
|
||||
**Alternatives considered**: Init container on the API pod — tightly couples bucket creation to API startup; a Job is cleaner and independently rerunnable.
|
||||
|
||||
## Decision 4: Ingress — single resource, `/api/` path before `/`
|
||||
|
||||
**Decision**: One `Ingress` resource with `ingressClassName: nginx`, two path entries in a single rule: `/api/` (Prefix) → API Service, `/` (Prefix) → UI Service; `/api/` must be listed first.
|
||||
**Rationale**: nginx ingress evaluates paths in declaration order; the more specific `/api/` prefix must appear before `/` or all traffic is routed to the UI. No path rewriting annotation is needed — the API already handles full `/api/v1/...` paths.
|
||||
**TLS**: cert-manager annotation `cert-manager.io/cluster-issuer: letsencrypt-prod` triggers automatic certificate provisioning into a K8s Secret named in `spec.tls[].secretName`. HTTP→HTTPS redirect is on by default when TLS is configured (`nginx.ingress.kubernetes.io/ssl-redirect: "true"` is explicit but redundant).
|
||||
**Alternatives considered**: Two separate Ingress resources (one per service) — works but harder to reason about routing order; single Ingress is canonical.
|
||||
|
||||
## Decision 5: Alembic init container — same image, workdir `/app`
|
||||
|
||||
**Decision**: The API Deployment includes an init container with the same image as the main container, `command: ["alembic", "upgrade", "head"]`, and `workingDir: /app`. It shares the API's env secret via `envFrom` so it can read `DATABASE_URL`.
|
||||
**Rationale**: Alembic needs `DATABASE_URL` to connect and `alembic.ini` + `alembic/` to find migrations. Both are available in the production image once `Dockerfile.prod` is updated. Using the same image guarantees the migration files match the running version.
|
||||
**Dockerfile.prod update required**: Add `COPY --chown=appuser:appgroup alembic/ ./alembic/` and `COPY --chown=appuser:appgroup alembic.ini .` in the runtime stage (not the builder stage — no compilation needed).
|
||||
**Alternatives considered**: Separate migration image — adds a second image to build and push on every release; unnecessary when the source image already has everything.
|
||||
|
||||
## Decision 6: Image tag strategy — placeholder `latest`, substituted at deploy time
|
||||
|
||||
**Decision**: Manifests reference image tags using `latest` as a documented placeholder. The operator substitutes the real tag with `kubectl set image` or a `sed` one-liner before applying.
|
||||
**Rationale**: Kustomize's `images` transformer is the clean alternative, but introduces a tooling dependency. For a personal single-operator deployment, `sed` or `kubectl set image` after `kubectl apply` is simpler and requires no additional setup. The placeholder is documented in the operator guide (quickstart.md).
|
||||
**Alternatives considered**: Kustomize overlays — appropriate for multi-environment setups; over-engineered for one environment.
|
||||
|
||||
## Decision 7: Two VaultStaticSecrets (API env and MinIO credentials)
|
||||
|
||||
**Decision**: Separate VaultStaticSecret resources for API env vars and MinIO root credentials, syncing into `api-env` and `minio-credentials` K8s Secrets respectively.
|
||||
**Rationale**: The API's env secret contains database, JWT, and S3 access credentials. MinIO's root credentials are a different concern with a different rotation lifecycle. Keeping them separate makes Vault policies simpler (least privilege) and avoids giving the API pod access to MinIO's root password.
|
||||
**Vault paths assumed**: `reactbin/api/config` (KV v2) for API env; `reactbin/minio/credentials` (KV v2) for MinIO root credentials.
|
||||
|
||||
## Decision 8: Namespace manifest included in `k8s/`
|
||||
|
||||
**Decision**: `k8s/namespace.yaml` creates the `reactbin` namespace as part of the manifest set.
|
||||
**Rationale**: Makes the full deployment self-contained — operator runs `kubectl apply -f k8s/` without a prerequisite namespace creation step.
|
||||
**Note**: If the namespace already exists, `kubectl apply` is idempotent.
|
||||
124
specs/013-k8s-manifests/spec.md
Normal file
124
specs/013-k8s-manifests/spec.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# Feature Specification: Kubernetes Production Manifests
|
||||
|
||||
**Feature Branch**: `013-k8s-manifests`
|
||||
**Created**: 2026-05-07
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Kubernetes manifests for production deployment to k3s: Deployment, Service, and Ingress for the API and UI; VaultStaticSecret CRDs to sync secrets from HashiCorp Vault; Alembic init container on the API Deployment for schema migrations. The cluster uses an nginx ingress controller with Let's Encrypt TLS, a shared external Postgres instance, MinIO running in-cluster, and VSO (Vault Secrets Operator) for secret management."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 — Application Reachable in Production (Priority: P1)
|
||||
|
||||
As an operator, I can apply the manifests to my k3s cluster and have both the API and UI reachable at the production domain over HTTPS, with all health checks passing.
|
||||
|
||||
**Why this priority**: This is the core deployment goal. Nothing else matters if the application is not reachable.
|
||||
|
||||
**Independent Test**: Apply the API and UI manifests with a manually-created K8s Secret (bypassing Vault). Confirm the UI loads at the domain root and the API health endpoint returns 200 at `/api/v1/health`. Confirm HTTPS is enforced and HTTP redirects to HTTPS.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the manifests are applied to the cluster, **When** a browser navigates to `https://<domain>/`, **Then** the UI loads successfully with a valid TLS certificate.
|
||||
2. **Given** the manifests are applied, **When** a request is made to `https://<domain>/api/v1/health`, **Then** a 200 response is returned.
|
||||
3. **Given** the API docs flag is disabled, **When** a request is made to `https://<domain>/docs`, **Then** a 404 is returned.
|
||||
4. **Given** the API pod is restarted, **When** it comes back up, **Then** it passes readiness checks before receiving traffic.
|
||||
5. **Given** a request for an unknown path, **When** it is made to the UI, **Then** the SPA serves the index page (client-side routing is preserved).
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 — Secrets Sourced from Vault (Priority: P2)
|
||||
|
||||
As an operator, no secrets are stored in version-controlled manifest files. All sensitive values are declared in Vault and synced automatically into the cluster as Kubernetes Secrets by the Vault Secrets Operator.
|
||||
|
||||
**Why this priority**: Security prerequisite for production. Hardcoded secrets in manifests are a material risk.
|
||||
|
||||
**Independent Test**: Run `git grep` for known secret patterns across `k8s/` and confirm zero matches. Confirm VaultStaticSecret CRDs reference a Vault path and that the synced K8s Secret is created and the API pod's environment is populated from it.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** Vault contains the required secret values at the declared path, **When** VSO is running, **Then** a K8s Secret is created in the cluster namespace with the declared keys.
|
||||
2. **Given** the K8s Secret exists, **When** the API pod starts, **Then** its environment variables are populated from that secret.
|
||||
3. **Given** a `git grep` for plaintext credentials across `k8s/`, **When** run against the committed manifests, **Then** no plaintext secrets are found.
|
||||
|
||||
---
|
||||
|
||||
### User Story 3 — Schema Migrations Run Before API Starts (Priority: P3)
|
||||
|
||||
As an operator, every time the API is deployed, database migrations run automatically in an init container before the main application container starts. A failed migration prevents the pod from starting, protecting against schema drift.
|
||||
|
||||
**Why this priority**: Prevents the API from serving requests against a stale or incompatible schema. Safe deployment ordering is essential for production.
|
||||
|
||||
**Independent Test**: Deploy with the init container pointing at a valid database. Confirm migrations run and the API starts. Simulate a failing migration by pointing the init container at an unreachable database and confirm the pod stays in init state and does not serve traffic.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the API Deployment is applied, **When** the pod starts, **Then** the init container completes `alembic upgrade head` before the main container starts.
|
||||
2. **Given** the schema is already current, **When** the pod starts, **Then** the migration init container exits successfully with no changes applied.
|
||||
3. **Given** the migration fails, **When** the pod starts, **Then** the init container exits non-zero, the main container does not start, and the pod enters a visible error state.
|
||||
|
||||
---
|
||||
|
||||
### User Story 4 — MinIO Runs In-Cluster with Persistent Storage (Priority: P4)
|
||||
|
||||
As an operator, MinIO runs inside the cluster with a PersistentVolumeClaim for durable storage, is not externally reachable, and has the required bucket initialised on first deployment.
|
||||
|
||||
**Why this priority**: Required for image storage, but decoupled from the other manifests — the S3 endpoint is just a config value the API reads.
|
||||
|
||||
**Independent Test**: Confirm the MinIO pod is running and has no external Ingress. Confirm the required bucket exists. Restart the MinIO pod and confirm previously stored objects are still accessible.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the MinIO manifests are applied, **When** the MinIO pod starts, **Then** the required bucket is created and the API can store and retrieve images.
|
||||
2. **Given** the MinIO pod is restarted, **When** it comes back up, **Then** all previously stored objects remain accessible (PVC-backed storage persists).
|
||||
3. **Given** no Ingress is defined for MinIO, **When** a connection is attempted from outside the cluster, **Then** MinIO is not reachable.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What if Vault is unavailable when VSO tries to sync? VSO retries on a configurable interval; the pod will not start until the K8s Secret exists.
|
||||
- What if the database is unreachable during migration? The init container exits non-zero; the pod does not start and Kubernetes retries with backoff.
|
||||
- What if the MinIO PVC runs out of space? MinIO will fail writes; the API will return upload errors. Capacity monitoring is out of scope for this feature.
|
||||
- What if migrations and the main container use different image tags? They use the same tag in the same Deployment spec, so they are always in sync.
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: All manifests MUST target a single configurable namespace (default: `reactbin`).
|
||||
- **FR-002**: The API MUST be deployed as a Deployment with liveness and readiness probes on `/api/v1/health`.
|
||||
- **FR-003**: The API Deployment MUST include an init container using the same image that runs database schema migrations before the main container starts.
|
||||
- **FR-004**: The API Deployment MUST set `API_DOCS_ENABLED=false`.
|
||||
- **FR-005**: The UI MUST be deployed as a Deployment with a liveness probe confirming the nginx process is serving.
|
||||
- **FR-006**: A single Ingress MUST route `https://<domain>/api/` to the API Service and all other paths to the UI Service, with TLS termination via a cert-manager Let's Encrypt certificate.
|
||||
- **FR-007**: HTTP requests MUST be redirected to HTTPS via the Ingress.
|
||||
- **FR-008**: All API secrets MUST be declared in a VaultStaticSecret CRD and synced into a K8s Secret; no secret value MUST appear as plaintext in any manifest file.
|
||||
- **FR-009**: The API Deployment MUST source all environment variables from the synced K8s Secret via `envFrom`.
|
||||
- **FR-010**: MinIO MUST be deployed as a StatefulSet with a PersistentVolumeClaim using the cluster's default storage class.
|
||||
- **FR-011**: A Kubernetes Job MUST create the required S3 bucket in MinIO on first deployment and MUST be idempotent on re-apply.
|
||||
- **FR-012**: MinIO MUST have no Ingress; it MUST only be accessible within the cluster via ClusterIP.
|
||||
- **FR-013**: All containers MUST run as non-root users.
|
||||
- **FR-014**: The API production image MUST include migration files so the init container can run migrations without a separate image.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: The application is accessible at the production domain within 120 seconds of `kubectl apply`.
|
||||
- **SC-002**: Schema migrations complete and the API begins serving traffic without manual operator intervention on every deployment.
|
||||
- **SC-003**: A `git grep` across `k8s/` finds zero plaintext secret values in committed files.
|
||||
- **SC-004**: A simulated migration failure holds the pod in init state and the application never serves traffic.
|
||||
- **SC-005**: Restarting the MinIO pod does not result in data loss — previously uploaded images remain accessible.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The k3s cluster is running with the nginx ingress controller installed.
|
||||
- cert-manager is installed and a `ClusterIssuer` named `letsencrypt-prod` is already configured.
|
||||
- The Vault Secrets Operator is installed in the cluster.
|
||||
- A HashiCorp Vault instance is accessible from the cluster and the required secret values are stored at the declared Vault path before deployment.
|
||||
- A shared external PostgreSQL instance is available; the operator creates a dedicated database and user before deploying.
|
||||
- DNS for the production domain is already pointing at the cluster ingress IP.
|
||||
- Manifests are stored in a `k8s/` directory at the repository root.
|
||||
- The cluster's default storage class supports ReadWriteOnce (sufficient for single-replica MinIO).
|
||||
- All Deployments run a single replica (personal tool, no HA requirement).
|
||||
- Image tags are managed externally; manifests use a placeholder tag that the operator substitutes at deploy time.
|
||||
- The `API_DOCS_ENABLED` flag exists on the API (implemented in feature 012).
|
||||
174
specs/013-k8s-manifests/tasks.md
Normal file
174
specs/013-k8s-manifests/tasks.md
Normal file
@@ -0,0 +1,174 @@
|
||||
# Tasks: Kubernetes Production Manifests
|
||||
|
||||
**Input**: Design documents from `specs/013-k8s-manifests/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/operator-deploy.md ✅, quickstart.md ✅
|
||||
|
||||
**Tests**: K8s manifests have no unit test framework. Validation is via `yamllint` (format) and `kubectl apply --dry-run=client` (schema). Each phase ends with a validation step. The TDD analogue is: write the validate-k8s Makefile target (Phase 1) before any manifest exists, so it immediately fails — then manifests are written to make it pass.
|
||||
|
||||
**Organization**: Phase 1 creates the directory structure and validation target. Phase 2 creates the namespace and Vault CRDs (foundational — required by all user story deployments). Phases 3–6 implement user stories. Phase 7 polishes.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel with other [P] tasks in the same phase
|
||||
- **[Story]**: Which user story this task belongs to
|
||||
- Exact file paths included in every task description
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
**Goal**: Create the `k8s/` directory structure and the validation Makefile target before any manifests exist.
|
||||
|
||||
- [X] T001 Create the `k8s/` directory tree: `mkdir -p k8s/api k8s/ui k8s/minio k8s/vault` from the repository root; confirm the four subdirectories exist
|
||||
|
||||
- [X] T002 Add a `validate-k8s` target to `Makefile` immediately after the existing `verify-ui-prod` target: the target MUST run `yamllint -d relaxed k8s/` then `kubectl apply --dry-run=client -f k8s/`; add `validate-k8s` to the `.PHONY` line; note in a comment that `kubectl apply --dry-run=client` requires a kubeconfig with cluster access — offline validation uses `yamllint` only
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational (Namespace + Vault CRDs)
|
||||
|
||||
**Goal**: Namespace and Vault secret-sync resources that every other manifest depends on.
|
||||
|
||||
**⚠️ CRITICAL**: No user story manifest can be applied until this phase is complete — the namespace must exist before any namespaced resource, and the Vault CRDs must exist before the API or MinIO pods can start.
|
||||
|
||||
- [X] T003 Create `k8s/namespace.yaml`: a single `Namespace` resource with `name: reactbin` and no additional labels
|
||||
|
||||
- [X] T004 [P] Create `k8s/vault/vault-auth.yaml`: a `VaultAuth` resource (`apiVersion: secrets.hashicorp.com/v1beta1`) with `name: reactbin-auth`, `namespace: reactbin`, `spec.method: kubernetes`, `spec.mount: kubernetes`, `spec.kubernetes.role: reactbin`, `spec.kubernetes.serviceAccount: default`, `spec.kubernetes.audiences: [https://kubernetes.default.svc]`; add a comment noting the operator must create the Vault role and bind it to the `default` SA in the `reactbin` namespace with read access to both secret paths
|
||||
|
||||
- [X] T005 [P] Create `k8s/vault/api-secret.yaml`: a `VaultStaticSecret` resource with `name: api-secret`, `namespace: reactbin`, `spec.vaultAuthRef: reactbin-auth`, `spec.mount: secret`, `spec.type: kv-v2`, `spec.path: reactbin/api/config`, `spec.refreshAfter: 1h`, `spec.destination.name: api-env`, `spec.destination.create: true`; add a comment listing all required Vault keys: `DATABASE_URL`, `JWT_SECRET_KEY`, `OWNER_USERNAME`, `OWNER_PASSWORD`, `S3_ENDPOINT_URL`, `S3_BUCKET_NAME`, `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, `API_BASE_URL`
|
||||
|
||||
- [X] T006 [P] Create `k8s/vault/minio-secret.yaml`: same structure as T005 but `name: minio-secret`, `spec.path: reactbin/minio/credentials`, `spec.destination.name: minio-credentials`; comment listing required Vault keys: `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD`
|
||||
|
||||
**Checkpoint**: Foundational resources complete. User story implementation can now begin.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — Application Reachable in Production (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: API and UI are deployed and reachable at the production domain via HTTPS with TLS from cert-manager.
|
||||
|
||||
**Independent Test**: Apply all Phase 2 + Phase 3 manifests. Confirm `kubectl get pods -n reactbin` shows api and ui pods Running. Confirm `curl https://<domain>/api/v1/health` returns 200 and `curl https://<domain>/` returns 200.
|
||||
|
||||
- [X] T007 [P] [US1] Create `k8s/api/service.yaml`: `Service`, `name: api`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: api}`, `ports: [{port: 8000, targetPort: 8000, name: http}]`
|
||||
|
||||
- [X] T008 [P] [US1] Create `k8s/ui/service.yaml`: `Service`, `name: ui`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: ui}`, `ports: [{port: 8080, targetPort: 8080, name: http}]`
|
||||
|
||||
- [X] T009 [P] [US1] Create `k8s/ui/deployment.yaml`: `Deployment`, `name: ui`, `namespace: reactbin`, 1 replica, `selector.matchLabels: {app: ui}`; container `name: ui`, `image: reactbin-ui:latest` (placeholder — operator substitutes real tag), `ports: [{containerPort: 8080}]`; `livenessProbe: {httpGet: {path: /, port: 8080}, initialDelaySeconds: 10, periodSeconds: 30}`; `securityContext: {runAsNonRoot: true, runAsUser: 101}` (UID 101 is the nginxinc/nginx-unprivileged user); add comment: `# Replace 'latest' with the real image tag before applying`
|
||||
|
||||
- [X] T010 [US1] Create `k8s/api/deployment.yaml`: `Deployment`, `name: api`, `namespace: reactbin`, 1 replica, `selector.matchLabels: {app: api}`; container `name: api`, `image: reactbin-api:latest` (placeholder), `ports: [{containerPort: 8000}]`; `envFrom: [{secretRef: {name: api-env}}]`; `env: [{name: API_DOCS_ENABLED, value: "false"}]`; `livenessProbe: {httpGet: {path: /api/v1/health, port: 8000}, initialDelaySeconds: 10, periodSeconds: 30}`; `readinessProbe: {httpGet: {path: /api/v1/health, port: 8000}, initialDelaySeconds: 5, periodSeconds: 10}`; `securityContext: {runAsNonRoot: true, runAsUser: 1001}`; add comment: `# initContainers block added in US3 (T015)`; add comment: `# Replace 'latest' with the real image tag before applying`
|
||||
|
||||
- [X] T011 [US1] Create `k8s/ingress.yaml`: `Ingress`, `name: reactbin`, `namespace: reactbin`; `annotations: {"cert-manager.io/cluster-issuer": "letsencrypt-prod", "nginx.ingress.kubernetes.io/ssl-redirect": "true"}`; `spec.ingressClassName: nginx`; `spec.tls: [{hosts: ["<your-domain>"], secretName: reactbin-tls}]`; `spec.rules: [{host: "<your-domain>", http: {paths: [{path: /api/, pathType: Prefix, backend: {service: {name: api, port: {number: 8000}}}}, {path: /, pathType: Prefix, backend: {service: {name: ui, port: {number: 8080}}}}]}}]`; IMPORTANT — `/api/` path entry MUST appear before `/` in the YAML (nginx evaluates in declaration order); add comment: `# Replace <your-domain> with the real domain before applying`
|
||||
|
||||
- [X] T012 [US1] Verify US1: run `yamllint -d relaxed k8s/` from the repository root and confirm no errors; run `kubectl apply --dry-run=client -f k8s/` (requires cluster kubeconfig) and confirm all resources in namespace.yaml, vault/, api/, ui/, and ingress.yaml are accepted; if no cluster is available, yamllint passing is sufficient for this checkpoint
|
||||
|
||||
**Checkpoint**: US1 complete. API and UI manifests are schema-valid and ready to apply.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Secrets Sourced from Vault (Priority: P2)
|
||||
|
||||
**Goal**: Confirm that no plaintext secret values appear in any committed manifest file. The implementation (VaultAuth + VaultStaticSecret × 2) was completed in Phase 2.
|
||||
|
||||
**Independent Test**: `git grep` across `k8s/` finds no plaintext credential values.
|
||||
|
||||
- [X] T013 [US2] Verify US2: run `git grep -rn "password\|secret_key\|access_key\|DATABASE_URL" k8s/` and confirm that only key names (in comments) and Vault path references appear — no actual values; also confirm that `k8s/vault/api-secret.yaml` and `k8s/vault/minio-secret.yaml` reference Vault paths under `spec.path` and that `spec.destination.create: true` is set so VSO creates the K8s Secrets
|
||||
|
||||
**Checkpoint**: US2 complete. Zero plaintext secrets in manifests; all secrets flow through Vault.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: User Story 3 — Schema Migrations Run Before API Starts (Priority: P3)
|
||||
|
||||
**Goal**: The API Deployment includes an Alembic init container. `api/Dockerfile.prod` is updated to include migration files.
|
||||
|
||||
**Independent Test**: `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test` succeeds and `docker run --rm reactbin-api-prod:test ls /app/alembic` shows migration files. `make validate-k8s` confirms the init container spec is accepted by the Kubernetes schema.
|
||||
|
||||
- [X] T014 [US3] Update `api/Dockerfile.prod`: in the **runtime stage** (the `FROM python:3.12-slim` stage), after the line `COPY --chown=appuser:appgroup app/ ./app/`, add two new lines: `COPY --chown=appuser:appgroup alembic/ ./alembic/` and `COPY --chown=appuser:appgroup alembic.ini .`; the builder stage is unchanged; verify with `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test && docker run --rm reactbin-api-prod:test ls /app/alembic /app/alembic.ini`
|
||||
|
||||
- [X] T015 [US3] Update `k8s/api/deployment.yaml`: add an `initContainers` block to the pod spec (before the `containers` block) containing one init container: `name: alembic-migrate`, `image: reactbin-api:latest` (same placeholder tag as the main container), `command: ["alembic", "upgrade", "head"]`, `workingDir: /app`, `envFrom: [{secretRef: {name: api-env}}]`, `securityContext: {runAsNonRoot: true, runAsUser: 1001}`; remove the `# initContainers block added in US3 (T015)` comment added in T010
|
||||
|
||||
- [X] T016 [US3] Verify US3: run `make validate-k8s` (or `yamllint -d relaxed k8s/`) and confirm the updated deployment.yaml with the init container passes validation; run `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:test` and confirm it succeeds; run `docker run --rm reactbin-api-prod:test ls /app/alembic.ini` and confirm the file is present
|
||||
|
||||
**Checkpoint**: US3 complete. API Deployment includes Alembic init container; production image includes migration files.
|
||||
|
||||
---
|
||||
|
||||
## Phase 6: User Story 4 — MinIO In-Cluster with Persistent Storage (Priority: P4)
|
||||
|
||||
**Goal**: MinIO runs as a StatefulSet with a PVC, is accessible only within the cluster, and has the required bucket created by a Job.
|
||||
|
||||
**Independent Test**: `make validate-k8s` confirms all MinIO manifests pass schema validation. On a live cluster: MinIO pod reaches Running state, bucket exists, no external Ingress for MinIO.
|
||||
|
||||
- [X] T017 [P] [US4] Create `k8s/minio/service.yaml`: `Service`, `name: minio`, `namespace: reactbin`, `type: ClusterIP`, `selector: {app: minio}`, `ports: [{port: 9000, targetPort: 9000, name: s3}]`; add comment: `# No Ingress for MinIO — internal access only (FR-012)`
|
||||
|
||||
- [X] T018 [US4] Create `k8s/minio/statefulset.yaml`: `StatefulSet` (NOT Deployment — StatefulSet ensures stable PVC binding on pod recreation), `name: minio`, `namespace: reactbin`, `replicas: 1`, `selector.matchLabels: {app: minio}`, `serviceName: minio`; pod `securityContext: {runAsUser: 1000, runAsGroup: 1000, fsGroup: 1000}`; container `name: minio`, `image: minio/minio:latest`, `args: ["server", "/data", "--console-address", ":9001"]`, `ports: [{containerPort: 9000, name: s3}]`; `env: [{name: MINIO_ROOT_USER, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_USER}}}, {name: MINIO_ROOT_PASSWORD, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_PASSWORD}}}]`; `livenessProbe: {httpGet: {path: /minio/health/live, port: 9000}, initialDelaySeconds: 30, periodSeconds: 20}`; `readinessProbe: {httpGet: {path: /minio/health/ready, port: 9000}, initialDelaySeconds: 15, periodSeconds: 10}`; `volumeMounts: [{name: minio-data, mountPath: /data}]`; `volumeClaimTemplates: [{metadata: {name: minio-data}, spec: {accessModes: [ReadWriteOnce], resources: {requests: {storage: 10Gi}}}}]`; add comment: `# storageClassName omitted — uses cluster default; override if needed`
|
||||
|
||||
- [X] T019 [US4] Create `k8s/minio/init-job.yaml`: `Job`, `name: minio-init-bucket`, `namespace: reactbin`; `spec.template.spec.restartPolicy: OnFailure`; container `name: mc`, `image: minio/mc:latest`, `command: ["sh", "-c"]`, `args: ["mc alias set local http://minio.reactbin.svc.cluster.local:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD && mc mb --ignore-existing local/reactbin"]`; `env: [{name: MINIO_ROOT_USER, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_USER}}}, {name: MINIO_ROOT_PASSWORD, valueFrom: {secretKeyRef: {name: minio-credentials, key: MINIO_ROOT_PASSWORD}}}]`; `securityContext: {runAsNonRoot: false}` with comment `# minio/mc runs as root by default; FR-013 exception for this one-off init Job`; add comment: `# --ignore-existing makes this Job idempotent — safe to re-apply`
|
||||
|
||||
- [X] T020 [US4] Verify US4: run `make validate-k8s` (or `yamllint -d relaxed k8s/`) and confirm all three MinIO manifests (statefulset.yaml, service.yaml, init-job.yaml) pass validation; confirm no Ingress resource references MinIO
|
||||
|
||||
**Checkpoint**: All four user stories complete.
|
||||
|
||||
---
|
||||
|
||||
## Phase 7: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [X] T021 [P] Run `yamllint -d relaxed k8s/` from the repository root and fix any YAML formatting violations across all 12 manifest files; confirm output shows no errors
|
||||
|
||||
- [X] T022 [P] Add `.yamllint.yml` at the repository root (if not already present) with `extends: relaxed` and `rules: {line-length: {max: 120}}` to keep line length reasonable for verbose K8s YAML
|
||||
|
||||
- [X] T023 Run `make build-prod` to confirm `api/Dockerfile.prod` still builds cleanly after the T014 addition; run `docker run --rm reactbin-api-prod:latest ls /app/alembic.ini /app/alembic/` and confirm both are present in the production image
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
- T001 and T002 can run in parallel (directory creation vs Makefile edit)
|
||||
- T003, T004, T005, T006 can run in parallel after T001 (different files, same phase)
|
||||
- T007, T008, T009 can run in parallel after Phase 2 completes
|
||||
- T010 after T007 (deployment references service name, easier to write with service done) — but they're different files so technically parallel; keep sequential for clarity
|
||||
- T011 after T007 and T008 (Ingress references both service names)
|
||||
- T012 after T007–T011
|
||||
- T013 after Phase 2 (Vault CRDs exist to inspect)
|
||||
- T014 and T015 can run in parallel (different files: Dockerfile.prod vs deployment.yaml)
|
||||
- T016 after T014 and T015
|
||||
- T017, T018, T019 can run in parallel after Phase 2 completes
|
||||
- T020 after T017–T019
|
||||
- T021, T022, T023 can run in parallel
|
||||
|
||||
### Execution Order Summary
|
||||
|
||||
```
|
||||
Step 1: T001 ∥ T002 (setup)
|
||||
Step 2: T003 ∥ T004 ∥ T005 ∥ T006 (foundational: namespace + Vault CRDs)
|
||||
Step 3: T007 ∥ T008 ∥ T009 (US1: services + UI deployment)
|
||||
Step 4: T010 (US1: API deployment)
|
||||
Step 5: T011 (US1: Ingress)
|
||||
Step 6: T012 (US1: validate)
|
||||
Step 7: T013 (US2: verify no plaintext secrets)
|
||||
Step 8: T014 ∥ T015 (US3: Dockerfile.prod + init container)
|
||||
Step 9: T016 (US3: verify)
|
||||
Step 10: T017 ∥ T018 ∥ T019 (US4: MinIO manifests)
|
||||
Step 11: T020 (US4: validate MinIO)
|
||||
Step 12: T021 ∥ T022 ∥ T023 (polish)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (US1 + US2 — application is reachable with Vault-backed secrets)
|
||||
|
||||
1. Phase 1 (Setup) + Phase 2 (Foundational)
|
||||
2. Phase 3 (US1 — API, UI, Ingress)
|
||||
3. Phase 4 (US2 — verify no plaintext secrets)
|
||||
4. **STOP and VALIDATE**: apply to cluster, confirm `https://<domain>/` and `/api/v1/health` return 200
|
||||
5. Deploy MVP
|
||||
|
||||
### Incremental Delivery
|
||||
|
||||
1. Setup + Foundational → Apply → namespace and Vault sync ready
|
||||
2. Add US1 (API + UI + Ingress) → Deploy → application reachable at domain
|
||||
3. Add US3 (Alembic init container) → Deploy → migrations run automatically on rollout
|
||||
4. Add US4 (MinIO) → Deploy → persistent image storage in-cluster
|
||||
5. Polish → clean YAML, confirmed builds
|
||||
@@ -7,3 +7,5 @@ coverage/
|
||||
.env.*
|
||||
!.env.example
|
||||
*.log
|
||||
*.spec.ts
|
||||
tests/
|
||||
|
||||
30
ui/Dockerfile.prod
Normal file
30
ui/Dockerfile.prod
Normal file
@@ -0,0 +1,30 @@
|
||||
# syntax=docker/dockerfile:1
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Build stage: install deps and compile Angular app
|
||||
# ════════════════════════════════════════════════
|
||||
FROM node:22-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Layer cache split: deps only (changes rarely)
|
||||
COPY package.json package-lock.json ./
|
||||
RUN --mount=type=cache,target=/root/.npm \
|
||||
npm ci
|
||||
|
||||
# Layer cache split: source (changes often)
|
||||
COPY . .
|
||||
RUN npm run build
|
||||
|
||||
# ════════════════════════════════════════════════
|
||||
# Runtime stage: minimal nginx serving static assets
|
||||
# ════════════════════════════════════════════════
|
||||
FROM nginxinc/nginx-unprivileged:alpine
|
||||
|
||||
COPY --from=builder /app/dist/reactbin-ui/browser /usr/share/nginx/html
|
||||
COPY nginx.conf /etc/nginx/conf.d/default.conf
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
|
||||
CMD wget -qO- http://localhost:8080/ || exit 1
|
||||
22
ui/nginx.conf
Normal file
22
ui/nginx.conf
Normal file
@@ -0,0 +1,22 @@
|
||||
server {
|
||||
listen 8080;
|
||||
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
# SPA fallback: all unmatched paths → app shell
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# Long-lived cache for fingerprinted assets
|
||||
location ~* \.(js|css|woff2?|ttf|eot|svg|png|jpg|jpeg|gif|ico)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# Never cache the entry point
|
||||
location = /index.html {
|
||||
add_header Cache-Control "no-store, no-cache, must-revalidate";
|
||||
}
|
||||
}
|
||||
0
ui/tests/build/.gitkeep
Normal file
0
ui/tests/build/.gitkeep
Normal file
100
ui/tests/build/verify_production_image.sh
Executable file
100
ui/tests/build/verify_production_image.sh
Executable file
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env bash
|
||||
# TDD verification script for ui/Dockerfile.prod
|
||||
# Fails (red) if Dockerfile.prod does not exist or any check fails.
|
||||
set -euo pipefail
|
||||
|
||||
IMAGE="reactbin-ui-prod:verify-$$"
|
||||
IMAGE2="reactbin-ui-prod:verify-cache-$$"
|
||||
APP_CONTAINER=""
|
||||
|
||||
cleanup() {
|
||||
[ -n "$APP_CONTAINER" ] && docker rm -f "$APP_CONTAINER" 2>/dev/null || true
|
||||
docker rmi "$IMAGE" 2>/dev/null || true
|
||||
docker rmi "$IMAGE2" 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# ── US1 check 1: build ────────────────────────────────────────────────────────
|
||||
echo "[verify] Building $IMAGE..."
|
||||
docker build -f ui/Dockerfile.prod ui/ -t "$IMAGE"
|
||||
echo "[verify] Build OK"
|
||||
|
||||
# ── US1 check 2: start container ──────────────────────────────────────────────
|
||||
echo "[verify] Starting production container..."
|
||||
APP_CONTAINER=$(docker run -d -p 18080:8080 "$IMAGE")
|
||||
|
||||
# ── US1 check 3: health endpoint ──────────────────────────────────────────────
|
||||
echo "[verify] Polling health endpoint..."
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf http://localhost:18080/ > /dev/null; then break; fi
|
||||
sleep 1
|
||||
if [[ $i -eq 30 ]]; then echo "FAIL: health check timed out after 30s"; exit 1; fi
|
||||
done
|
||||
echo "[verify] Health check passed"
|
||||
|
||||
# ── US1 check 4: SPA routing ──────────────────────────────────────────────────
|
||||
if ! curl -sf http://localhost:18080/library > /dev/null; then
|
||||
echo "FAIL: SPA routing check failed (/library did not return 200)"; exit 1
|
||||
fi
|
||||
echo "[verify] SPA routing OK (/library → 200)"
|
||||
|
||||
# ── US2 check 1: non-root user ────────────────────────────────────────────────
|
||||
UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u)
|
||||
if [[ "$UID_IN_CONTAINER" -eq 0 ]]; then
|
||||
echo "FAIL: process running as root (UID 0)"; exit 1
|
||||
fi
|
||||
echo "[verify] Non-root user OK (UID $UID_IN_CONTAINER)"
|
||||
|
||||
# ── C1: stdout/stderr log capture ─────────────────────────────────────────────
|
||||
LOGS=$(docker logs "$APP_CONTAINER" 2>&1)
|
||||
if [[ -z "$LOGS" ]]; then
|
||||
echo "FAIL: no output on stdout/stderr"; exit 1
|
||||
fi
|
||||
echo "[verify] Stdout logging OK"
|
||||
|
||||
# ── FR-008: cache-control headers on fingerprinted assets ─────────────────────
|
||||
JS_FILE=$(docker run --rm "$IMAGE" ls /usr/share/nginx/html | grep -E '\.js$' | head -1)
|
||||
if [[ -n "$JS_FILE" ]]; then
|
||||
CACHE_HEADER=$(curl -sI "http://localhost:18080/${JS_FILE}" | grep -i "cache-control" || true)
|
||||
if ! echo "$CACHE_HEADER" | grep -qi "immutable\|max-age=31536000"; then
|
||||
echo "FAIL: cache-control header not set on fingerprinted asset ${JS_FILE}"; exit 1
|
||||
fi
|
||||
echo "[verify] Cache-Control header OK"
|
||||
else
|
||||
echo "[verify] Cache-Control header check skipped (no .js file found at root)"
|
||||
fi
|
||||
|
||||
# ── US1 check 5: SIGTERM → exit 0 ────────────────────────────────────────────
|
||||
docker stop "$APP_CONTAINER" > /dev/null
|
||||
EXIT_CODE=$(docker wait "$APP_CONTAINER")
|
||||
if [[ "$EXIT_CODE" -ne 0 ]]; then
|
||||
echo "FAIL: non-zero exit code $EXIT_CODE after SIGTERM"; exit 1
|
||||
fi
|
||||
echo "[verify] Graceful shutdown OK (exit $EXIT_CODE)"
|
||||
|
||||
# ── US2 check 2: Node.js absent from runtime image ───────────────────────────
|
||||
set +e
|
||||
docker run --rm "$IMAGE" node --version 2>/dev/null
|
||||
NODE_EXIT=$?
|
||||
set -e
|
||||
if [[ "$NODE_EXIT" -eq 0 ]]; then
|
||||
echo "FAIL: node runtime found in production image"; exit 1
|
||||
fi
|
||||
echo "[verify] Node.js absent in runtime image OK"
|
||||
|
||||
# ── C2: no hardcoded secrets in image layers ─────────────────────────────────
|
||||
if docker history --no-trunc "$IMAGE" 2>&1 | grep -qiE "(password|secret_key|api_key|token)"; then
|
||||
echo "FAIL: potential secret found in image history"; exit 1
|
||||
fi
|
||||
echo "[verify] No secrets in image layers OK"
|
||||
|
||||
# ── US3: dep layer cached on source-only rebuild ─────────────────────────────
|
||||
echo "[verify] Testing cache hit on source-only rebuild..."
|
||||
touch ui/src/app/app.component.ts
|
||||
BUILD2_OUTPUT=$(docker build --progress=plain -f ui/Dockerfile.prod ui/ -t "$IMAGE2" 2>&1)
|
||||
if ! echo "$BUILD2_OUTPUT" | grep -q "CACHED"; then
|
||||
echo "FAIL: dependency layer not reused on source-only rebuild"; exit 1
|
||||
fi
|
||||
echo "[verify] Dep layer cache hit confirmed (US3 OK)"
|
||||
|
||||
echo "[verify] All checks passed (US1 + US2 + US3)."
|
||||
Reference in New Issue
Block a user