diff --git a/.gitignore b/.gitignore index bc7b343..088b19f 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ venv/ *.egg-info/ dist/ build/ +!api/tests/build/ .pytest_cache/ .ruff_cache/ .coverage diff --git a/.specify/feature.json b/.specify/feature.json index 82ccf46..6894ea8 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1,3 +1,3 @@ { - "feature_directory": "specs/009-login-rate-limiting" + "feature_directory": "specs/010-api-prod-dockerfile" } diff --git a/CLAUDE.md b/CLAUDE.md index 3a03eb1..0a98a7b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,5 @@ For additional context about technologies to be used, project structure, shell commands, and other important information, read the current plan at -`specs/009-login-rate-limiting/plan.md`. +`specs/010-api-prod-dockerfile/plan.md`. diff --git a/Makefile b/Makefile index 52c8e29..855e0ab 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,13 @@ -.PHONY: test-unit test-integration +.PHONY: test-unit test-integration build-prod verify-prod test-unit: cd api && python -m pytest tests/unit/ -v test-integration: docker compose -f docker-compose.test.yml run --rm api-test + +build-prod: + docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest + +verify-prod: + bash api/tests/build/verify_production_image.sh diff --git a/api/.dockerignore b/api/.dockerignore index 691316d..c32eeb3 100644 --- a/api/.dockerignore +++ b/api/.dockerignore @@ -12,3 +12,6 @@ dist/ .env .env.* !.env.example +tests/ +alembic/ +alembic.ini diff --git a/api/Dockerfile.prod b/api/Dockerfile.prod new file mode 100644 index 0000000..63d2c28 --- /dev/null +++ b/api/Dockerfile.prod @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1 + +# ════════════════════════════════════════════════ +# Build stage: install production deps via uv +# ════════════════════════════════════════════════ +FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder + +WORKDIR /app + +ENV UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + 
UV_PYTHON_DOWNLOADS=never + +# Layer cache split: deps only (changes rarely) +COPY pyproject.toml uv.lock ./ +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --no-install-project + +# Layer cache split: source (changes often) +COPY app/ ./app/ + +# ════════════════════════════════════════════════ +# Runtime stage: lean image with venv + source +# ════════════════════════════════════════════════ +FROM python:3.12-slim + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd --system --gid 1001 appgroup \ + && useradd --system --uid 1001 --gid 1001 --no-create-home appuser + +COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv +COPY --chown=appuser:appgroup app/ ./app/ + +USER appuser + +ENV PATH="/app/.venv/bin:$PATH" + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD curl -f http://localhost:8000/api/v1/health || exit 1 + +CMD ["uvicorn", "app.main:app", \ + "--host", "0.0.0.0", \ + "--port", "8000", \ + "--timeout-graceful-shutdown", "30"] diff --git a/api/tests/build/.gitkeep b/api/tests/build/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/api/tests/build/verify_production_image.sh b/api/tests/build/verify_production_image.sh new file mode 100755 index 0000000..994826b --- /dev/null +++ b/api/tests/build/verify_production_image.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# TDD verification script for api/Dockerfile.prod +# Fails (red) if Dockerfile.prod does not exist or any check fails. 
+set -euo pipefail  # abort on command errors, unset variables, and failed pipeline stages
+
+IMAGE="reactbin-api-prod:verify-$$"  # $$ (this shell's PID) keeps the tag unique per run
+IMAGE2="reactbin-api-prod:verify-cache-$$"  # tag used by the source-only rebuild check
+PG_CONTAINER=""  # stays empty until the container is started, so cleanup can skip it
+APP_CONTAINER=""  # same convention as PG_CONTAINER
+
+cleanup() {  # EXIT trap: best-effort removal of every container and image this run created
+  [ -n "$APP_CONTAINER" ] && docker rm -f "$APP_CONTAINER" 2>/dev/null || true
+  [ -n "$PG_CONTAINER" ] && docker rm -f "$PG_CONTAINER" 2>/dev/null || true
+  docker rmi "$IMAGE" 2>/dev/null || true
+  docker rmi "$IMAGE2" 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# ── US1 check 1: build ────────────────────────────────────────────────────────
+echo "[verify] Building $IMAGE..."
+docker build -f api/Dockerfile.prod api/ -t "$IMAGE"
+echo "[verify] Build OK"
+
+# ── US1 check 2: start with a throwaway postgres ──────────────────────────────
+echo "[verify] Starting postgres..."
+PG_CONTAINER=$(docker run -d \
+  -e POSTGRES_DB=reactbin_verify \
+  -e POSTGRES_USER=verify \
+  -e POSTGRES_PASSWORD=verify \
+  postgres:16-alpine)
+
+for i in $(seq 1 30); do  # wait up to ~30s for postgres to accept connections
+  if docker exec "$PG_CONTAINER" pg_isready -h 127.0.0.1 -U verify -q 2>/dev/null; then break; fi  # probe over TCP: the image's init-time server is socket-only and would report ready too early
+  sleep 1
+  if [[ $i -eq 30 ]]; then echo "FAIL: postgres did not become ready"; exit 1; fi
+done
+
+PG_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$PG_CONTAINER")  # container IP; the app reaches postgres by address, not by name
+
+echo "[verify] Starting production container..."
+APP_CONTAINER=$(docker run -d \
+  -p 18000:8000 \
+  -e JWT_SECRET_KEY=verify-key \
+  -e OWNER_USERNAME=testowner \
+  -e OWNER_PASSWORD=testpassword \
+  -e DATABASE_URL="postgresql+asyncpg://verify:verify@${PG_IP}:5432/reactbin_verify" \
+  -e S3_ENDPOINT_URL=http://noop:9000 \
+  -e S3_BUCKET_NAME=noop \
+  -e S3_ACCESS_KEY_ID=noop \
+  -e S3_SECRET_ACCESS_KEY=noop \
+  -e S3_REGION=us-east-1 \
+  "$IMAGE")
+
+# ── US1 check 3: health endpoint ──────────────────────────────────────────────
+echo "[verify] Polling health endpoint..."
+for i in $(seq 1 30); do + if curl -sf http://localhost:18000/api/v1/health > /dev/null; then break; fi + sleep 1 + if [[ $i -eq 30 ]]; then echo "FAIL: health check timed out after 30s"; exit 1; fi +done +echo "[verify] Health check passed" + +# ── US2 check 1: non-root user ──────────────────────────────────────────────── +UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u) +if [[ "$UID_IN_CONTAINER" -eq 0 ]]; then + echo "FAIL: process running as root (UID 0)"; exit 1 +fi +echo "[verify] Non-root user OK (UID $UID_IN_CONTAINER)" + +# ── C1: stdout/stderr log capture ──────────────────────────────────────────── +LOGS=$(docker logs "$APP_CONTAINER" 2>&1) +if [[ -z "$LOGS" ]]; then + echo "FAIL: no output on stdout/stderr"; exit 1 +fi +if ! echo "$LOGS" | grep -qiE "(started server|application startup complete|uvicorn)"; then + echo "FAIL: no startup logs found on stdout/stderr"; exit 1 +fi +echo "[verify] Stdout logging OK" + +# ── US1 check 4: SIGTERM → exit 0 ──────────────────────────────────────────── +docker stop "$APP_CONTAINER" > /dev/null +EXIT_CODE=$(docker wait "$APP_CONTAINER") +if [[ "$EXIT_CODE" -ne 0 ]]; then + echo "FAIL: non-zero exit code $EXIT_CODE after SIGTERM"; exit 1 +fi +echo "[verify] Graceful shutdown OK (exit $EXIT_CODE)" + +# ── US2 check 2: dev deps absent ───────────────────────────────────────────── +if docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null; then + echo "FAIL: pytest importable in production image (dev deps present)"; exit 1 +fi +echo "[verify] Dev deps absent OK" + +# ── C2: no hardcoded secrets in image layers ───────────────────────────────── +if docker history --no-trunc "$IMAGE" 2>&1 | grep -qiE "(password|secret_key|api_key|token)"; then + echo "FAIL: potential secret found in image history"; exit 1 +fi +echo "[verify] No secrets in image layers OK" + +# ── C3: missing env var → non-zero exit ────────────────────────────────────── +set +e +docker run --rm -e JWT_SECRET_KEY=verify-key 
"$IMAGE" 2>/dev/null +MISSING_ENV_EXIT=$? +set -e +if [[ "$MISSING_ENV_EXIT" -eq 0 ]]; then + echo "FAIL: container exited 0 despite missing OWNER_USERNAME"; exit 1 +fi +echo "[verify] Missing-env-var exit check OK (exit $MISSING_ENV_EXIT)" + +# ── US3: dep layer cached on source-only rebuild ────────────────────────────── +echo "[verify] Testing cache hit on source-only rebuild..." +touch api/app/main.py +BUILD2_OUTPUT=$(docker build --progress=plain -f api/Dockerfile.prod api/ -t "$IMAGE2" 2>&1) +if ! echo "$BUILD2_OUTPUT" | grep -q "CACHED"; then + echo "FAIL: dependency layer not reused on source-only rebuild"; exit 1 +fi +echo "[verify] Dep layer cache hit confirmed (US3 OK)" + +echo "[verify] All checks passed (US1 + US2 + US3)." diff --git a/specs/010-api-prod-dockerfile/checklists/requirements.md b/specs/010-api-prod-dockerfile/checklists/requirements.md new file mode 100644 index 0000000..1467b95 --- /dev/null +++ b/specs/010-api-prod-dockerfile/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: Production-Grade API Container Image + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-05-07 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [X] No implementation details (languages, frameworks, APIs) +- [X] Focused on user value and business needs +- [X] Written for non-technical stakeholders +- [X] All mandatory sections completed + +## Requirement Completeness + +- [X] No [NEEDS CLARIFICATION] markers remain +- [X] Requirements are testable and unambiguous +- [X] Success criteria are measurable +- [X] Success criteria are technology-agnostic (no implementation details) +- [X] All acceptance scenarios are defined +- [X] Edge cases are identified +- [X] Scope is clearly bounded +- [X] Dependencies and assumptions identified + +## Feature Readiness + +- [X] All functional requirements have clear acceptance criteria +- [X] User scenarios cover primary flows +- 
[X] Feature meets measurable outcomes defined in Success Criteria +- [X] No implementation details leak into specification + +## Notes + +- All items pass. Ready for `/speckit-plan`. diff --git a/specs/010-api-prod-dockerfile/contracts/container.md b/specs/010-api-prod-dockerfile/contracts/container.md new file mode 100644 index 0000000..7c95b36 --- /dev/null +++ b/specs/010-api-prod-dockerfile/contracts/container.md @@ -0,0 +1,122 @@ +# Contract: Production API Container Image + +This document defines the observable interface of the `reactbin-api-prod` container image. Any orchestration layer (Kubernetes manifests, Docker Compose, CI pipeline) MUST be written against this contract. + +--- + +## Network Interface + +| Property | Value | +|----------|-------| +| Protocol | HTTP/1.1 | +| Port | 8000 (TCP) | +| Bind address | `0.0.0.0` (all interfaces inside the container) | + +--- + +## Health Check + +The container exposes a health check at the existing API health endpoint: + +``` +GET /api/v1/health +``` + +**Success response** (`200 OK`): +```json +{ "status": "ok" } +``` + +The container declares a built-in `HEALTHCHECK` with the following defaults: + +| Parameter | Value | +|-----------|-------| +| Interval | 30s | +| Timeout | 5s | +| Start period | 10s | +| Retries | 3 | + +Orchestrators that define their own probes (e.g. Kubernetes `livenessProbe` / `readinessProbe`) SHOULD use this same endpoint. + +--- + +## Required Environment Variables + +All configuration is supplied at runtime via environment variables. The image contains no defaults for secret or environment-specific values. 
+ +| Variable | Description | Example | +|----------|-------------|---------| +| `JWT_SECRET_KEY` | HS256 signing key for bearer tokens | `change-me-long-random-string` | +| `OWNER_USERNAME` | Username of the single owner account | `owner` | +| `OWNER_PASSWORD` | Password of the single owner account | `change-me` | +| `DATABASE_URL` | PostgreSQL connection URL (asyncpg scheme) | `postgresql+asyncpg://user:pass@host:5432/db` | +| `S3_ENDPOINT_URL` | S3-compatible object storage endpoint | `https://s3.amazonaws.com` | +| `S3_BUCKET_NAME` | Storage bucket name | `reactbin-prod` | +| `S3_ACCESS_KEY_ID` | Storage access key | `AKIAIOSFODNN7EXAMPLE` | +| `S3_SECRET_ACCESS_KEY` | Storage secret key | `wJalrXUtnFEMI/K7MDENG` | +| `S3_REGION` | Storage region | `us-east-1` | + +**Optional environment variables** (safe defaults apply): + +| Variable | Default | Description | +|----------|---------|-------------| +| `JWT_EXPIRY_SECONDS` | `86400` | Token lifetime in seconds | +| `MAX_UPLOAD_BYTES` | `52428800` | Maximum upload file size (50 MB) | +| `LOGIN_MAX_FAILURES` | `5` | Brute-force lock threshold | +| `LOGIN_WINDOW_SECONDS` | `300` | Failure counting window | +| `LOGIN_COOLDOWN_SECONDS` | `900` | Lock duration after threshold | +| `LOGIN_TRUSTED_PROXY_IPS` | `` | Comma-separated trusted proxy CIDRs | +| `API_BASE_URL` | _(not required at runtime)_ | Used only by client tooling | + +**Startup failure behaviour**: If a required variable is absent, the application exits with a non-zero code before accepting any requests. The error is logged to stderr identifying the missing variable. + +--- + +## Signal Handling + +| Signal | Behaviour | +|--------|-----------| +| `SIGTERM` | Stop accepting new connections; drain in-flight requests; exit 0 within 30s | +| `SIGKILL` | Immediate termination (OS-level; no graceful drain possible) | + +Kubernetes should configure `terminationGracePeriodSeconds ≥ 30` to allow the full drain window. 
+ +--- + +## Process Identity + +| Property | Value | +|----------|-------| +| User | `appuser` | +| UID | `1001` | +| GID | `1001` | +| Root privileges | None | + +The container MUST NOT be run with `--privileged` or as UID 0. + +--- + +## Filesystem + +- **Working directory**: `/app` +- **Application source**: `/app/app/` +- **Virtual environment**: `/app/.venv/` +- **No writable state**: The container requires no persistent local storage. All state is in PostgreSQL and S3. +- **Read-only root**: The container is compatible with `--read-only` (no writes to the filesystem at runtime). + +--- + +## Logging + +All log output is written to **stdout** (info/debug) and **stderr** (warnings/errors). No log files are written inside the container. The container runtime log driver captures all output without additional configuration. + +--- + +## Image Tags + +| Tag pattern | Meaning | +|-------------|---------| +| `reactbin-api-prod:latest` | Latest build from `master` | +| `reactbin-api-prod:<git-sha>` | Immutable build for a specific commit | + +Deployments SHOULD pin to a specific git SHA tag, not `latest`. diff --git a/specs/010-api-prod-dockerfile/plan.md b/specs/010-api-prod-dockerfile/plan.md new file mode 100644 index 0000000..5a63fdb --- /dev/null +++ b/specs/010-api-prod-dockerfile/plan.md @@ -0,0 +1,242 @@ +# Implementation Plan: Production-Grade API Container Image + +**Branch**: `010-api-prod-dockerfile` | **Date**: 2026-05-07 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `specs/010-api-prod-dockerfile/spec.md` + +## Summary + +Produce a production-ready `api/Dockerfile.prod` using a two-stage build: a uv builder stage that installs lockfile-pinned, production-only dependencies into a virtual environment, and a lean `python:3.12-slim` runtime stage that contains only the venv, application source, and `curl` for health checks. 
The runtime process runs as a non-root user (UID 1001), handles SIGTERM gracefully via uvicorn's built-in drain, and logs exclusively to stdout/stderr. Behavioral verification is automated via a shell script (`api/tests/build/verify_production_image.sh`) written before the Dockerfile (§5.1 TDD). + +--- + +## Technical Context + +**Language/Version**: Python 3.12 (existing API), Docker multi-stage build +**Build tool**: uv (lockfile: `api/uv.lock`, already committed) +**Base images**: `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` (builder), `python:3.12-slim` (runtime) +**Testing**: Shell verification script (`verify_production_image.sh`) + `make verify-prod` target +**Target Platform**: linux/amd64 container (Kubernetes or Docker host) +**Performance Goals**: Container starts and passes health check within 30s; rebuild from warm cache in under 60s +**Constraints**: No root process, no hardcoded secrets, no dev deps in final image, compatible with `--read-only` filesystem +**Scale/Scope**: Single-file addition (`Dockerfile.prod`) + shell test + two Makefile targets; zero changes to existing source code + +--- + +## Constitution Check + +*GATE: Must pass before Phase 0 research. 
Re-checked post-design below.* + +| Principle | Status | Notes | +|-----------|--------|-------| +| §5.1 TDD non-negotiable | **COMPLIANT** | `verify_production_image.sh` written before `Dockerfile.prod`; script fails (red) because the build file is absent, then passes (green) after | +| §5.2 Test pyramid | **COMPLIANT** | Shell verification script is the integration-level test for this build artefact; no unit tests applicable (no Python business logic added) | +| §5.4 CI must pass | **COMPLIANT** | `make verify-prod` target is runnable in host CI (requires Docker on the runner, which the existing `make test-integration` already requires) | +| §6 Tech Stack — Docker | **COMPLIANT** | Docker + Docker Compose are mandated; this adds a production Docker file within that constraint | +| §7.1 One-command local start | **COMPLIANT** | `api/Dockerfile` (dev stack) is unchanged; `docker compose up` is unaffected | +| §7.2 Environment configuration | **COMPLIANT** | `Dockerfile.prod` contains zero hardcoded env values; all config is injected at runtime | +| §7.3 Ruff/lint | **COMPLIANT** | No new Python files; shell script linted with `shellcheck` | +| §2.6 No speculative abstraction | **COMPLIANT** | Single Dockerfile, no plugin system or generics | +| §8 Scope boundaries | **COMPLIANT** | Purely infrastructure; no new API routes, data model, or UI changes | + +**Post-design re-check**: All gates remain green. No violations. 
+ +--- + +## Project Structure + +### Documentation (this feature) + +```text +specs/010-api-prod-dockerfile/ +├── plan.md # This file +├── research.md # Phase 0 decisions +├── contracts/ +│ └── container.md # Container interface contract (port, env vars, signals, user) +├── quickstart.md # Build and verification scenarios +└── tasks.md # Generated by /speckit-tasks +``` + +### Source Code Changes + +```text +api/ +├── Dockerfile # Existing dev/test image — UNCHANGED +├── Dockerfile.prod # NEW: production multi-stage image +├── .dockerignore # Existing — verify test files are excluded from build context +└── tests/ + └── build/ + └── verify_production_image.sh # NEW: TDD verification script (written first) + +Makefile # Root Makefile — add build-prod and verify-prod targets +``` + +--- + +## Dockerfile.prod — Annotated Reference + +```dockerfile +# syntax=docker/dockerfile:1 + +# ════════════════════════════════════════════════ +# Build stage: install production deps via uv +# ════════════════════════════════════════════════ +FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder + +WORKDIR /app + +# Pre-compile bytecode; use copy mode for cross-layer compatibility +ENV UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_PYTHON_DOWNLOADS=never + +# ── Layer cache split: deps only (changes rarely) ── +COPY pyproject.toml uv.lock ./ +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --frozen --no-dev --no-install-project + +# ── Layer cache split: source (changes often) ── +COPY app/ ./app/ + +# ════════════════════════════════════════════════ +# Runtime stage: lean image with venv + source +# ════════════════════════════════════════════════ +FROM python:3.12-slim + +WORKDIR /app + +# curl for HEALTHCHECK — only tool added beyond base Python +RUN apt-get update \ + && apt-get install -y --no-install-recommends curl \ + && rm -rf /var/lib/apt/lists/* + +# Non-root system user (UID/GID 1001) +RUN groupadd --system --gid 1001 appgroup \ + && useradd 
--system --uid 1001 --gid 1001 --no-create-home appuser + +# Copy venv from builder; copy source directly from build context +COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv +COPY --chown=appuser:appgroup app/ ./app/ + +USER appuser + +# Activate the venv by prepending its bin to PATH +ENV PATH="/app/.venv/bin:$PATH" + +EXPOSE 8000 + +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD curl -f http://localhost:8000/api/v1/health || exit 1 + +# uvicorn handles SIGTERM; --timeout-graceful-shutdown gives 30s to drain requests +CMD ["uvicorn", "app.main:app", \ + "--host", "0.0.0.0", \ + "--port", "8000", \ + "--timeout-graceful-shutdown", "30"] +``` + +> **Note on COPY paths**: Build context is `api/` (as set by the Makefile target). `COPY app/ ./app/` in both stages refers to `api/app/`. The runtime stage copies source directly from the build context, not from the builder stage — this is simpler and avoids an extra intermediate layer. + +--- + +## verify_production_image.sh — Structure + +```sh +#!/usr/bin/env bash +# TDD verification script for api/Dockerfile.prod +# Fails (red) if Dockerfile.prod does not exist or any check fails. 
+set -euo pipefail + +IMAGE="reactbin-api-prod:verify-$$" + +cleanup() { docker rm -f "$CONTAINER" 2>/dev/null || true; docker rmi "$IMAGE" 2>/dev/null || true; } +trap cleanup EXIT + +# Step 1: Build — fails red if Dockerfile.prod is absent +docker build -f api/Dockerfile.prod api/ -t "$IMAGE" + +# Step 2: Start container with minimal env vars +CONTAINER=$(docker run -d -p 18000:8000 \ + -e JWT_SECRET_KEY=verify-test-key \ + -e OWNER_USERNAME=testowner \ + -e OWNER_PASSWORD=testpassword \ + -e DATABASE_URL=postgresql+asyncpg://noop:noop@noop/noop \ + -e S3_ENDPOINT_URL=http://noop:9000 \ + -e S3_BUCKET_NAME=noop \ + -e S3_ACCESS_KEY_ID=noop \ + -e S3_SECRET_ACCESS_KEY=noop \ + -e S3_REGION=us-east-1 \ + "$IMAGE") + +# Step 3: Poll health endpoint (app will fail to connect to DB, but /health is pre-DB) +for i in $(seq 1 30); do + if curl -sf http://localhost:18000/api/v1/health > /dev/null; then break; fi + sleep 1 + [[ $i -eq 30 ]] && { echo "FAIL: health check timed out"; exit 1; } +done + +# Step 4: Assert non-root user +UID_IN_CONTAINER=$(docker exec "$CONTAINER" id -u) +[[ "$UID_IN_CONTAINER" -ne 0 ]] || { echo "FAIL: process running as root"; exit 1; } + +# Step 5: Graceful shutdown +docker stop "$CONTAINER" # sends SIGTERM +EXIT_CODE=$(docker wait "$CONTAINER") +[[ "$EXIT_CODE" -eq 0 ]] || { echo "FAIL: non-zero exit code $EXIT_CODE"; exit 1; } + +# Step 6: Dev deps absent +if docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null; then + echo "FAIL: pytest importable in production image (dev deps present)"; exit 1 +fi + +echo "All production image checks passed." +``` + +> **Note on health check feasibility**: `/api/v1/health` is a simple JSON response that does not require a database connection (confirmed in `api/app/main.py`). The verification script can therefore pass even without a real PostgreSQL instance. 
+ +--- + +## Makefile Targets + +Add to root `Makefile`: + +```makefile +.PHONY: build-prod verify-prod + +build-prod: + docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest + +verify-prod: + bash api/tests/build/verify_production_image.sh +``` + +--- + +## `.dockerignore` Review + +The existing `api/.dockerignore` already excludes `.venv/`, `__pycache__/`, `.env`, etc. Two additions improve the production build context: + +``` +tests/ +*.egg-info/ +alembic/ +alembic.ini +``` + +`tests/` and `alembic/` are not needed in the production image (we `COPY app/ ./app/` explicitly). Excluding them from the build context reduces the data sent to the Docker daemon. + +> `*.egg-info/` is already present in the existing `.dockerignore`. + +--- + +## Implementation Order + +Tasks are generated by `/speckit-tasks`, but the logical dependency order is: + +1. **Write `verify_production_image.sh`** (TDD red — build fails because `Dockerfile.prod` absent) +2. **Add `Makefile` targets** (`build-prod`, `verify-prod`) — references the script +3. **Write `api/Dockerfile.prod`** (implement to make TDD pass) +4. **Update `api/.dockerignore`** (exclude `tests/`, `alembic/` from build context) +5. **Run `make verify-prod`** (TDD green — all 6 checks pass) +6. **Run `shellcheck`** on `verify_production_image.sh` + +No existing tests are modified. `make test-integration` continues to use `api/Dockerfile` unchanged. 
diff --git a/specs/010-api-prod-dockerfile/quickstart.md b/specs/010-api-prod-dockerfile/quickstart.md new file mode 100644 index 0000000..1268f4e --- /dev/null +++ b/specs/010-api-prod-dockerfile/quickstart.md @@ -0,0 +1,138 @@ +# Quickstart: Production API Container Image + +## Prerequisites + +- Docker 24+ installed and running on the host +- `make` available +- A copy of `.env` (or the env vars from `.env.example`) for smoke-testing + +--- + +## Build the Production Image + +```sh +make build-prod +# Equivalent: docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest +``` + +On a warm cache (deps unchanged), the build should complete in under 60 seconds because the dependency layer is reused. + +--- + +## Verify the Production Image (TDD Smoke Test) + +```sh +make verify-prod +``` + +This runs `api/tests/build/verify_production_image.sh`, which: +1. Builds the image (fails fast if `Dockerfile.prod` is missing — the **red** TDD state) +2. Starts the container with test env vars +3. Polls `/api/v1/health` until it returns 200 (or times out after 30s) +4. Asserts the API process is running as a non-root user (UID ≠ 0) +5. Sends SIGTERM and asserts the container exits with code 0 within 30s +6. Asserts `pytest` is NOT importable inside the container (dev deps excluded) + +**Expected output (green)**: +``` +[verify] Building reactbin-api-prod:test ... +[verify] Build OK +[verify] Starting container ... +[verify] Health check passed (GET /api/v1/health → 200) +[verify] Process user: 1001 (non-root ✓) +[verify] Sending SIGTERM ... +[verify] Container exited with code 0 (graceful shutdown ✓) +[verify] Dev deps absent ✓ +[verify] All checks passed. 
+``` + +--- + +## User Story Integration Scenarios + +### US1 — API Runs Reliably in Production + +```sh +# Start container with real (or test) env vars +docker run --rm -d \ + --name reactbin-test \ + -p 8000:8000 \ + -e JWT_SECRET_KEY=my-secret \ + -e OWNER_USERNAME=owner \ + -e OWNER_PASSWORD=changeme \ + -e DATABASE_URL=postgresql+asyncpg://user:pass@host:5432/db \ + -e S3_ENDPOINT_URL=http://minio:9000 \ + -e S3_BUCKET_NAME=reactbin \ + -e S3_ACCESS_KEY_ID=minioadmin \ + -e S3_SECRET_ACCESS_KEY=minioadmin \ + -e S3_REGION=us-east-1 \ + reactbin-api-prod:latest + +# Check health +curl http://localhost:8000/api/v1/health +# → {"status":"ok"} + +# Graceful shutdown +docker stop reactbin-test # sends SIGTERM +docker wait reactbin-test # → exit code 0 +``` + +### US2 — Minimal, Secure Container + +```sh +# Verify non-root user +docker inspect --format='{{.Config.User}}' reactbin-api-prod:latest +# → appuser (or 1001) + +# Verify no dev packages (pytest should not be importable) +docker run --rm reactbin-api-prod:latest \ + /app/.venv/bin/python -c "import pytest" 2>&1 +# → ModuleNotFoundError: No module named 'pytest' + +# Verify no source control or test files in image +docker run --rm reactbin-api-prod:latest ls /app +# → app .venv (no tests/, no alembic/, no .git/) +``` + +### US3 — Fast, Reproducible Builds + +```sh +# First build (cold): installs all deps +time docker build --no-cache -f api/Dockerfile.prod api/ -t reactbin-api-prod:cold + +# Touch a source file only (no dep change) +touch api/app/main.py + +# Second build: dependency layer served from cache +time docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:warm +# Expect: warm build < 30s; cold build varies (network-dependent) + +# Confirm same health response from both +docker run --rm ... reactbin-api-prod:cold +docker run --rm ... 
reactbin-api-prod:warm +``` + +--- + +## Missing Env Var Behaviour + +```sh +# OWNER_USERNAME intentionally omitted +docker run --rm \ + -e JWT_SECRET_KEY=my-secret \ + reactbin-api-prod:latest +# → Container exits non-zero, stderr logs: "field required: owner_username" +``` + +--- + +## Read-Only Filesystem Compatibility + +```sh +docker run --rm --read-only -p 8000:8000 \ + -e JWT_SECRET_KEY=... [other env vars] \ + reactbin-api-prod:latest & + +curl http://localhost:8000/api/v1/health +# → {"status":"ok"} +``` diff --git a/specs/010-api-prod-dockerfile/research.md b/specs/010-api-prod-dockerfile/research.md new file mode 100644 index 0000000..f0ee06a --- /dev/null +++ b/specs/010-api-prod-dockerfile/research.md @@ -0,0 +1,94 @@ +# Research: Production API Container Image + +## Decision 1 — Use a Separate `Dockerfile.prod` + +**Decision**: Add `api/Dockerfile.prod` alongside the existing `api/Dockerfile`. + +**Rationale**: The existing `api/Dockerfile` installs dev dependencies (`.[dev]`), mounts source with `--reload`, and is used by the Docker Compose integration test stack. Modifying it would break `make test-integration`. A separate file keeps the two images independent with zero coupling. + +**Alternatives considered**: +- Build-arg flag in a single Dockerfile: adds conditional complexity and makes both files harder to read. +- Rename existing to `Dockerfile.dev` and make `Dockerfile` the production image: would require updating `docker-compose.test.yml` with an explicit file reference — a wider change than needed for this feature. + +--- + +## Decision 2 — Multi-Stage Build: uv Builder + python:3.12-slim Runtime + +**Decision**: Two-stage build. Stage 1 (`builder`) uses `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` to install production dependencies into a virtual environment. Stage 2 (`runtime`) uses `python:3.12-slim` and copies only the `.venv` from the builder plus the application source from the build context. uv is not present in the final image. 
+ +**Rationale**: +- uv's official Docker image is the fastest, most correct way to produce a pinned, bytecode-compiled venv from `uv.lock`. +- Keeping uv out of the runtime image reduces attack surface and image size. +- `python:3.12-slim` is a well-maintained, widely scanned base; using it for the runtime stage aligns with existing project images. + +**Layer caching strategy**: +``` +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen --no-dev --no-install-project ← cache hits when only source changes +COPY app/ ./app/ ← only reaches here on source changes +``` +`--no-install-project` installs all listed dependencies without the project package itself. The project source is then copied separately. This means a source-only change reuses the dependency layer from cache. + +**Environment variables for optimal builds**: +- `UV_COMPILE_BYTECODE=1` — pre-compile `.pyc` files; slightly larger venv but faster cold starts. +- `UV_LINK_MODE=copy` — avoids hard-link issues when copying between image layers. +- `UV_PYTHON_DOWNLOADS=never` — ensures the builder stage uses the bundled Python, not a downloaded one. + +**Alternatives considered**: +- Installing deps into the system Python (`--system`): rejected because it pollutes the base image and makes it harder to copy deps cleanly into the runtime stage. +- Using a single `FROM python:3.12-slim` with pip: slower builds, no lockfile pinning, no bytecode compilation step. + +--- + +## Decision 3 — Non-Root User (UID 1001, System User) + +**Decision**: Create a system user `appuser` with GID/UID 1001 in the runtime stage. All owned files are `chown`-ed at `COPY` time using `--chown=appuser:appgroup`. + +**Rationale**: Running as root inside a container is a container breakout risk. A numeric UID (rather than a named user that might not exist on the host) is required by some Kubernetes pod security admission policies. 
UID 1001 avoids collision with UID 1000 (the typical first interactive user on a Linux host) while remaining a predictable, inspectable value. + +**Alternatives considered**: +- UID 1000: small risk of collision with host user when bind mounts are involved. +- `USER nobody`: `nobody` (UID 65534) works but its name and UID are not consistent across distros. + +--- + +## Decision 4 — SIGTERM Graceful Shutdown via uvicorn `--timeout-graceful-shutdown` + +**Decision**: Use `uvicorn`'s built-in `--timeout-graceful-shutdown 30` flag. No process supervisor (tini, s6) is required. + +**Rationale**: uvicorn handles SIGTERM natively when run as PID 1 in single-worker mode (the production Dockerfile runs one worker). On SIGTERM it stops accepting new connections, waits up to `--timeout-graceful-shutdown` seconds for in-flight requests to complete, then exits with code 0. No additional init system is needed. + +**Alternatives considered**: +- tini: adds a small init shim that reaps zombies and forwards signals. Not necessary with a single uvicorn worker (no child processes to reap). +- Gunicorn + uvicorn workers: more complex; appropriate for multi-worker setups but the deployment platform (Kubernetes) scales horizontally via pod replicas rather than in-process workers. + +--- + +## Decision 5 — `curl` for HEALTHCHECK + +**Decision**: Install `curl` (via `apt-get --no-install-recommends`) in the runtime stage and use it in the `HEALTHCHECK` directive. + +**Rationale**: The existing dev Dockerfile already installs `curl` for the same reason. `curl -f` exits non-zero on HTTP errors, making it a reliable single-command health probe. A Python one-liner adds interpreter startup overhead (~100ms) per check; `curl` is ~5ms. + +**Alternatives considered**: +- `wget -q --spider`: available on Alpine but not on Debian-slim by default; requires separate install. +- Python `urllib.request`: no extra install, but slower and adds noise to the process table during health checks. 
+ +--- + +## Decision 6 — TDD Verification via Shell Script + +**Decision**: Write `api/tests/build/verify_production_image.sh` before `Dockerfile.prod`. The script builds the image and runs behavioral checks (health endpoint, non-root user, clean SIGTERM exit). It is the "failing test" per §5.1. + +**Rationale**: The production image is a build artifact, not Python business logic. pytest cannot test a Docker image without Docker-in-Docker, which the current CI stack does not support. A shell script run on the host (via `make verify-prod`) is the appropriate TDD vehicle for this artefact type. + +**Verification steps the script covers**: +1. `docker build -f api/Dockerfile.prod api/` → fails (red) until Dockerfile.prod exists. +2. Run container with required env vars; wait for health endpoint → `GET /api/v1/health` returns 200. +3. Inspect running process user → UID ≠ 0 (non-root). +4. Send SIGTERM to container; assert exit code 0 within 30s (graceful shutdown). +5. Assert dev packages are absent: `pip show pytest` inside container must return non-zero. + +**Alternatives considered**: +- pytest with docker SDK: requires `docker` Python package and DinD in CI; rejected as over-engineered for a single-file build artifact. +- Manual verification only: rejected because §5.1 mandates automated failing tests before production code. diff --git a/specs/010-api-prod-dockerfile/spec.md b/specs/010-api-prod-dockerfile/spec.md new file mode 100644 index 0000000..9e0d841 --- /dev/null +++ b/specs/010-api-prod-dockerfile/spec.md @@ -0,0 +1,96 @@ +# Feature Specification: Production-Grade API Container Image + +**Feature Branch**: `010-api-prod-dockerfile` +**Created**: 2026-05-07 +**Status**: Draft +**Input**: User description: "We need a production-grade Dockerfile for the API to start preparing for a production deployment." 
+ +## User Scenarios & Testing *(mandatory)* + +### User Story 1 — API Runs Reliably in Production (Priority: P1) + +An operator builds and runs the API container in a production environment. The container starts successfully, serves requests, and can be health-checked by an orchestrator (e.g., Kubernetes). When the orchestrator signals shutdown, the container drains in-flight requests before exiting cleanly, avoiding dropped connections. + +**Why this priority**: Without a correctly functioning container, no production deployment is possible. This is the baseline that all other stories depend on. + +**Independent Test**: Build the image from source, run the container with required env vars, call the health endpoint, send SIGTERM, and verify the process exits cleanly with code 0. No other stories are required. + +**Acceptance Scenarios**: + +1. **Given** a built container image and all required env vars, **When** the container starts, **Then** it begins serving requests within 30 seconds and the health endpoint returns a success response. +2. **Given** a running container, **When** a SIGTERM is received, **Then** the process finishes any in-flight requests and exits with code 0 within 30 seconds. +3. **Given** a running container, **When** a required env var is absent, **Then** the process exits immediately with a non-zero code and logs a clear error message identifying the missing variable. + +--- + +### User Story 2 — Minimal, Secure Container (Priority: P2) + +A security-conscious operator audits the container image before promotion to production. They verify the API process does not run as root, the image contains no development tooling or test artefacts, and no credentials are baked into the image layers. + +**Why this priority**: Running as root or including unnecessary tools increases the blast radius of any container breakout. This is a production-readiness requirement, not optional hardening. 
+ +**Independent Test**: Inspect the built image to confirm the runtime user is non-root, confirm no dev/test files are present in the image layers, and scan the image with a standard vulnerability scanner. Passes independently of any deployment environment. + +**Acceptance Scenarios**: + +1. **Given** a built container image, **When** the running process user is inspected, **Then** the API process runs as a non-root user with a numeric UID. +2. **Given** a built container image, **When** the image layers are inspected, **Then** no development dependencies, test files, or local configuration are present. +3. **Given** a built container image, **When** the image layers are scanned for hardcoded secrets, **Then** no credentials, API keys, or secret values are found embedded in any layer. + +--- + +### User Story 3 — Fast, Reproducible Builds (Priority: P3) + +A developer rebuilds the container image after a code change. The build completes quickly because unchanged layers (dependencies) are cached. Given identical source inputs, the resulting image is functionally equivalent across builds, enabling confident CI/CD promotion. + +**Why this priority**: Slow or non-deterministic builds reduce developer confidence and slow deployment pipelines. Important for velocity, but the container already works (P1, P2) before this is optimised. + +**Independent Test**: Build the image twice from the same source; confirm the second build reuses dependency layers from cache and completes significantly faster than the first. + +**Acceptance Scenarios**: + +1. **Given** an image built once, **When** only application source files change and the image is rebuilt, **Then** the dependency installation step is served from cache and the rebuild completes faster than a clean build. +2. **Given** two builds from the same source commit, **When** the images are run, **Then** both produce identical API behaviour. 
+ +--- + +### Edge Cases + +- What happens when the database is unavailable at container startup? +- What happens when the container is sent SIGKILL instead of SIGTERM (hard kill by orchestrator)? +- What happens if the container runs out of memory mid-request? +- How does the image behave when run with a read-only filesystem (`--read-only`)? + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The container image MUST start the API service and begin accepting requests without manual intervention after supplying required env vars. +- **FR-002**: The container image MUST expose a health check that an orchestrator can poll to determine service readiness. +- **FR-003**: The container image MUST handle the SIGTERM signal by completing in-flight requests then exiting cleanly within 30 seconds. +- **FR-004**: The container image MUST run the API process as a non-root, non-privileged user. +- **FR-005**: The container image MUST NOT contain development dependencies, test files, source control metadata, or local configuration files. +- **FR-006**: The container image MUST NOT contain any hardcoded credentials, secrets, or environment-specific values — all configuration MUST be supplied via environment variables at runtime. +- **FR-007**: The container image MUST log to standard output and standard error so logs are captured by the container runtime without additional configuration. +- **FR-008**: The container image MUST be buildable reproducibly from the same source inputs — a rebuild from the same commit MUST produce a functionally equivalent image. +- **FR-009**: Rebuilding the image after a source-only change (no dependency changes) MUST reuse the cached dependency installation layer. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: The container starts and serves its first successful health-check response within 30 seconds of launch with all required env vars present.
+- **SC-002**: The container exits cleanly (code 0) within 30 seconds of receiving a SIGTERM, with no in-flight requests dropped. +- **SC-003**: The API process inside the container runs as a non-root user (inspectable via container runtime tooling). +- **SC-004**: A rebuild after a source-only change completes in under 60 seconds on a warm cache (dependency layer reused). +- **SC-005**: The image contains zero hardcoded secrets (verifiable by static layer inspection). +- **SC-006**: All API logs appear on stdout/stderr and are captured by the container runtime log driver without additional sidecar or configuration. + +## Assumptions + +- The existing test Dockerfile (used by the integration test stack) is not suitable for production and will remain separate; this feature produces a distinct production image. +- All required runtime configuration (database URL, S3 credentials, JWT secret, etc.) will be injected as environment variables by the deployment platform — the image itself carries no environment-specific values. +- The deployment target supports OCI-compatible container images (Kubernetes, Docker, etc.). +- No persistent local storage is needed by the API container; all state lives in the database and object storage. +- The production image does not need to run database migrations; migrations are applied by a separate step in the deployment pipeline. +- A single-architecture image (linux/amd64) is sufficient for the initial production target. diff --git a/specs/010-api-prod-dockerfile/tasks.md b/specs/010-api-prod-dockerfile/tasks.md new file mode 100644 index 0000000..6973fab --- /dev/null +++ b/specs/010-api-prod-dockerfile/tasks.md @@ -0,0 +1,158 @@ +# Tasks: Production-Grade API Container Image + +**Input**: Design documents from `specs/010-api-prod-dockerfile/` +**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, contracts/container.md ✅, quickstart.md ✅ + +**Tests**: TDD is non-negotiable (§5.1). 
The "test" for a Docker build artefact is `api/tests/build/verify_production_image.sh`, written before `api/Dockerfile.prod` exists. Running the script immediately fails (red) because the build step cannot find the file; writing `Dockerfile.prod` turns it green. + +**Organization**: Phase 1 sets up Makefile targets and `.dockerignore`; Phase 2 creates the `api/tests/build/` directory; Phase 3 (US1) writes the verification script and the Dockerfile; Phase 4 (US2) extends the script with security checks; Phase 5 (US3) extends it with a cache-hit check; Phase 6 polishes. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel with other [P] tasks in the same phase +- **[Story]**: Which user story this task belongs to +- Exact file paths included in every task description + +--- + +## Phase 1: Setup + +- [X] T001 Add `build-prod` and `verify-prod` targets (and their `.PHONY` entries) to the root `Makefile` at `/workspace/Makefile`: `build-prod` runs `docker build -f api/Dockerfile.prod api/ -t reactbin-api-prod:latest`; `verify-prod` runs `bash api/tests/build/verify_production_image.sh` + +- [X] T002 Update `api/.dockerignore` at `/workspace/api/.dockerignore`: append three lines — `tests/`, `alembic/`, and `alembic.ini` — so these are excluded from the production build context (the Dockerfile.prod copies only `app/` explicitly, but excluding them from the context keeps the transfer to the Docker daemon fast) + +--- + +## Phase 2: Foundational + +- [X] T003 Create directory `api/tests/build/` at `/workspace/api/tests/build/` with `mkdir -p` and add a `.gitkeep` so the directory is tracked + +**Checkpoint**: Directory structure is ready; Makefile and .dockerignore are updated. + +--- + +## Phase 3: User Story 1 — API Runs Reliably in Production (Priority: P1) 🎯 MVP + +**Goal**: The container builds, starts, serves the health endpoint, and exits cleanly on SIGTERM. + +**Independent Test**: `make verify-prod` — passes when `Dockerfile.prod` exists and all US1 checks pass.
+ +### Test for User Story 1 (TDD red — write first, confirm failure before T005) + +- [X] T004 [US1] Create `api/tests/build/verify_production_image.sh` as an executable bash script (`chmod +x`) with `#!/usr/bin/env bash` and `set -euo pipefail`; the script MUST: + 1. Set `IMAGE="reactbin-api-prod:verify-$$"` and `PG_CONTAINER=""` and `APP_CONTAINER=""`; + 2. Define a `cleanup()` function that runs `docker rm -f "$APP_CONTAINER" "$PG_CONTAINER" 2>/dev/null || true` and `docker rmi "$IMAGE" 2>/dev/null || true`, and register it with `trap cleanup EXIT`; + 3. **[US1 check 1 — build]** Run `docker build -f api/Dockerfile.prod api/ -t "$IMAGE"` — this is the line that fails **red** because `api/Dockerfile.prod` does not yet exist; print `[verify] Building $IMAGE...` before and `[verify] Build OK` after; + 4. **[US1 check 2 — start with real DB]** Launch a throwaway postgres: `PG_CONTAINER=$(docker run -d -e POSTGRES_DB=reactbin_verify -e POSTGRES_USER=verify -e POSTGRES_PASSWORD=verify postgres:16-alpine)`; poll `docker exec "$PG_CONTAINER" pg_isready -U verify` up to 30 × 1s, fail if timeout; capture `PG_IP=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$PG_CONTAINER")`; + 5. Start the production container: `APP_CONTAINER=$(docker run -d -p 18000:8000 -e JWT_SECRET_KEY=verify-key -e OWNER_USERNAME=testowner -e OWNER_PASSWORD=testpassword -e DATABASE_URL="postgresql+asyncpg://verify:verify@${PG_IP}:5432/reactbin_verify" -e S3_ENDPOINT_URL=http://noop:9000 -e S3_BUCKET_NAME=noop -e S3_ACCESS_KEY_ID=noop -e S3_SECRET_ACCESS_KEY=noop -e S3_REGION=us-east-1 "$IMAGE")`; note — S3 credentials are placeholders; the health endpoint does not require S3; + 6. **[US1 check 3 — health endpoint]** Poll `curl -sf http://localhost:18000/api/v1/health` up to 30 × 1s, fail with a message if timeout; print `[verify] Health check passed` on success; + 7. 
**[US1 check 4 — SIGTERM → exit 0]** Run `docker stop "$APP_CONTAINER"` (sends SIGTERM); capture `EXIT_CODE=$(docker wait "$APP_CONTAINER")`; assert `"$EXIT_CODE" -eq 0`, fail with `FAIL: non-zero exit $EXIT_CODE` otherwise; print `[verify] Graceful shutdown OK (exit $EXIT_CODE)`; + 8. Print `[verify] US1 checks passed.` + 9. **[C3 — missing env var → non-zero exit]** Run `docker run --rm -e JWT_SECRET_KEY=verify-key "$IMAGE" 2>&1`; assert the exit code is **non-zero** (OWNER_USERNAME is absent so Pydantic settings validation must fail at startup); print `[verify] Missing-env-var exit check OK`; + After writing the script, run `make verify-prod` and confirm it **fails** with a Docker build error (red state — `Dockerfile.prod` does not exist). + +### Implementation for User Story 1 + +- [X] T005 [US1] Create `api/Dockerfile.prod` at `/workspace/api/Dockerfile.prod` — a two-stage multi-stage build: + **Stage 1 (builder)**: `FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder`; `WORKDIR /app`; set `ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_PYTHON_DOWNLOADS=never`; `COPY pyproject.toml uv.lock ./`; `RUN --mount=type=cache,target=/root/.cache/uv uv sync --frozen --no-dev --no-install-project`; `COPY app/ ./app/` + **Stage 2 (runtime)**: `FROM python:3.12-slim`; `WORKDIR /app`; `RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*`; `RUN groupadd --system --gid 1001 appgroup && useradd --system --uid 1001 --gid 1001 --no-create-home appuser`; `COPY --from=builder --chown=appuser:appgroup /app/.venv /app/.venv`; `COPY --chown=appuser:appgroup app/ ./app/`; `USER appuser`; `ENV PATH="/app/.venv/bin:$PATH"`; `EXPOSE 8000`; `HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 CMD curl -f http://localhost:8000/api/v1/health || exit 1`; `CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"]` + +- [X] T006 [US1] Verify TDD green for US1: run 
`make verify-prod` and confirm all four US1 checks pass — build OK, health endpoint returns 200, SIGTERM produces exit code 0, and `[verify] US1 checks passed.` is printed. + +**Checkpoint**: US1 is complete. Production container builds, starts, serves traffic, and shuts down gracefully. + +--- + +## Phase 4: User Story 2 — Minimal, Secure Container (Priority: P2) + +**Goal**: The production image runs as non-root and contains no dev dependencies or embedded secrets. + +**Independent Test**: US2 checks in `make verify-prod` — the same script extended with non-root and dev-deps-absent assertions. + +### Tests for User Story 2 (TDD extension — add checks, confirm they pass against existing Dockerfile.prod) + +- [X] T007 [US2] Extend `api/tests/build/verify_production_image.sh` with two US2 checks and two cross-cutting checks (C1, C2), all added before the final `US1 checks passed` line at the position each item specifies: + **[US2 check 1 — non-root]** After the container is running (before `docker stop`), run `UID_IN_CONTAINER=$(docker exec "$APP_CONTAINER" id -u)`; assert `"$UID_IN_CONTAINER" -ne 0`, fail with `FAIL: process running as root (UID 0)` if violated; print `[verify] Non-root user OK (UID $UID_IN_CONTAINER)`; + **[US2 check 2 — dev deps absent]** After cleanup of APP_CONTAINER but still holding the image, run `docker run --rm "$IMAGE" /app/.venv/bin/python -c "import pytest" 2>/dev/null`; assert the command returns **non-zero** (i.e., pytest is NOT importable); if it returns 0, fail with `FAIL: pytest importable in production image (dev deps present)`; print `[verify] Dev deps absent OK`; + **[C1 — stdout log capture]** Run `docker logs "$APP_CONTAINER" 2>&1`; assert the output is non-empty and contains `Started server` or `Application startup complete` (uvicorn startup lines); fail with `FAIL: no startup logs found on stdout/stderr` if absent; print `[verify] Stdout logging OK`; note — insert this check while APP_CONTAINER is still running, before the `docker stop` call; + **[C2 — no hardcoded secrets
in layers]** Run `docker history --no-trunc "$IMAGE" 2>&1`; pipe through `grep -iE "(password|secret_key|api_key|token)" `; assert zero matching lines; if any match, fail with `FAIL: potential secret found in image history`; print `[verify] No secrets in image layers OK`; + Update the final success line to `[verify] All checks passed (US1 + US2).`; confirm `make verify-prod` passes. + +**Checkpoint**: US2 is verified. Image runs as UID 1001 and contains no test tooling. + +--- + +## Phase 5: User Story 3 — Fast, Reproducible Builds (Priority: P3) + +**Goal**: Rebuilding after a source-only change reuses the dependency layer from cache. + +**Independent Test**: US3 check in `make verify-prod` — a timed second build after touching a source file asserts the dep layer was cached. + +### Tests for User Story 3 (TDD extension) + +- [X] T008 [US3] Extend `api/tests/build/verify_production_image.sh` with a US3 cache check appended after all other checks (before final success line): + **[US3 check — dep layer cached on source-only rebuild]** Set `IMAGE2="reactbin-api-prod:verify-cache-$$"`; `touch api/app/main.py`; capture the output of `docker build --progress=plain -f api/Dockerfile.prod api/ -t "$IMAGE2" 2>&1` (the `--progress=plain` flag ensures consistent `CACHED` output regardless of Docker version or TTY settings); assert the output contains the string `CACHED`; if `CACHED` is absent, fail with `FAIL: dependency layer not reused on source-only rebuild`; add `docker rmi "$IMAGE2" 2>/dev/null || true` to the `cleanup()` function; print `[verify] Dep layer cache hit confirmed (US3 OK)`; + Update the final success line to `[verify] All checks passed (US1 + US2 + US3).` + +- [X] T009 [US3] Verify TDD green for US3: run `make verify-prod` and confirm the full script passes including the cache check — the build output for the second image must contain `CACHED`, and `[verify] All checks passed (US1 + US2 + US3).` must print. 
+ +**Checkpoint**: All three user stories are verified end-to-end by `make verify-prod`. + +--- + +## Phase 6: Polish & Cross-Cutting Concerns + +- [X] T010 Run `make test-integration` from `/workspace` and confirm all 102 existing tests still pass — verifies that the `.dockerignore` additions (T002) do not break the existing test Dockerfile build or any integration test (§5.4 regression gate) + +- [X] T011 Run `shellcheck api/tests/build/verify_production_image.sh` and fix any violations (common: unquoted variables, `[ ]` vs `[[ ]]`, missing `--` before arguments) + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: No external dependencies — start immediately +- **Phase 2 (Foundational)**: No dependencies — start immediately (parallel with Phase 1) +- **Phase 3 (US1)**: Depends on Phase 1 (Makefile + .dockerignore must exist before `make verify-prod` can run) and Phase 2 (test directory must exist) +- **Phase 4 (US2)**: Depends on Phase 3 (US1 script and Dockerfile must exist to extend) +- **Phase 5 (US3)**: Depends on Phase 4 (full US2 script must exist to extend) +- **Phase 6 (Polish)**: Depends on all prior phases; T010 (regression test) must precede T011 (shellcheck) + +### Within Phase 3 + +- T004 before T005 (write test script before writing the Dockerfile) +- T005 after T004 (implement Dockerfile after confirming red state) +- T006 after T005 (verify green after implementation) + +### Execution Order Summary + +``` +Step 1: T001 ∥ T002 ∥ T003 (setup — parallel, different files) +Step 2: T004 (write verification script — TDD red) +Step 3: T005 (write Dockerfile.prod — implementation) +Step 4: T006 (verify US1 green) +Step 5: T007 (extend script with US2 checks, verify pass) +Step 6: T008 (extend script with US3 check) +Step 7: T009 (verify US3 green) +Step 8: T010 (make test-integration — regression gate) +Step 9: T011 (shellcheck polish) +``` + +--- + +## Implementation Strategy + +### MVP (US1 — reliable 
production run) + +1. Complete T001–T003 (setup) +2. Complete T004–T006 (core blocking: write script → write Dockerfile → verify green) +3. **Validate**: `make verify-prod` passes; `make test-integration` still passes (no regressions) +4. US2 and US3 add explicit verification coverage for properties already implemented + +### Incremental Delivery + +- After Phase 3: Production image builds, starts, and shuts down gracefully — safe to deploy +- After Phase 4: Security properties (non-root, no dev deps) are explicitly verified +- After Phase 5: Build efficiency (layer caching) is confirmed by automated check +- After Phase 6: Script is lint-clean, ready for CI integration