diff --git a/.specify/feature.json b/.specify/feature.json index 28bd642..6fdb4b6 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1,3 +1,3 @@ { - "feature_directory": "specs/001-reaction-image-board" + "feature_directory": "specs/002-api-image-proxy" } diff --git a/CLAUDE.md b/CLAUDE.md index 0b0dc20..37de3a5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,5 @@ For additional context about technologies to be used, project structure, shell commands, and other important information, read the current plan at -`specs/001-reaction-image-board/plan.md`. +`specs/002-api-image-proxy/plan.md`. diff --git a/api/app/routers/images.py b/api/app/routers/images.py index e22a658..6b338db 100644 --- a/api/app/routers/images.py +++ b/api/app/routers/images.py @@ -1,11 +1,8 @@ -import io import struct import uuid -import zlib -from typing import Annotated, Any +from typing import Any from fastapi import APIRouter, Depends, File, Form, HTTPException, Response, UploadFile -from fastapi.responses import RedirectResponse from sqlalchemy.ext.asyncio import AsyncSession from app.auth.provider import AuthProvider @@ -219,8 +216,21 @@ async def serve_image_file( status_code=404, detail={"detail": "Image not found", "code": "image_not_found"}, ) - url = await storage.get_presigned_url(image.storage_key, expires_in_seconds=3600) - return RedirectResponse(url=url, status_code=302) + try: + data = await storage.get(image.storage_key) + except Exception: + raise HTTPException( + status_code=500, + detail={"detail": "Failed to retrieve image content", "code": "storage_error"}, + ) from None + return Response( + content=data, + media_type=image.mime_type, + headers={ + "ETag": f'"{image.hash}"', + "Cache-Control": "public, max-age=31536000, immutable", + }, + ) @router.patch("/images/{image_id}/tags") diff --git a/api/app/storage/backend.py b/api/app/storage/backend.py index aa9a8ea..d48450c 100644 --- a/api/app/storage/backend.py +++ b/api/app/storage/backend.py @@ -7,8 +7,8 @@ class StorageBackend(ABC): """Store object at key with given content type.""" @abstractmethod - async def get_presigned_url(self, key: str, expires_in_seconds: int = 3600) -> str: - """Return a pre-signed URL valid for expires_in_seconds.""" + async def get(self, key: str) -> bytes: + """Return object content as bytes.""" @abstractmethod async def delete(self, key: str) -> None: diff --git a/api/app/storage/s3_backend.py b/api/app/storage/s3_backend.py index d047c1d..ef93191 100644 --- a/api/app/storage/s3_backend.py +++ b/api/app/storage/s3_backend.py @@ -32,14 +32,10 @@ class S3StorageBackend(StorageBackend): ContentType=content_type, ) - async def get_presigned_url(self, key: str, expires_in_seconds: int = 3600) -> str: + async def get(self, key: str) -> bytes: async with self._client() as client: - url = await client.generate_presigned_url( - "get_object", - Params={"Bucket": self._settings.s3_bucket_name, "Key": key}, - ExpiresIn=expires_in_seconds, - ) - return url + response = await client.get_object(Bucket=self._settings.s3_bucket_name, Key=key) + return await response["Body"].read() async def delete(self, key: str) -> None: async with self._client() as client: diff --git a/api/tests/integration/test_serving.py b/api/tests/integration/test_serving.py index 48539e5..14e2725 100644 --- a/api/tests/integration/test_serving.py +++ b/api/tests/integration/test_serving.py @@ -1,9 +1,11 @@ """ -T055 — GET /api/v1/images/{id}/file → 302 with Location header +T055 — GET /api/v1/images/{id}/file → 200 with binary content, ETag, Cache-Control T056 — /file for unknown ID → 404 image_not_found +T057 — /file response exposes no storage-specific details """ import io import uuid + import pytest @@ -17,20 +19,23 @@ def _minimal_webp() -> bytes: @pytest.mark.asyncio -async def test_file_redirect_returns_302(client): +async def test_file_returns_200_with_content(client): data = _minimal_webp() upload = await client.post( "/api/v1/images", files={"file": ("img.webp", io.BytesIO(data), "image/webp")}, ) assert upload.status_code in (200, 201) - image_id = upload.json()["id"] + upload_body = upload.json() + image_id = upload_body["id"] + image_hash = upload_body["hash"] - # Don't follow redirects - response = await client.get(f"/api/v1/images/{image_id}/file", follow_redirects=False) - assert response.status_code == 302 - assert "Location" in response.headers - assert response.headers["Location"] # must not be empty + response = await client.get(f"/api/v1/images/{image_id}/file") + assert response.status_code == 200 + assert response.headers["content-type"].startswith("image/") + assert response.headers["etag"] == f'"{image_hash}"' + assert "immutable" in response.headers["cache-control"] + assert len(response.content) > 0 @pytest.mark.asyncio @@ -39,3 +44,21 @@ async def test_file_unknown_id_returns_404(client): assert response.status_code == 404 body = response.json() assert body["code"] == "image_not_found" + + +@pytest.mark.asyncio +async def test_file_response_exposes_no_storage_details(client): + data = _minimal_webp() + upload = await client.post( + "/api/v1/images", + files={"file": ("img.webp", io.BytesIO(data), "image/webp")}, + ) + assert upload.status_code in (200, 201) + image_id = upload.json()["id"] + + response = await client.get(f"/api/v1/images/{image_id}/file") + assert response.status_code == 200 + assert "location" not in response.headers + assert "minio" not in response.text.lower() + assert "s3://" not in response.text.lower() + assert "amazonaws.com" not in response.text.lower() diff --git a/specs/002-api-image-proxy/checklists/requirements.md b/specs/002-api-image-proxy/checklists/requirements.md new file mode 100644 index 0000000..38ca92a --- /dev/null +++ b/specs/002-api-image-proxy/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: API Image Proxy + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-05-03 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [x] No implementation details (languages, frameworks, APIs) +- [x] Focused on user value and business needs +- [x] Written for non-technical stakeholders +- [x] All mandatory sections completed + +## Requirement Completeness + +- [x] No [NEEDS CLARIFICATION] markers remain +- [x] Requirements are testable and unambiguous +- [x] Success criteria are measurable +- [x] Success criteria are technology-agnostic (no implementation details) +- [x] All acceptance scenarios are defined +- [x] Edge cases are identified +- [x] Scope is clearly bounded +- [x] Dependencies and assumptions identified + +## Feature Readiness + +- [x] All functional requirements have clear acceptance criteria +- [x] User scenarios cover primary flows +- [x] Feature meets measurable outcomes defined in Success Criteria +- [x] No implementation details leak into specification + +## Notes + +All checklist items pass. Spec is ready for `/speckit-plan`. diff --git a/specs/002-api-image-proxy/contracts/api.md b/specs/002-api-image-proxy/contracts/api.md new file mode 100644 index 0000000..bfaabab --- /dev/null +++ b/specs/002-api-image-proxy/contracts/api.md @@ -0,0 +1,62 @@ +# API Contract: Image Content Endpoint + +**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03 + +## Changed endpoint + +### `GET /api/v1/images/{image_id}/file` + +Serves the binary content of the image with the given ID, proxied through the API from object storage. + +**Path parameters** + +| Parameter | Type | Description | +|-----------|------|-------------| +| `image_id` | UUID | Unique identifier of the image | + +**Responses** + +#### `200 OK` — Image content + +| Header | Value | Notes | +|--------|-------|-------| +| `Content-Type` | `image/jpeg` \| `image/png` \| `image/gif` \| `image/webp` | Taken from stored `mime_type` | +| `ETag` | `""` | SHA-256 hash of the image content; quoted string per RFC 7232 | +| `Cache-Control` | `public, max-age=31536000, immutable` | Safe: images are immutable after upload | + +Body: raw image bytes. + +#### `404 Not Found` — Image not found + +```json +{ "detail": "Image not found", "code": "image_not_found" } +``` + +#### `500 Internal Server Error` — Storage retrieval failure + +```json +{ "detail": "Failed to retrieve image content", "code": "storage_error" } +``` + +No storage-specific details (bucket name, hostname, credentials) are exposed in error responses. + +--- + +## Breaking change from prior behaviour + +| Aspect | Before | After | +|--------|--------|-------| +| Status code | `302 Found` | `200 OK` | +| `Location` header | Present (presigned S3 URL) | Absent | +| Body | Empty | Raw image bytes | +| Browser behaviour | Follows redirect to storage | Receives content from API | + +The endpoint path is unchanged. The UI requires no URL-construction changes. + +--- + +## Removed StorageBackend capability + +The `get_presigned_url` operation is removed from the `StorageBackend` interface. It was the only mechanism for generating time-limited direct-access URLs to stored objects, and had a single call site (`serve_image_file`). With the proxy in place it has no remaining callers. + +Any future need for presigned URLs (e.g., direct-upload flows) would require a new spec and a new interface method. diff --git a/specs/002-api-image-proxy/data-model.md b/specs/002-api-image-proxy/data-model.md new file mode 100644 index 0000000..80f49ee --- /dev/null +++ b/specs/002-api-image-proxy/data-model.md @@ -0,0 +1,25 @@ +# Data Model: API Image Proxy + +**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03 + +## Summary + +This feature introduces no new entities and no database schema changes. + +The change is entirely in the *retrieval path* for image file content: +- The `Image` entity's `storage_key` field is used as the S3 object key to fetch content from storage. +- The `mime_type` field is used to set the `Content-Type` response header. +- The `hash` field (SHA-256) is used to set the `ETag` response header. + +All three fields are already present on the `Image` entity per the existing data model in `specs/001-reaction-image-board/plan.md`. + +## StorageBackend interface change + +The `StorageBackend` abstract interface (`api/app/storage/backend.py`) gains one method and loses one: + +| Method | Change | Notes | +|--------|--------|-------| +| `get_presigned_url(key, expires_in_seconds)` | **Removed** | No callers remain after this feature | +| `get(key: str) -> bytes` | **Added** | Returns full object content as bytes | + +This is an interface change, not a data model change. The database schema is unaffected. diff --git a/specs/002-api-image-proxy/plan.md b/specs/002-api-image-proxy/plan.md new file mode 100644 index 0000000..cee1bf2 --- /dev/null +++ b/specs/002-api-image-proxy/plan.md @@ -0,0 +1,195 @@ +# Implementation Plan: API Image Proxy + +**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `specs/002-api-image-proxy/spec.md` + +## Summary + +Replace the presigned-URL redirect in `GET /api/v1/images/{id}/file` with a direct +proxy that fetches the image from object storage server-side and streams the bytes +back to the client. The browser never contacts the storage backend. This eliminates +the `/etc/hosts` workaround required in local development and keeps the storage +layer fully private. + +Changes touch three files in the API and the integration test suite. The UI +requires no changes because the endpoint path is unchanged. + +## Technical Context + +**Language/Version**: Python 3.12+ (API); TypeScript strict mode (UI — no changes) +**Primary Dependencies**: FastAPI, aiobotocore (S3 retrieval), pytest-asyncio (tests) +**Storage**: S3-compatible object storage (MinIO locally, real S3 in production) via `StorageBackend` interface +**Testing**: pytest + pytest-asyncio; existing integration test fixture reused +**Target Platform**: Linux server (containerised); modern evergreen desktop browsers +**Project Type**: Web application — FastAPI API + Angular SPA +**Performance Goals**: Image load time via proxy within 20% of direct storage access on local network +**Constraints**: Max 50 MB per image; no storage details exposed in any error response +**Scale/Scope**: Single-user personal application; no concurrency or throughput targets for v1 + +## Constitution Check + +*GATE: Must pass before Phase 0 research. Re-checked after Phase 1 design below.* + +| Principle | Check | Status | +|-----------|-------|--------| +| §2.1 Separation of concerns | API proxies bytes; UI is unchanged; storage detail never reaches the browser | ✅ | +| §2.2 Dependency direction | UI → API → Storage; no new upward imports introduced | ✅ | +| §2.3 Storage abstraction | Retrieval goes through the `StorageBackend.get()` method; no raw S3 calls in the router | ✅ | +| §2.4 Auth abstraction | No change to auth flow | ✅ | +| §2.5 DB abstraction | No query logic moves outside repositories | ✅ | +| §2.6 No speculative abstraction | `get_presigned_url` removed (no callers); `get()` added (has one caller) | ✅ | +| §3.1 API versioning | Endpoint path `/api/v1/images/{id}/file` is unchanged | ✅ | +| §3.3 Error shape | Storage errors return `{"detail": "...", "code": "storage_error"}` | ✅ | +| §5.1 TDD non-negotiable | Failing tests written before implementation in every task | ✅ | +| §5.2 Test pyramid | Existing integration test updated; no unit test needed (thin router delegation) | ✅ | +| §5.4 CI gate | All tests + ruff must pass before milestone is done | ✅ | + +**Post-design re-check**: Contracts and data model confirmed above; all gates still pass. + +## Project Structure + +### Documentation (this feature) + +```text +specs/002-api-image-proxy/ +├── plan.md # This file +├── spec.md # Feature specification +├── research.md # Phase 0 decisions +├── data-model.md # Interface change summary +├── contracts/ +│ └── api.md # Updated endpoint contract +├── checklists/ +│ └── requirements.md # Spec quality checklist +└── tasks.md # Phase 2 output (/speckit-tasks — NOT created here) +``` + +### Files changed + +```text +api/ +├── app/ +│ ├── storage/ +│ │ ├── backend.py # Remove get_presigned_url; add get(key) -> bytes +│ │ └── s3_backend.py # Remove get_presigned_url impl; add get(key) -> bytes +│ └── routers/ +│ └── images.py # serve_image_file: RedirectResponse → Response with bytes + cache headers +└── tests/ + └── integration/ + └── test_serving.py # T055: update assertion from 302 to 200 with content-type + ETag +``` + +No UI files change. `ImageService.getFileUrl()` already returns `/api/v1/images/{id}/file`. + +## Milestones + +> **TDD ORDER IS MANDATORY** (constitution §5.1): Write the failing test first, +> confirm it fails, then implement until it passes. + +### M1 — Update StorageBackend interface and S3 implementation + +**Goal**: The storage layer can retrieve object bytes; presigned-URL generation is removed. + +**Tasks** (TDD order): + +1. In `api/app/storage/backend.py`: + - Remove the `get_presigned_url` abstract method + - Add `async def get(self, key: str) -> bytes` as a new abstract method + +2. In `api/app/storage/s3_backend.py`: + - Remove the `get_presigned_url` implementation + - Add `async def get(self, key: str) -> bytes`: + ``` + async with self._client() as client: + response = await client.get_object( + Bucket=self._settings.s3_bucket_name, + Key=key, + ) + return await response["Body"].read() + ``` + +3. Verify: `ruff check api/` passes; `mypy` or pyright (if configured) passes. + +**Done criterion**: Interface and implementation compile cleanly; no existing tests break. + +--- + +### M2 — Update the integration test (failing first) + +**Goal**: T055 asserts the new contract (200 + bytes) before any router change. + +**Tasks** (TDD order): + +1. In `api/tests/integration/test_serving.py`, update `test_file_redirect_returns_302`: + - Rename to `test_file_returns_200_with_content` + - Change assertion from `status_code == 302` and `Location` header to: + - `response.status_code == 200` + - `response.headers["content-type"]` starts with `"image/"` + - `response.headers["etag"]` is present and equals `f'"{image.hash}"'` + - `response.headers["cache-control"]` contains `"immutable"` + - `len(response.content) > 0` + - Remove `follow_redirects=False` from the client call + +2. Run `pytest api/tests/integration/test_serving.py` — confirm T055 **fails** (still 302). + +**Done criterion**: Test file updated; new test fails with the current implementation. + +--- + +### M3 — Update the router + +**Goal**: `serve_image_file` proxies bytes from storage instead of redirecting. + +**Tasks** (TDD order — tests already written in M2): + +1. In `api/app/routers/images.py`: + - Remove `from fastapi.responses import RedirectResponse` import + - Add `from fastapi.responses import Response` (already present as `Response` from fastapi) + - Update `serve_image_file`: + + ```python + @router.get("/images/{image_id}/file") + async def serve_image_file( + image_id: uuid.UUID, + db: AsyncSession = Depends(get_db), + storage: StorageBackend = Depends(get_storage), + ): + image_repo = ImageRepository(db) + image = await image_repo.get_by_id(image_id) + if not image: + raise HTTPException( + status_code=404, + detail={"detail": "Image not found", "code": "image_not_found"}, + ) + try: + data = await storage.get(image.storage_key) + except Exception: + raise HTTPException( + status_code=500, + detail={"detail": "Failed to retrieve image content", "code": "storage_error"}, + ) + return Response( + content=data, + media_type=image.mime_type, + headers={ + "ETag": f'"{image.hash}"', + "Cache-Control": "public, max-age=31536000, immutable", + }, + ) + ``` + +2. Run `pytest api/tests/integration/test_serving.py` — confirm all tests pass. +3. Run full test suite: `pytest api/` — confirm no regressions. +4. Run `ruff check api/ && ruff format --check api/`. + +**Done criterion**: All 45+ tests pass; linter clean; the presigned-URL redirect is gone. + +## Post-design Constitution Re-check + +| Principle | Verdict | +|-----------|---------| +| §2.3 Storage abstraction | `get()` added to interface; router only calls `storage.get()` | ✅ | +| §2.6 No speculative abstraction | `get_presigned_url` removed with zero callers remaining | ✅ | +| §3.3 Error shape | `storage_error` code used for retrieval failures | ✅ | +| §5.1 TDD | Test updated to fail before router change | ✅ | + +All gates pass. Feature is ready for `/speckit-tasks`. diff --git a/specs/002-api-image-proxy/research.md b/specs/002-api-image-proxy/research.md new file mode 100644 index 0000000..4afa0a6 --- /dev/null +++ b/specs/002-api-image-proxy/research.md @@ -0,0 +1,56 @@ +# Research: API Image Proxy + +**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03 + +## Decision 1: Storage retrieval method + +**Decision**: Add `async def get(key: str) -> bytes` to `StorageBackend` and remove `get_presigned_url`. + +**Rationale**: The only consumer of `get_presigned_url` is the `serve_image_file` route. Once the route stops redirecting and starts streaming bytes directly, `get_presigned_url` has no call sites. Per §2.6 (no speculative abstraction), it must be removed. A simple `get → bytes` method is consistent with `put` which already operates on `bytes`, and is straightforward to implement and test. At 50 MB maximum file size, loading the full object into memory in a single call is acceptable — the same pattern is used on upload already. + +**Alternatives considered**: +- `async def stream(key) -> AsyncIterator[bytes]`: True streaming avoids buffering but complicates the abstract interface (async generators cannot cleanly implement abstract methods in Python without workarounds). Deferred; can be introduced later if memory pressure is observed. +- Keep `get_presigned_url` and add `get`: Violates §2.6 since `get_presigned_url` would then have no callers. + +--- + +## Decision 2: HTTP response type for the proxy endpoint + +**Decision**: Return `fastapi.Response(content=data, media_type=mime_type)` with the image bytes directly. + +**Rationale**: FastAPI's `Response` with raw bytes is the simplest correct approach when the full content is already in memory. The `mime_type` field is already stored on the `Image` database record, so the router can set `Content-Type` from it without re-inspecting the file. + +**Alternatives considered**: +- `StreamingResponse` with an async generator: Appropriate for true streaming but adds complexity with no benefit when content is already loaded as `bytes`. +- `FileResponse`: For local file paths only, not applicable. + +--- + +## Decision 3: Caching headers + +**Decision**: Add `ETag: ""` and `Cache-Control: public, max-age=31536000, immutable` to the content response. + +**Rationale**: Image files are immutable after upload (constitution §4.2). The SHA-256 hash is already stored on the `Image` record. An `ETag` allows conditional `GET` requests (`If-None-Match`) so browsers skip re-downloading unchanged content. `Cache-Control: immutable` tells browsers the content will never change for this URL, eliminating speculative revalidation. Together these satisfy SC-004 (browser caching). + +**Alternatives considered**: +- `Last-Modified` header: Less precise than ETag for binary content. +- No caching headers: Fails SC-004. + +--- + +## Decision 4: Endpoint path + +**Decision**: Keep the existing path `GET /api/v1/images/{image_id}/file`. Change the response from `302 RedirectResponse` to `200` with binary content. + +**Rationale**: The path already exists, is already referenced by the UI's `getFileUrl()` method, and appears in the existing OpenAPI contract. Changing the response body but not the path means the UI requires no URL-construction changes. The existing integration tests (`test_serving.py`) must be updated to assert `200` instead of `302`. + +**Alternatives considered**: +- New path `/api/v1/images/{id}/content`: Would require UI changes with no benefit; the existing path already semantically expresses "the file content for this image". + +--- + +## Decision 5: Removal of `storage_key` from public API response + +**Decision**: Out of scope for this feature. `storage_key` in the image metadata response is an internal S3 key (SHA-256 hex string), but it is not an actionable credential or hostname. Removal is a separate API-breaking change. + +**Rationale**: The spec explicitly scopes this feature to how image *content* is delivered. Metadata response shape changes are independent and require their own spec and contract update. diff --git a/specs/002-api-image-proxy/spec.md b/specs/002-api-image-proxy/spec.md new file mode 100644 index 0000000..1cc694a --- /dev/null +++ b/specs/002-api-image-proxy/spec.md @@ -0,0 +1,127 @@ +# Feature Specification: API Image Proxy + +**Feature Branch**: `002-api-image-proxy` +**Created**: 2026-05-03 +**Status**: Draft +**Input**: User description: "Instead of directly exposing the Minio storage, proxy fetching images through the API. The API server should talk directly to the S3 backend." + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 — View Images Without Storage Configuration (Priority: P1) + +A user opens the application on any network without any special DNS or host-file +configuration. All images — both thumbnails in the library grid and the full-size +image on the detail page — load correctly. + +**Why this priority**: This is the core motivator for the feature. Direct storage +exposure required `/etc/hosts` hacks to resolve the internal storage hostname; +the proxy removes this friction entirely and makes the application work +out-of-the-box. + +**Independent Test**: Start the application on a machine with no custom +`/etc/hosts` entries for the storage backend. Open the library and verify all +thumbnails load. Open an image detail page and verify the full-size image loads. +Confirm no network errors related to image fetching appear in the browser console. + +**Acceptance Scenarios**: + +1. **Given** the application is running, **When** the user opens the library, + **Then** all image thumbnails display correctly with no host-file configuration + required. + +2. **Given** the user opens an image detail page, **When** the page loads, + **Then** the full-size image displays correctly. + +3. **Given** a request for an image that does not exist, **When** the client + requests its content, **Then** a not-found response is returned with no + storage-specific details exposed. + +--- + +### User Story 2 — Storage Backend Remains Private (Priority: P1) + +The image storage system is never directly reachable or identifiable from a +client browser. Image URLs in the application reference only the API, never the +storage backend's hostname, credentials, or bucket names. + +**Why this priority**: Security and portability. A private storage backend means +credentials cannot be extracted from the browser and the storage layer can be +reconfigured without affecting clients. + +**Independent Test**: Open browser developer tools while browsing the +application. Inspect all image `src` attributes, network requests, and API +responses. Confirm that no request goes directly to the storage backend and no +storage-specific URL, hostname, or credential appears in the browser's network +tab or page source. + +**Acceptance Scenarios**: + +1. **Given** the user is browsing the application, **When** they inspect network + traffic, **Then** all image content is fetched through the application's API + domain — no request goes directly to storage infrastructure. + +2. **Given** any API response containing image metadata, **When** it is + inspected, **Then** no storage-specific URLs, hostnames, bucket names, or + credentials appear in the response body. + +--- + +### Edge Cases + +- What happens when the storage backend is temporarily unavailable? → The proxy + returns a generic server-error response; no storage internals are disclosed. +- What happens when the image exists in the database but the file is missing from + storage? → The proxy returns a not-found response with a generic message. +- What happens when image content is large (up to 50 MB)? → Content streams to + the client without exhausting server memory. + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The API MUST expose an endpoint that returns image binary content + when given a valid image identifier. +- **FR-002**: The API MUST serve image content with the correct file-type + indicator so that browsers render it natively. +- **FR-003**: The image content endpoint MUST be the sole mechanism by which + clients retrieve image binary data; no other image content URL format MUST be + presented to clients. +- **FR-004**: The storage backend MUST NOT be directly reachable by client + browsers; all image content MUST flow through the API. +- **FR-005**: The API MUST return a not-found response when content is requested + for a non-existent image. +- **FR-006**: The API MUST handle image content requests for files up to 50 MB + without exhausting server memory. +- **FR-007**: The image content endpoint MUST include caching metadata in its + response so that browsers can avoid redundant fetches for the same image. +- **FR-008**: The UI MUST construct all image display URLs using the API's image + content endpoint, for both library thumbnails and the detail view. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: A user can view all images in the library and on detail pages on a + freshly configured machine with no custom DNS or host-file entries for the + storage backend. +- **SC-002**: No image-related network request in the browser targets the storage + backend directly — verifiable by inspecting browser developer tools. +- **SC-003**: Images up to 50 MB load completely without a server-side memory or + timeout error. +- **SC-004**: A browser that has already loaded an image does not re-download it + on a second visit to the same page (browser caching headers are respected). +- **SC-005**: Image load time via the proxy is within 20% of the time taken when + served directly from storage on a local network connection. + +## Assumptions + +- The storage backend is accessible from the API server via an internal network + hostname that is not reachable from client browsers. +- Image files are immutable after upload; once a client has the content for a + given image identifier it need not be re-fetched. +- No authentication is required to access the image content endpoint in Phase 1, + consistent with the application's no-auth Phase 1 stance. +- This feature changes how image content is delivered but does not alter any + image metadata endpoints, the upload flow, or tag management behaviour. +- No new database entities or schema changes are required; only how the existing + stored file is retrieved and served changes. diff --git a/specs/002-api-image-proxy/tasks.md b/specs/002-api-image-proxy/tasks.md new file mode 100644 index 0000000..449c0a9 --- /dev/null +++ b/specs/002-api-image-proxy/tasks.md @@ -0,0 +1,135 @@ +# Tasks: API Image Proxy + +**Input**: Design documents from `specs/002-api-image-proxy/` +**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/api.md ✅ + +**TDD**: Tests are non-negotiable per constitution §5.1. Every test task MUST be written and confirmed failing before its implementation task runs. + +**Organization**: Tasks are grouped by user story to enable independent implementation and testing. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks) +- **[Story]**: Which user story this task belongs to (US1, US2) + +--- + +## Phase 1: Setup + +No setup required. The project is already initialized; the feature branch `002-api-image-proxy` is active. + +--- + +## Phase 2: Foundational (Blocking Prerequisites) + +**Purpose**: Update the `StorageBackend` interface before any route or test change can reference the new method. These are pure interface/plumbing tasks with no callers yet — no preceding failing test is required because the tasks themselves introduce no observable behaviour change. + +**⚠️ CRITICAL**: Both T001 and T002 must be complete before US1 work begins. + +- [x] T001 Remove abstract method `get_presigned_url` and add `async def get(self, key: str) -> bytes` abstract method (with docstring "Return object content as bytes.") to `StorageBackend` in `api/app/storage/backend.py` +- [x] T002 [P] Remove `get_presigned_url` implementation; add `async def get(self, key: str) -> bytes` that opens an aiobotocore client, calls `client.get_object(Bucket=self._settings.s3_bucket_name, Key=key)`, and returns `await response["Body"].read()` in `api/app/storage/s3_backend.py` + +**Checkpoint**: `ruff check api/` passes; `pytest api/` passes (all existing tests green — `serve_image_file` route still compiles but will change in Phase 3). + +--- + +## Phase 3: User Story 1 — View Images Without Storage Configuration (Priority: P1) 🎯 MVP + +**Goal**: Replace the 302 presigned-URL redirect with a 200 binary response so images load without any DNS or `/etc/hosts` configuration. + +**Independent Test**: Start the application with no custom host entries. Open the library and detail page. All images load. Browser network tab shows only requests to `/api/v1/` — none to the storage hostname. + +### Tests for User Story 1 (write FIRST — must FAIL before T004) ⚠️ + +- [x] T003 [US1] In `api/tests/integration/test_serving.py`: rename `test_file_redirect_returns_302` to `test_file_returns_200_with_content`; remove `follow_redirects=False`; replace the 302/Location assertions with: `response.status_code == 200`, `response.headers["content-type"].startswith("image/")`, `response.headers["etag"] == f'"{uploaded_image_hash}"'` (extract hash from upload response), `"immutable" in response.headers["cache-control"]`, and `len(response.content) > 0`; run `pytest api/tests/integration/test_serving.py::test_file_returns_200_with_content` and confirm it **fails** + +### Implementation for User Story 1 + +- [x] T004 [US1] In `api/app/routers/images.py`: remove `RedirectResponse` from the `fastapi.responses` import; update `serve_image_file` to (a) call `data = await storage.get(image.storage_key)` inside a `try/except Exception` that raises an HTTPException with status 500 and code `"storage_error"`, then (b) return `Response(content=data, media_type=image.mime_type, headers={"ETag": f'"{image.hash}"', "Cache-Control": "public, max-age=31536000, immutable"})` +- [x] T005 [US1] Run `pytest api/tests/integration/test_serving.py` and confirm both `test_file_returns_200_with_content` and `test_file_unknown_id_returns_404` pass; then run `pytest api/` to confirm no regressions across all test files + +**Checkpoint**: User Story 1 is fully functional. Images load via the API proxy with correct caching headers. No redirect to storage occurs. + +--- + +## Phase 4: User Story 2 — Storage Backend Remains Private (Priority: P1) + +**Goal**: Verify, with an explicit test, that no storage-specific detail (hostname, presigned URL, bucket name) appears anywhere in any API response after the proxy change. + +**Independent Test**: Run the new test suite. Inspect browser DevTools network tab: zero requests to the storage host; zero occurrences of "minio", "s3://", or "amazonaws.com" in any response header or body. + +### Tests for User Story 2 (write FIRST — must FAIL on pre-T004 code) ⚠️ + +- [x] T006 [US2] In `api/tests/integration/test_serving.py`: add test `test_file_response_exposes_no_storage_details` that uploads an image, calls `GET /api/v1/images/{id}/file`, then asserts: (a) `"location" not in response.headers` (no redirect target), (b) `"minio" not in response.text.lower()`, (c) `"s3://" not in response.text.lower()`, (d) `"amazonaws.com" not in response.text.lower()`; run the test against the **pre-T004** code and confirm it fails on assertion (a) (`Location` header is present in the redirect response) + +### Implementation for User Story 2 + +No new implementation — US2 is fully satisfied by the T004 router change. The test written in T006 must pass once T004 is complete. + +- [x] T007 [US2] Run `pytest api/tests/integration/test_serving.py` and confirm `test_file_response_exposes_no_storage_details` passes; this confirms US2 is satisfied by the US1 implementation + +**Checkpoint**: Both user stories verified. Storage backend is provably private. Full proxy behaviour confirmed with explicit assertions. + +--- + +## Phase 5: Polish & Cross-Cutting Concerns + +- [x] T008 [P] Run `ruff check api/ && ruff format --check api/` and fix any lint or formatting issues in `api/app/storage/backend.py`, `api/app/storage/s3_backend.py`, and `api/app/routers/images.py` +- [x] T009 Run full test suite `pytest api/ -v` and confirm all tests pass (expected: existing 45 tests + 2 new/updated tests = 47 total); record final count + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Foundational (Phase 2)**: No dependencies — start immediately +- **US1 (Phase 3)**: Depends on Phase 2 complete (T001 + T002 done) +- **US2 (Phase 4)**: Depends on Phase 3 complete (T004 done, since T006 tests against pre-T004 code first) +- **Polish (Phase 5)**: Depends on Phases 3 and 4 complete + +### User Story Dependencies + +- **US1**: Independent — requires only Phase 2 +- **US2**: Logically depends on US1 (its implementation task is zero — it reuses T004's work; T006 test is written pre-T004 to confirm it fails, then passes after T004) + +### Within Each User Story + +- Test tasks MUST be written and confirmed FAILING before implementation tasks run (§5.1) +- T001 and T002 can run in parallel (different files) +- T003 must precede T004 +- T005 validates T004 +- T006 is written pre-T004 but validated post-T004 + +--- + +## Parallel Example: Phase 2 (Foundational) + +```bash +# T001 and T002 touch different files — run in parallel: +Task: "Remove get_presigned_url and add get() abstract method in api/app/storage/backend.py" +Task: "Remove get_presigned_url impl and add get() body in api/app/storage/s3_backend.py" +``` + +--- + +## Implementation Strategy + +### MVP (Full feature — both user stories together) + +1. Complete Phase 2: T001, T002 (parallel) +2. Complete Phase 3: T003 (fail), T004 (implement), T005 (verify) +3. Complete Phase 4: T006 (fail → passes), T007 (verify) +4. Complete Phase 5: T008 (lint), T009 (final count) +5. **STOP and VALIDATE**: Confirm zero browser requests to storage host in DevTools + +### Total tasks: 9 (T001–T009) + +--- + +## Notes + +- [P] tasks touch different files and have no mutual dependencies +- T006 must be run against pre-T004 code to confirm it fails — this is the TDD "red" step for US2 +- `storage_key` in image metadata responses is a SHA-256 hex string (not a URL); it does not constitute an exposed storage credential and is out of scope for this feature per research.md Decision 5 +- After this feature, `get_presigned_url` has no callers anywhere in the codebase — its removal is safe