Feat: Proxy image content through the API instead of redirecting to MinIO
Replace the presigned-URL redirect (302) in GET /api/v1/images/{id}/file
with a direct proxy that fetches bytes from S3 server-side and returns them
to the client. The browser never contacts the storage backend, eliminating
the /etc/hosts workaround needed in local development.
- StorageBackend: swap get_presigned_url for get(key) -> bytes
- S3StorageBackend: implement get() via aiobotocore get_object
- serve_image_file: return Response with ETag + Cache-Control: immutable
- test_serving: assert 200 + content-type + ETag; add no-storage-details test
- Spec Kit artifacts for feature 002-api-image-proxy
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"feature_directory": "specs/001-reaction-image-board"
|
||||
"feature_directory": "specs/002-api-image-proxy"
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
<!-- SPECKIT START -->
|
||||
For additional context about technologies to be used, project structure,
|
||||
shell commands, and other important information, read the current plan at
|
||||
`specs/001-reaction-image-board/plan.md`.
|
||||
`specs/002-api-image-proxy/plan.md`.
|
||||
<!-- SPECKIT END -->
|
||||
|
||||
@@ -1,11 +1,8 @@
|
||||
import io
|
||||
import struct
|
||||
import uuid
|
||||
import zlib
|
||||
from typing import Annotated, Any
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, Response, UploadFile
|
||||
from fastapi.responses import RedirectResponse
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.auth.provider import AuthProvider
|
||||
@@ -219,8 +216,21 @@ async def serve_image_file(
|
||||
status_code=404,
|
||||
detail={"detail": "Image not found", "code": "image_not_found"},
|
||||
)
|
||||
url = await storage.get_presigned_url(image.storage_key, expires_in_seconds=3600)
|
||||
return RedirectResponse(url=url, status_code=302)
|
||||
try:
|
||||
data = await storage.get(image.storage_key)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"detail": "Failed to retrieve image content", "code": "storage_error"},
|
||||
) from None
|
||||
return Response(
|
||||
content=data,
|
||||
media_type=image.mime_type,
|
||||
headers={
|
||||
"ETag": f'"{image.hash}"',
|
||||
"Cache-Control": "public, max-age=31536000, immutable",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@router.patch("/images/{image_id}/tags")
|
||||
|
||||
@@ -7,8 +7,8 @@ class StorageBackend(ABC):
|
||||
"""Store object at key with given content type."""
|
||||
|
||||
@abstractmethod
|
||||
async def get_presigned_url(self, key: str, expires_in_seconds: int = 3600) -> str:
|
||||
"""Return a pre-signed URL valid for expires_in_seconds."""
|
||||
async def get(self, key: str) -> bytes:
|
||||
"""Return object content as bytes."""
|
||||
|
||||
@abstractmethod
|
||||
async def delete(self, key: str) -> None:
|
||||
|
||||
@@ -32,14 +32,10 @@ class S3StorageBackend(StorageBackend):
|
||||
ContentType=content_type,
|
||||
)
|
||||
|
||||
async def get_presigned_url(self, key: str, expires_in_seconds: int = 3600) -> str:
|
||||
async def get(self, key: str) -> bytes:
|
||||
async with self._client() as client:
|
||||
url = await client.generate_presigned_url(
|
||||
"get_object",
|
||||
Params={"Bucket": self._settings.s3_bucket_name, "Key": key},
|
||||
ExpiresIn=expires_in_seconds,
|
||||
)
|
||||
return url
|
||||
response = await client.get_object(Bucket=self._settings.s3_bucket_name, Key=key)
|
||||
return await response["Body"].read()
|
||||
|
||||
async def delete(self, key: str) -> None:
|
||||
async with self._client() as client:
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
"""
|
||||
T055 — GET /api/v1/images/{id}/file → 302 with Location header
|
||||
T055 — GET /api/v1/images/{id}/file → 200 with binary content, ETag, Cache-Control
|
||||
T056 — /file for unknown ID → 404 image_not_found
|
||||
T057 — /file response exposes no storage-specific details
|
||||
"""
|
||||
import io
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -17,20 +19,23 @@ def _minimal_webp() -> bytes:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_redirect_returns_302(client):
|
||||
async def test_file_returns_200_with_content(client):
|
||||
data = _minimal_webp()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.webp", io.BytesIO(data), "image/webp")},
|
||||
)
|
||||
assert upload.status_code in (200, 201)
|
||||
image_id = upload.json()["id"]
|
||||
upload_body = upload.json()
|
||||
image_id = upload_body["id"]
|
||||
image_hash = upload_body["hash"]
|
||||
|
||||
# Don't follow redirects
|
||||
response = await client.get(f"/api/v1/images/{image_id}/file", follow_redirects=False)
|
||||
assert response.status_code == 302
|
||||
assert "Location" in response.headers
|
||||
assert response.headers["Location"] # must not be empty
|
||||
response = await client.get(f"/api/v1/images/{image_id}/file")
|
||||
assert response.status_code == 200
|
||||
assert response.headers["content-type"].startswith("image/")
|
||||
assert response.headers["etag"] == f'"{image_hash}"'
|
||||
assert "immutable" in response.headers["cache-control"]
|
||||
assert len(response.content) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -39,3 +44,21 @@ async def test_file_unknown_id_returns_404(client):
|
||||
assert response.status_code == 404
|
||||
body = response.json()
|
||||
assert body["code"] == "image_not_found"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_file_response_exposes_no_storage_details(client):
|
||||
data = _minimal_webp()
|
||||
upload = await client.post(
|
||||
"/api/v1/images",
|
||||
files={"file": ("img.webp", io.BytesIO(data), "image/webp")},
|
||||
)
|
||||
assert upload.status_code in (200, 201)
|
||||
image_id = upload.json()["id"]
|
||||
|
||||
response = await client.get(f"/api/v1/images/{image_id}/file")
|
||||
assert response.status_code == 200
|
||||
assert "location" not in response.headers
|
||||
assert "minio" not in response.text.lower()
|
||||
assert "s3://" not in response.text.lower()
|
||||
assert "amazonaws.com" not in response.text.lower()
|
||||
|
||||
34
specs/002-api-image-proxy/checklists/requirements.md
Normal file
34
specs/002-api-image-proxy/checklists/requirements.md
Normal file
@@ -0,0 +1,34 @@
|
||||
# Specification Quality Checklist: API Image Proxy
|
||||
|
||||
**Purpose**: Validate specification completeness and quality before proceeding to planning
|
||||
**Created**: 2026-05-03
|
||||
**Feature**: [spec.md](../spec.md)
|
||||
|
||||
## Content Quality
|
||||
|
||||
- [x] No implementation details (languages, frameworks, APIs)
|
||||
- [x] Focused on user value and business needs
|
||||
- [x] Written for non-technical stakeholders
|
||||
- [x] All mandatory sections completed
|
||||
|
||||
## Requirement Completeness
|
||||
|
||||
- [x] No [NEEDS CLARIFICATION] markers remain
|
||||
- [x] Requirements are testable and unambiguous
|
||||
- [x] Success criteria are measurable
|
||||
- [x] Success criteria are technology-agnostic (no implementation details)
|
||||
- [x] All acceptance scenarios are defined
|
||||
- [x] Edge cases are identified
|
||||
- [x] Scope is clearly bounded
|
||||
- [x] Dependencies and assumptions identified
|
||||
|
||||
## Feature Readiness
|
||||
|
||||
- [x] All functional requirements have clear acceptance criteria
|
||||
- [x] User scenarios cover primary flows
|
||||
- [x] Feature meets measurable outcomes defined in Success Criteria
|
||||
- [x] No implementation details leak into specification
|
||||
|
||||
## Notes
|
||||
|
||||
All checklist items pass. Spec is ready for `/speckit-plan`.
|
||||
62
specs/002-api-image-proxy/contracts/api.md
Normal file
62
specs/002-api-image-proxy/contracts/api.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# API Contract: Image Content Endpoint
|
||||
|
||||
**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03
|
||||
|
||||
## Changed endpoint
|
||||
|
||||
### `GET /api/v1/images/{image_id}/file`
|
||||
|
||||
Serves the binary content of the image with the given ID, proxied through the API from object storage.
|
||||
|
||||
**Path parameters**
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `image_id` | UUID | Unique identifier of the image |
|
||||
|
||||
**Responses**
|
||||
|
||||
#### `200 OK` — Image content
|
||||
|
||||
| Header | Value | Notes |
|
||||
|--------|-------|-------|
|
||||
| `Content-Type` | `image/jpeg` \| `image/png` \| `image/gif` \| `image/webp` | Taken from stored `mime_type` |
|
||||
| `ETag` | `"<sha256-hex>"` | SHA-256 hash of the image content; quoted string per RFC 7232 |
|
||||
| `Cache-Control` | `public, max-age=31536000, immutable` | Safe: images are immutable after upload |
|
||||
|
||||
Body: raw image bytes.
|
||||
|
||||
#### `404 Not Found` — Image not found
|
||||
|
||||
```json
|
||||
{ "detail": "Image not found", "code": "image_not_found" }
|
||||
```
|
||||
|
||||
#### `500 Internal Server Error` — Storage retrieval failure
|
||||
|
||||
```json
|
||||
{ "detail": "Failed to retrieve image content", "code": "storage_error" }
|
||||
```
|
||||
|
||||
No storage-specific details (bucket name, hostname, credentials) are exposed in error responses.
|
||||
|
||||
---
|
||||
|
||||
## Breaking change from prior behaviour
|
||||
|
||||
| Aspect | Before | After |
|
||||
|--------|--------|-------|
|
||||
| Status code | `302 Found` | `200 OK` |
|
||||
| `Location` header | Present (presigned S3 URL) | Absent |
|
||||
| Body | Empty | Raw image bytes |
|
||||
| Browser behaviour | Follows redirect to storage | Receives content from API |
|
||||
|
||||
The endpoint path is unchanged. The UI requires no URL-construction changes.
|
||||
|
||||
---
|
||||
|
||||
## Removed StorageBackend capability
|
||||
|
||||
The `get_presigned_url` operation is removed from the `StorageBackend` interface. It was the only mechanism for generating time-limited direct-access URLs to stored objects, and had a single call site (`serve_image_file`). With the proxy in place it has no remaining callers.
|
||||
|
||||
Any future need for presigned URLs (e.g., direct-upload flows) would require a new spec and a new interface method.
|
||||
25
specs/002-api-image-proxy/data-model.md
Normal file
25
specs/002-api-image-proxy/data-model.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Data Model: API Image Proxy
|
||||
|
||||
**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03
|
||||
|
||||
## Summary
|
||||
|
||||
This feature introduces no new entities and no database schema changes.
|
||||
|
||||
The change is entirely in the *retrieval path* for image file content:
|
||||
- The `Image` entity's `storage_key` field is used as the S3 object key to fetch content from storage.
|
||||
- The `mime_type` field is used to set the `Content-Type` response header.
|
||||
- The `hash` field (SHA-256) is used to set the `ETag` response header.
|
||||
|
||||
All three fields are already present on the `Image` entity per the existing data model in `specs/001-reaction-image-board/plan.md`.
|
||||
|
||||
## StorageBackend interface change
|
||||
|
||||
The `StorageBackend` abstract interface (`api/app/storage/backend.py`) gains one method and loses one:
|
||||
|
||||
| Method | Change | Notes |
|
||||
|--------|--------|-------|
|
||||
| `get_presigned_url(key, expires_in_seconds)` | **Removed** | No callers remain after this feature |
|
||||
| `get(key: str) -> bytes` | **Added** | Returns full object content as bytes |
|
||||
|
||||
This is an interface change, not a data model change. The database schema is unaffected.
|
||||
195
specs/002-api-image-proxy/plan.md
Normal file
195
specs/002-api-image-proxy/plan.md
Normal file
@@ -0,0 +1,195 @@
|
||||
# Implementation Plan: API Image Proxy
|
||||
|
||||
**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03 | **Spec**: [spec.md](spec.md)
|
||||
**Input**: Feature specification from `specs/002-api-image-proxy/spec.md`
|
||||
|
||||
## Summary
|
||||
|
||||
Replace the presigned-URL redirect in `GET /api/v1/images/{id}/file` with a direct
|
||||
proxy that fetches the image from object storage server-side and streams the bytes
|
||||
back to the client. The browser never contacts the storage backend. This eliminates
|
||||
the `/etc/hosts` workaround required in local development and keeps the storage
|
||||
layer fully private.
|
||||
|
||||
Changes touch three files in the API and the integration test suite. The UI
|
||||
requires no changes because the endpoint path is unchanged.
|
||||
|
||||
## Technical Context
|
||||
|
||||
**Language/Version**: Python 3.12+ (API); TypeScript strict mode (UI — no changes)
|
||||
**Primary Dependencies**: FastAPI, aiobotocore (S3 retrieval), pytest-asyncio (tests)
|
||||
**Storage**: S3-compatible object storage (MinIO locally, real S3 in production) via `StorageBackend` interface
|
||||
**Testing**: pytest + pytest-asyncio; existing integration test fixture reused
|
||||
**Target Platform**: Linux server (containerised); modern evergreen desktop browsers
|
||||
**Project Type**: Web application — FastAPI API + Angular SPA
|
||||
**Performance Goals**: Image load time via proxy within 20% of direct storage access on local network
|
||||
**Constraints**: Max 50 MB per image; no storage details exposed in any error response
|
||||
**Scale/Scope**: Single-user personal application; no concurrency or throughput targets for v1
|
||||
|
||||
## Constitution Check
|
||||
|
||||
*GATE: Must pass before Phase 0 research. Re-checked after Phase 1 design below.*
|
||||
|
||||
| Principle | Check | Status |
|
||||
|-----------|-------|--------|
|
||||
| §2.1 Separation of concerns | API proxies bytes; UI is unchanged; storage detail never reaches the browser | ✅ |
|
||||
| §2.2 Dependency direction | UI → API → Storage; no new upward imports introduced | ✅ |
|
||||
| §2.3 Storage abstraction | Retrieval goes through the `StorageBackend.get()` method; no raw S3 calls in the router | ✅ |
|
||||
| §2.4 Auth abstraction | No change to auth flow | ✅ |
|
||||
| §2.5 DB abstraction | No query logic moves outside repositories | ✅ |
|
||||
| §2.6 No speculative abstraction | `get_presigned_url` removed (no callers); `get()` added (has one caller) | ✅ |
|
||||
| §3.1 API versioning | Endpoint path `/api/v1/images/{id}/file` is unchanged | ✅ |
|
||||
| §3.3 Error shape | Storage errors return `{"detail": "...", "code": "storage_error"}` | ✅ |
|
||||
| §5.1 TDD non-negotiable | Failing tests written before implementation in every task | ✅ |
|
||||
| §5.2 Test pyramid | Existing integration test updated; no unit test needed (thin router delegation) | ✅ |
|
||||
| §5.4 CI gate | All tests + ruff must pass before milestone is done | ✅ |
|
||||
|
||||
**Post-design re-check**: Contracts and data model confirmed above; all gates still pass.
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Documentation (this feature)
|
||||
|
||||
```text
|
||||
specs/002-api-image-proxy/
|
||||
├── plan.md # This file
|
||||
├── spec.md # Feature specification
|
||||
├── research.md # Phase 0 decisions
|
||||
├── data-model.md # Interface change summary
|
||||
├── contracts/
|
||||
│ └── api.md # Updated endpoint contract
|
||||
├── checklists/
|
||||
│ └── requirements.md # Spec quality checklist
|
||||
└── tasks.md # Phase 2 output (/speckit-tasks — NOT created here)
|
||||
```
|
||||
|
||||
### Files changed
|
||||
|
||||
```text
|
||||
api/
|
||||
├── app/
|
||||
│ ├── storage/
|
||||
│ │ ├── backend.py # Remove get_presigned_url; add get(key) -> bytes
|
||||
│ │ └── s3_backend.py # Remove get_presigned_url impl; add get(key) -> bytes
|
||||
│ └── routers/
|
||||
│ └── images.py # serve_image_file: RedirectResponse → Response with bytes + cache headers
|
||||
└── tests/
|
||||
└── integration/
|
||||
└── test_serving.py # T055: update assertion from 302 to 200 with content-type + ETag
|
||||
```
|
||||
|
||||
No UI files change. `ImageService.getFileUrl()` already returns `/api/v1/images/{id}/file`.
|
||||
|
||||
## Milestones
|
||||
|
||||
> **TDD ORDER IS MANDATORY** (constitution §5.1): Write the failing test first,
|
||||
> confirm it fails, then implement until it passes.
|
||||
|
||||
### M1 — Update StorageBackend interface and S3 implementation
|
||||
|
||||
**Goal**: The storage layer can retrieve object bytes; presigned-URL generation is removed.
|
||||
|
||||
**Tasks** (TDD order):
|
||||
|
||||
1. In `api/app/storage/backend.py`:
|
||||
- Remove the `get_presigned_url` abstract method
|
||||
- Add `async def get(self, key: str) -> bytes` as a new abstract method
|
||||
|
||||
2. In `api/app/storage/s3_backend.py`:
|
||||
- Remove the `get_presigned_url` implementation
|
||||
- Add `async def get(self, key: str) -> bytes`:
|
||||
```
|
||||
async with self._client() as client:
|
||||
response = await client.get_object(
|
||||
Bucket=self._settings.s3_bucket_name,
|
||||
Key=key,
|
||||
)
|
||||
return await response["Body"].read()
|
||||
```
|
||||
|
||||
3. Verify: `ruff check api/` passes; `mypy` or pyright (if configured) passes.
|
||||
|
||||
**Done criterion**: Interface and implementation compile cleanly; no existing tests break.
|
||||
|
||||
---
|
||||
|
||||
### M2 — Update the integration test (failing first)
|
||||
|
||||
**Goal**: T055 asserts the new contract (200 + bytes) before any router change.
|
||||
|
||||
**Tasks** (TDD order):
|
||||
|
||||
1. In `api/tests/integration/test_serving.py`, update `test_file_redirect_returns_302`:
|
||||
- Rename to `test_file_returns_200_with_content`
|
||||
- Change assertion from `status_code == 302` and `Location` header to:
|
||||
- `response.status_code == 200`
|
||||
- `response.headers["content-type"]` starts with `"image/"`
|
||||
- `response.headers["etag"]` is present and equals `f'"{image.hash}"'`
|
||||
- `response.headers["cache-control"]` contains `"immutable"`
|
||||
- `len(response.content) > 0`
|
||||
- Remove `follow_redirects=False` from the client call
|
||||
|
||||
2. Run `pytest api/tests/integration/test_serving.py` — confirm T055 **fails** (still 302).
|
||||
|
||||
**Done criterion**: Test file updated; new test fails with the current implementation.
|
||||
|
||||
---
|
||||
|
||||
### M3 — Update the router
|
||||
|
||||
**Goal**: `serve_image_file` proxies bytes from storage instead of redirecting.
|
||||
|
||||
**Tasks** (TDD order — tests already written in M2):
|
||||
|
||||
1. In `api/app/routers/images.py`:
|
||||
- Remove `from fastapi.responses import RedirectResponse` import
|
||||
- Add `from fastapi.responses import Response` (already present as `Response` from fastapi)
|
||||
- Update `serve_image_file`:
|
||||
|
||||
```python
|
||||
@router.get("/images/{image_id}/file")
|
||||
async def serve_image_file(
|
||||
image_id: uuid.UUID,
|
||||
db: AsyncSession = Depends(get_db),
|
||||
storage: StorageBackend = Depends(get_storage),
|
||||
):
|
||||
image_repo = ImageRepository(db)
|
||||
image = await image_repo.get_by_id(image_id)
|
||||
if not image:
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail={"detail": "Image not found", "code": "image_not_found"},
|
||||
)
|
||||
try:
|
||||
data = await storage.get(image.storage_key)
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail={"detail": "Failed to retrieve image content", "code": "storage_error"},
|
||||
)
|
||||
return Response(
|
||||
content=data,
|
||||
media_type=image.mime_type,
|
||||
headers={
|
||||
"ETag": f'"{image.hash}"',
|
||||
"Cache-Control": "public, max-age=31536000, immutable",
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
2. Run `pytest api/tests/integration/test_serving.py` — confirm all tests pass.
|
||||
3. Run full test suite: `pytest api/` — confirm no regressions.
|
||||
4. Run `ruff check api/ && ruff format --check api/`.
|
||||
|
||||
**Done criterion**: All 45+ tests pass; linter clean; the presigned-URL redirect is gone.
|
||||
|
||||
## Post-design Constitution Re-check
|
||||
|
||||
| Principle | Verdict |
|
||||
|-----------|---------|
|
||||
| §2.3 Storage abstraction | `get()` added to interface; router only calls `storage.get()` | ✅ |
|
||||
| §2.6 No speculative abstraction | `get_presigned_url` removed with zero callers remaining | ✅ |
|
||||
| §3.3 Error shape | `storage_error` code used for retrieval failures | ✅ |
|
||||
| §5.1 TDD | Test updated to fail before router change | ✅ |
|
||||
|
||||
All gates pass. Feature is ready for `/speckit-tasks`.
|
||||
56
specs/002-api-image-proxy/research.md
Normal file
56
specs/002-api-image-proxy/research.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# Research: API Image Proxy
|
||||
|
||||
**Branch**: `002-api-image-proxy` | **Date**: 2026-05-03
|
||||
|
||||
## Decision 1: Storage retrieval method
|
||||
|
||||
**Decision**: Add `async def get(key: str) -> bytes` to `StorageBackend` and remove `get_presigned_url`.
|
||||
|
||||
**Rationale**: The only consumer of `get_presigned_url` is the `serve_image_file` route. Once the route stops redirecting and starts streaming bytes directly, `get_presigned_url` has no call sites. Per §2.6 (no speculative abstraction), it must be removed. A simple `get → bytes` method is consistent with `put` which already operates on `bytes`, and is straightforward to implement and test. At 50 MB maximum file size, loading the full object into memory in a single call is acceptable — the same pattern is used on upload already.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `async def stream(key) -> AsyncIterator[bytes]`: True streaming avoids buffering but complicates the abstract interface (async generators cannot cleanly implement abstract methods in Python without workarounds). Deferred; can be introduced later if memory pressure is observed.
|
||||
- Keep `get_presigned_url` and add `get`: Violates §2.6 since `get_presigned_url` would then have no callers.
|
||||
|
||||
---
|
||||
|
||||
## Decision 2: HTTP response type for the proxy endpoint
|
||||
|
||||
**Decision**: Return `fastapi.Response(content=data, media_type=mime_type)` with the image bytes directly.
|
||||
|
||||
**Rationale**: FastAPI's `Response` with raw bytes is the simplest correct approach when the full content is already in memory. The `mime_type` field is already stored on the `Image` database record, so the router can set `Content-Type` from it without re-inspecting the file.
|
||||
|
||||
**Alternatives considered**:
|
||||
- `StreamingResponse` with an async generator: Appropriate for true streaming but adds complexity with no benefit when content is already loaded as `bytes`.
|
||||
- `FileResponse`: For local file paths only, not applicable.
|
||||
|
||||
---
|
||||
|
||||
## Decision 3: Caching headers
|
||||
|
||||
**Decision**: Add `ETag: "<sha256-hash>"` and `Cache-Control: public, max-age=31536000, immutable` to the content response.
|
||||
|
||||
**Rationale**: Image files are immutable after upload (constitution §4.2). The SHA-256 hash is already stored on the `Image` record. An `ETag` allows conditional `GET` requests (`If-None-Match`) so browsers skip re-downloading unchanged content. `Cache-Control: immutable` tells browsers the content will never change for this URL, eliminating speculative revalidation. Together these satisfy SC-004 (browser caching).
|
||||
|
||||
**Alternatives considered**:
|
||||
- `Last-Modified` header: Less precise than ETag for binary content.
|
||||
- No caching headers: Fails SC-004.
|
||||
|
||||
---
|
||||
|
||||
## Decision 4: Endpoint path
|
||||
|
||||
**Decision**: Keep the existing path `GET /api/v1/images/{image_id}/file`. Change the response from `302 RedirectResponse` to `200` with binary content.
|
||||
|
||||
**Rationale**: The path already exists, is already referenced by the UI's `getFileUrl()` method, and appears in the existing OpenAPI contract. Changing the response body but not the path means the UI requires no URL-construction changes. The existing integration tests (`test_serving.py`) must be updated to assert `200` instead of `302`.
|
||||
|
||||
**Alternatives considered**:
|
||||
- New path `/api/v1/images/{id}/content`: Would require UI changes with no benefit; the existing path already semantically expresses "the file content for this image".
|
||||
|
||||
---
|
||||
|
||||
## Decision 5: Removal of `storage_key` from public API response
|
||||
|
||||
**Decision**: Out of scope for this feature. `storage_key` in the image metadata response is an internal S3 key (SHA-256 hex string), but it is not an actionable credential or hostname. Removal is a separate API-breaking change.
|
||||
|
||||
**Rationale**: The spec explicitly scopes this feature to how image *content* is delivered. Metadata response shape changes are independent and require their own spec and contract update.
|
||||
127
specs/002-api-image-proxy/spec.md
Normal file
127
specs/002-api-image-proxy/spec.md
Normal file
@@ -0,0 +1,127 @@
|
||||
# Feature Specification: API Image Proxy
|
||||
|
||||
**Feature Branch**: `002-api-image-proxy`
|
||||
**Created**: 2026-05-03
|
||||
**Status**: Draft
|
||||
**Input**: User description: "Instead of directly exposing the Minio storage, proxy fetching images through the API. The API server should talk directly to the S3 backend."
|
||||
|
||||
## User Scenarios & Testing *(mandatory)*
|
||||
|
||||
### User Story 1 — View Images Without Storage Configuration (Priority: P1)
|
||||
|
||||
A user opens the application on any network without any special DNS or host-file
|
||||
configuration. All images — both thumbnails in the library grid and the full-size
|
||||
image on the detail page — load correctly.
|
||||
|
||||
**Why this priority**: This is the core motivator for the feature. Direct storage
|
||||
exposure required `/etc/hosts` hacks to resolve the internal storage hostname;
|
||||
the proxy removes this friction entirely and makes the application work
|
||||
out-of-the-box.
|
||||
|
||||
**Independent Test**: Start the application on a machine with no custom
|
||||
`/etc/hosts` entries for the storage backend. Open the library and verify all
|
||||
thumbnails load. Open an image detail page and verify the full-size image loads.
|
||||
Confirm no network errors related to image fetching appear in the browser console.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the application is running, **When** the user opens the library,
|
||||
**Then** all image thumbnails display correctly with no host-file configuration
|
||||
required.
|
||||
|
||||
2. **Given** the user opens an image detail page, **When** the page loads,
|
||||
**Then** the full-size image displays correctly.
|
||||
|
||||
3. **Given** a request for an image that does not exist, **When** the client
|
||||
requests its content, **Then** a not-found response is returned with no
|
||||
storage-specific details exposed.
|
||||
|
||||
---
|
||||
|
||||
### User Story 2 — Storage Backend Remains Private (Priority: P1)
|
||||
|
||||
The image storage system is never directly reachable or identifiable from a
|
||||
client browser. Image URLs in the application reference only the API, never the
|
||||
storage backend's hostname, credentials, or bucket names.
|
||||
|
||||
**Why this priority**: Security and portability. A private storage backend means
|
||||
credentials cannot be extracted from the browser and the storage layer can be
|
||||
reconfigured without affecting clients.
|
||||
|
||||
**Independent Test**: Open browser developer tools while browsing the
|
||||
application. Inspect all image `src` attributes, network requests, and API
|
||||
responses. Confirm that no request goes directly to the storage backend and no
|
||||
storage-specific URL, hostname, or credential appears in the browser's network
|
||||
tab or page source.
|
||||
|
||||
**Acceptance Scenarios**:
|
||||
|
||||
1. **Given** the user is browsing the application, **When** they inspect network
|
||||
traffic, **Then** all image content is fetched through the application's API
|
||||
domain — no request goes directly to storage infrastructure.
|
||||
|
||||
2. **Given** any API response containing image metadata, **When** it is
|
||||
inspected, **Then** no storage-specific URLs, hostnames, bucket names, or
|
||||
credentials appear in the response body.
|
||||
|
||||
---
|
||||
|
||||
### Edge Cases
|
||||
|
||||
- What happens when the storage backend is temporarily unavailable? → The proxy
|
||||
returns a generic server-error response; no storage internals are disclosed.
|
||||
- What happens when the image exists in the database but the file is missing from
|
||||
storage? → The proxy returns a not-found response with a generic message.
|
||||
- What happens when image content is large (up to 50 MB)? → Content streams to
|
||||
the client without exhausting server memory.
|
||||
|
||||
## Requirements *(mandatory)*
|
||||
|
||||
### Functional Requirements
|
||||
|
||||
- **FR-001**: The API MUST expose an endpoint that returns image binary content
|
||||
when given a valid image identifier.
|
||||
- **FR-002**: The API MUST serve image content with the correct file-type
|
||||
indicator so that browsers render it natively.
|
||||
- **FR-003**: The image content endpoint MUST be the sole mechanism by which
|
||||
clients retrieve image binary data; no other image content URL format MUST be
|
||||
presented to clients.
|
||||
- **FR-004**: The storage backend MUST NOT be directly reachable by client
|
||||
browsers; all image content MUST flow through the API.
|
||||
- **FR-005**: The API MUST return a not-found response when content is requested
|
||||
for a non-existent image.
|
||||
- **FR-006**: The API MUST handle image content requests for files up to 50 MB
|
||||
without exhausting server memory.
|
||||
- **FR-007**: The image content endpoint MUST include caching metadata in its
|
||||
response so that browsers can avoid redundant fetches for the same image.
|
||||
- **FR-008**: The UI MUST construct all image display URLs using the API's image
|
||||
content endpoint, for both library thumbnails and the detail view.
|
||||
|
||||
## Success Criteria *(mandatory)*
|
||||
|
||||
### Measurable Outcomes
|
||||
|
||||
- **SC-001**: A user can view all images in the library and on detail pages on a
|
||||
freshly configured machine with no custom DNS or host-file entries for the
|
||||
storage backend.
|
||||
- **SC-002**: No image-related network request in the browser targets the storage
|
||||
backend directly — verifiable by inspecting browser developer tools.
|
||||
- **SC-003**: Images up to 50 MB load completely without a server-side memory or
|
||||
timeout error.
|
||||
- **SC-004**: A browser that has already loaded an image does not re-download it
|
||||
on a second visit to the same page (browser caching headers are respected).
|
||||
- **SC-005**: Image load time via the proxy is within 20% of the time taken when
|
||||
served directly from storage on a local network connection.
|
||||
|
||||
## Assumptions
|
||||
|
||||
- The storage backend is accessible from the API server via an internal network
|
||||
hostname that is not reachable from client browsers.
|
||||
- Image files are immutable after upload; once a client has the content for a
|
||||
given image identifier it need not be re-fetched.
|
||||
- No authentication is required to access the image content endpoint in Phase 1,
|
||||
consistent with the application's no-auth Phase 1 stance.
|
||||
- This feature changes how image content is delivered but does not alter any
|
||||
image metadata endpoints, the upload flow, or tag management behaviour.
|
||||
- No new database entities or schema changes are required; only how the existing
|
||||
stored file is retrieved and served changes.
|
||||
135
specs/002-api-image-proxy/tasks.md
Normal file
135
specs/002-api-image-proxy/tasks.md
Normal file
@@ -0,0 +1,135 @@
|
||||
# Tasks: API Image Proxy
|
||||
|
||||
**Input**: Design documents from `specs/002-api-image-proxy/`
|
||||
**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/api.md ✅
|
||||
|
||||
**TDD**: Tests are non-negotiable per constitution §5.1. Every test task MUST be written and confirmed failing before its implementation task runs.
|
||||
|
||||
**Organization**: Tasks are grouped by user story to enable independent implementation and testing.
|
||||
|
||||
## Format: `[ID] [P?] [Story] Description`
|
||||
|
||||
- **[P]**: Can run in parallel (different files, no dependencies on incomplete tasks)
|
||||
- **[Story]**: Which user story this task belongs to (US1, US2)
|
||||
|
||||
---
|
||||
|
||||
## Phase 1: Setup
|
||||
|
||||
No setup required. The project is already initialized; the feature branch `002-api-image-proxy` is active.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2: Foundational (Blocking Prerequisites)
|
||||
|
||||
**Purpose**: Update the `StorageBackend` interface before any route or test change can reference the new method. These are pure interface/plumbing tasks with no callers yet — no preceding failing test is required because the tasks themselves introduce no observable behaviour change.
|
||||
|
||||
**⚠️ CRITICAL**: Both T001 and T002 must be complete before US1 work begins.
|
||||
|
||||
- [x] T001 Remove abstract method `get_presigned_url` and add `async def get(self, key: str) -> bytes` abstract method (with docstring "Return object content as bytes.") to `StorageBackend` in `api/app/storage/backend.py`
|
||||
- [x] T002 [P] Remove `get_presigned_url` implementation; add `async def get(self, key: str) -> bytes` that opens an aiobotocore client, calls `client.get_object(Bucket=self._settings.s3_bucket_name, Key=key)`, and returns `await response["Body"].read()` in `api/app/storage/s3_backend.py`
|
||||
|
||||
**Checkpoint**: `ruff check api/` passes; `pytest api/` passes (all existing tests green — `serve_image_file` route still compiles but will change in Phase 3).
|
||||
|
||||
---
|
||||
|
||||
## Phase 3: User Story 1 — View Images Without Storage Configuration (Priority: P1) 🎯 MVP
|
||||
|
||||
**Goal**: Replace the 302 presigned-URL redirect with a 200 binary response so images load without any DNS or `/etc/hosts` configuration.
|
||||
|
||||
**Independent Test**: Start the application with no custom host entries. Open the library and detail page. All images load. Browser network tab shows only requests to `/api/v1/` — none to the storage hostname.
|
||||
|
||||
### Tests for User Story 1 (write FIRST — must FAIL before T004) ⚠️
|
||||
|
||||
- [x] T003 [US1] In `api/tests/integration/test_serving.py`: rename `test_file_redirect_returns_302` to `test_file_returns_200_with_content`; remove `follow_redirects=False`; replace the 302/Location assertions with: `response.status_code == 200`, `response.headers["content-type"].startswith("image/")`, `response.headers["etag"] == f'"{uploaded_image_hash}"'` (extract hash from upload response), `"immutable" in response.headers["cache-control"]`, and `len(response.content) > 0`; run `pytest api/tests/integration/test_serving.py::test_file_returns_200_with_content` and confirm it **fails**
|
||||
|
||||
### Implementation for User Story 1
|
||||
|
||||
- [x] T004 [US1] In `api/app/routers/images.py`: remove `RedirectResponse` from the `fastapi.responses` import; update `serve_image_file` to (a) call `data = await storage.get(image.storage_key)` inside a `try/except Exception` that raises an HTTPException with status 500 and code `"storage_error"`, then (b) return `Response(content=data, media_type=image.mime_type, headers={"ETag": f'"{image.hash}"', "Cache-Control": "public, max-age=31536000, immutable"})`
|
||||
- [x] T005 [US1] Run `pytest api/tests/integration/test_serving.py` and confirm both `test_file_returns_200_with_content` and `test_file_unknown_id_returns_404` pass; then run `pytest api/` to confirm no regressions across all test files
|
||||
|
||||
**Checkpoint**: User Story 1 is fully functional. Images load via the API proxy with correct caching headers. No redirect to storage occurs.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4: User Story 2 — Storage Backend Remains Private (Priority: P1)
|
||||
|
||||
**Goal**: Verify, with an explicit test, that no storage-specific detail (hostname, presigned URL, bucket name) appears anywhere in any API response after the proxy change.
|
||||
|
||||
**Independent Test**: Run the new test suite. Inspect browser DevTools network tab: zero requests to the storage host; zero occurrences of "minio", "s3://", or "amazonaws.com" in any response header or body.
|
||||
|
||||
### Tests for User Story 2 (write FIRST — must FAIL on pre-T004 code) ⚠️
|
||||
|
||||
- [x] T006 [US2] In `api/tests/integration/test_serving.py`: add test `test_file_response_exposes_no_storage_details` that uploads an image, calls `GET /api/v1/images/{id}/file`, then asserts: (a) `"location" not in response.headers` (no redirect target), (b) `"minio" not in response.text.lower()`, (c) `"s3://" not in response.text.lower()`, (d) `"amazonaws.com" not in response.text.lower()`; run the test against the **pre-T004** code and confirm it fails on assertion (a) (`Location` header is present in the redirect response)
|
||||
|
||||
### Implementation for User Story 2
|
||||
|
||||
No new implementation — US2 is fully satisfied by the T004 router change. The test written in T006 must pass once T004 is complete.
|
||||
|
||||
- [x] T007 [US2] Run `pytest api/tests/integration/test_serving.py` and confirm `test_file_response_exposes_no_storage_details` passes; this confirms US2 is satisfied by the US1 implementation
|
||||
|
||||
**Checkpoint**: Both user stories verified. Storage backend is provably private. Full proxy behaviour confirmed with explicit assertions.
|
||||
|
||||
---
|
||||
|
||||
## Phase 5: Polish & Cross-Cutting Concerns
|
||||
|
||||
- [x] T008 [P] Run `ruff check api/ && ruff format --check api/` and fix any lint or formatting issues in `api/app/storage/backend.py`, `api/app/storage/s3_backend.py`, and `api/app/routers/images.py`
|
||||
- [x] T009 Run full test suite `pytest api/ -v` and confirm all tests pass (expected: existing 45 tests + 2 new/updated tests = 47 total); record final count
|
||||
|
||||
---
|
||||
|
||||
## Dependencies & Execution Order
|
||||
|
||||
### Phase Dependencies
|
||||
|
||||
- **Foundational (Phase 2)**: No dependencies — start immediately
|
||||
- **US1 (Phase 3)**: Depends on Phase 2 complete (T001 + T002 done)
|
||||
- **US2 (Phase 4)**: Depends on Phase 3 complete (T004 done, since T006 tests against pre-T004 code first)
|
||||
- **Polish (Phase 5)**: Depends on Phases 3 and 4 complete
|
||||
|
||||
### User Story Dependencies
|
||||
|
||||
- **US1**: Independent — requires only Phase 2
|
||||
- **US2**: Logically depends on US1 (its implementation task is zero — it reuses T004's work; T006 test is written pre-T004 to confirm it fails, then passes after T004)
|
||||
|
||||
### Within Each User Story
|
||||
|
||||
- Test tasks MUST be written and confirmed FAILING before implementation tasks run (§5.1)
|
||||
- T001 and T002 can run in parallel (different files)
|
||||
- T003 must precede T004
|
||||
- T005 validates T004
|
||||
- T006 is written pre-T004 but validated post-T004
|
||||
|
||||
---
|
||||
|
||||
## Parallel Example: Phase 2 (Foundational)
|
||||
|
||||
```bash
|
||||
# T001 and T002 touch different files — run in parallel:
|
||||
Task: "Remove get_presigned_url and add get() abstract method in api/app/storage/backend.py"
|
||||
Task: "Remove get_presigned_url impl and add get() body in api/app/storage/s3_backend.py"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Strategy
|
||||
|
||||
### MVP (Full feature — both user stories together)
|
||||
|
||||
1. Complete Phase 2: T001, T002 (parallel)
|
||||
2. Complete Phase 3: T003 (fail), T004 (implement), T005 (verify)
|
||||
3. Complete Phase 4: T006 (fail → passes), T007 (verify)
|
||||
4. Complete Phase 5: T008 (lint), T009 (final count)
|
||||
5. **STOP and VALIDATE**: Confirm zero browser requests to storage host in DevTools
|
||||
|
||||
### Total tasks: 9 (T001–T009)
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- [P] tasks touch different files and have no mutual dependencies
|
||||
- T006 must be run against pre-T004 code to confirm it fails — this is the TDD "red" step for US2
|
||||
- `storage_key` in image metadata responses is a SHA-256 hex string (not a URL); it does not constitute an exposed storage credential and is out of scope for this feature per research.md Decision 5
|
||||
- After this feature, `get_presigned_url` has no callers anywhere in the codebase — its removal is safe
|
||||
Reference in New Issue
Block a user