Feat: Replace UUID image identifiers with 8-character base62 short IDs

Short IDs become the canonical identifier in URLs (/i/:short_id),
MinIO/R2 storage keys, and all API responses. Hash-based deduplication
is preserved. Includes two-phase Alembic migration (003 adds nullable
column, 004 enforces NOT NULL) with a backfill script to copy storage
objects and populate short_id for existing images.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-10 00:13:55 +00:00
parent 87eb2703f5
commit 61d923d5be
41 changed files with 1445 additions and 137 deletions

View File

@@ -1,5 +1,3 @@
_BASE_ENV = {
"DATABASE_URL": "postgresql+asyncpg://u:p@localhost/db",
"S3_ENDPOINT_URL": "http://localhost:9000",
@@ -26,6 +24,7 @@ def test_settings_load_from_env(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()
@@ -43,6 +42,7 @@ def test_settings_max_upload_bytes_override(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()
@@ -55,6 +55,7 @@ def test_settings_jwt_expiry_override(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()
@@ -67,6 +68,7 @@ def test_api_docs_enabled_default(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()
@@ -79,6 +81,7 @@ def test_api_docs_enabled_false(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()
@@ -91,6 +94,7 @@ def test_api_docs_invalid_value_defaults_to_enabled(monkeypatch):
import importlib
import app.config as config_module
importlib.reload(config_module)
s = config_module.Settings()

View File

@@ -1,6 +1,6 @@
import hashlib
from app.utils import compute_sha256
from app.utils import compute_sha256, generate_short_id
def test_sha256_known_bytes():
@@ -19,3 +19,24 @@ def test_sha256_returns_64_char_hex():
result = compute_sha256(b"test data")
assert len(result) == 64
assert all(c in "0123456789abcdef" for c in result)
def test_generate_short_id_length():
assert len(generate_short_id()) == 8
def test_generate_short_id_charset():
result = generate_short_id()
assert all(
c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" for c in result
)
def test_generate_short_id_randomness():
assert generate_short_id() != generate_short_id()
def test_generate_short_id_importable():
from app.utils import generate_short_id as fn
assert callable(fn)

View File

@@ -0,0 +1,110 @@
"""Unit tests for migrate_to_short_ids script logic."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@pytest.fixture
def mock_image_null_short_id():
img = MagicMock()
img.id = "img-uuid-1"
img.short_id = None
img.storage_key = "oldhashkey1234567890"
img.thumbnail_key = "oldhashkey1234567890-thumb"
img.mime_type = "image/jpeg"
return img
@pytest.fixture
def mock_image_with_short_id():
img = MagicMock()
img.id = "img-uuid-2"
img.short_id = "AbCd1234"
img.storage_key = "AbCd1234"
img.thumbnail_key = "AbCd1234-thumb"
img.mime_type = "image/jpeg"
return img
@pytest.mark.asyncio
async def test_migrate_processes_image_without_short_id(mock_image_null_short_id):
"""Images with short_id IS NULL are processed: storage copied, DB updated, old keys deleted."""
from scripts.migrate_to_short_ids import migrate_image
storage = MagicMock()
storage.get = AsyncMock(return_value=b"imagedata")
storage.put = AsyncMock()
storage.delete = AsyncMock()
session = MagicMock()
session.execute = AsyncMock()
session.flush = AsyncMock()
old_key = mock_image_null_short_id.storage_key
new_short_id = "NewSh123"
with patch("scripts.migrate_to_short_ids.generate_short_id", return_value=new_short_id):
result = await migrate_image(mock_image_null_short_id, storage, session)
assert result is True
storage.put.assert_any_call(new_short_id, b"imagedata", "image/jpeg")
storage.delete.assert_any_call(old_key)
@pytest.mark.asyncio
async def test_migrate_skips_image_with_short_id(mock_image_with_short_id):
"""Images that already have a short_id are skipped."""
from scripts.migrate_to_short_ids import migrate_image
storage = MagicMock()
session = MagicMock()
result = await migrate_image(mock_image_with_short_id, storage, session)
assert result is False
storage.get.assert_not_called() if hasattr(storage.get, "assert_not_called") else None
@pytest.mark.asyncio
async def test_migrate_continues_on_storage_error(mock_image_null_short_id):
"""If storage copy fails, error is logged and migrate_image returns False without aborting."""
from scripts.migrate_to_short_ids import migrate_image
storage = MagicMock()
storage.get = AsyncMock(side_effect=Exception("storage read error"))
storage.put = AsyncMock()
storage.delete = AsyncMock()
session = MagicMock()
session.execute = AsyncMock()
session.flush = AsyncMock()
with patch("scripts.migrate_to_short_ids.generate_short_id", return_value="ErrSh123"):
result = await migrate_image(mock_image_null_short_id, storage, session)
assert result is False
storage.put.assert_not_called()
@pytest.mark.asyncio
async def test_migrate_summary_counts(mock_image_null_short_id, mock_image_with_short_id):
"""run_migration reports correct migrated and skipped counts."""
from scripts.migrate_to_short_ids import run_migration
storage = MagicMock()
storage.get = AsyncMock(return_value=b"data")
storage.put = AsyncMock()
storage.delete = AsyncMock()
session = MagicMock()
session.execute = AsyncMock()
session.flush = AsyncMock()
images = [mock_image_null_short_id, mock_image_with_short_id]
with patch("scripts.migrate_to_short_ids.generate_short_id", return_value="NewSh999"):
migrated, skipped, failed = await run_migration(images, storage, session)
assert migrated == 1
assert skipped == 1
assert failed == 0

View File

@@ -0,0 +1,59 @@
"""Unit tests for short_id generation, validation, and repository lookup."""
import re
from unittest.mock import AsyncMock, MagicMock
import pytest
from fastapi import HTTPException
from app.routers.images import _validate_short_id
from app.utils import generate_short_id
_SHORT_ID_RE = re.compile(r"^[a-zA-Z0-9]{8}$")
def test_validate_short_id_accepts_valid():
_validate_short_id("AbCd1234") # must not raise
def test_validate_short_id_rejects_too_long():
with pytest.raises(HTTPException) as exc:
_validate_short_id("toolong!!")
assert exc.value.status_code == 422
def test_validate_short_id_rejects_too_short():
with pytest.raises(HTTPException) as exc:
_validate_short_id("short")
assert exc.value.status_code == 422
def test_validate_short_id_rejects_invalid_chars():
with pytest.raises(HTTPException) as exc:
_validate_short_id("has spa!")
assert exc.value.status_code == 422
def test_generate_short_id_unique():
ids = {generate_short_id() for _ in range(100)}
assert len(ids) > 90 # collision in 100 draws would be astronomically unlikely
def test_repo_get_by_short_id_uses_correct_field():
"""get_by_short_id selects on Image.short_id, not Image.id."""
import asyncio
from app.repositories.image_repo import ImageRepository
mock_session = MagicMock()
scalar = MagicMock()
scalar.scalar_one_or_none = MagicMock(return_value=None)
mock_session.execute = AsyncMock(return_value=scalar)
repo = ImageRepository(mock_session)
asyncio.get_event_loop().run_until_complete(repo.get_by_short_id("AbCd1234"))
call_args = mock_session.execute.call_args[0][0]
compiled = call_args.compile(compile_kwargs={"literal_binds": True})
assert "short_id" in str(compiled)
assert "AbCd1234" in str(compiled)

View File

@@ -2,17 +2,21 @@
T037 — tag normalisation: uppercase → lowercase, whitespace stripped
T038 — tag validation: rejects names > 64 chars, invalid chars
"""
import pytest
from app.repositories.tag_repo import TagRepository
@pytest.mark.parametrize("raw,expected", [
("Cat", "cat"),
(" funny ", "funny"),
("REACTION", "reaction"),
(" MiXeD ", "mixed"),
])
@pytest.mark.parametrize(
"raw,expected",
[
("Cat", "cat"),
(" funny ", "funny"),
("REACTION", "reaction"),
(" MiXeD ", "mixed"),
],
)
def test_normalise_lowercases_and_strips(raw, expected):
assert TagRepository.normalise(raw) == expected

View File

@@ -1,4 +1,5 @@
"""Unit tests for thumbnail generation utility."""
import io
from PIL import Image as PILImage

View File

@@ -1,14 +1,13 @@
import uuid
from unittest.mock import MagicMock
import pytest
from app.routers.images import _image_to_dict
def _make_image(*, thumbnail_key=None):
img = MagicMock()
img.id = uuid.UUID("00000000-0000-0000-0000-000000000001")
img.short_id = "AbCd1234"
img.hash = "abc123"
img.filename = "test.jpg"
img.mime_type = "image/jpeg"
@@ -27,6 +26,7 @@ def test_cdn_configured_with_thumbnail():
result = _image_to_dict(img, cdn_base="https://cdn.example.com")
assert result["file_url"] == "https://cdn.example.com/abc123storagekey"
assert result["thumbnail_url"] == "https://cdn.example.com/abc123storagekey-thumb"
assert result["short_id"] == "AbCd1234"
def test_cdn_configured_no_thumbnail():
@@ -34,19 +34,20 @@ def test_cdn_configured_no_thumbnail():
result = _image_to_dict(img, cdn_base="https://cdn.example.com")
assert result["file_url"] == "https://cdn.example.com/abc123storagekey"
assert result["thumbnail_url"] is None
assert result["short_id"] == "AbCd1234"
def test_no_cdn_with_thumbnail():
img = _make_image(thumbnail_key="abc123storagekey-thumb")
result = _image_to_dict(img, cdn_base=None)
assert result["file_url"] == "/api/v1/images/00000000-0000-0000-0000-000000000001/file"
assert result["thumbnail_url"] == "/api/v1/images/00000000-0000-0000-0000-000000000001/thumbnail"
assert result["file_url"] == "/api/v1/i/AbCd1234/file"
assert result["thumbnail_url"] == "/api/v1/i/AbCd1234/thumbnail"
def test_no_cdn_no_thumbnail():
img = _make_image(thumbnail_key=None)
result = _image_to_dict(img, cdn_base=None)
assert result["file_url"] == "/api/v1/images/00000000-0000-0000-0000-000000000001/file"
assert result["file_url"] == "/api/v1/i/AbCd1234/file"
assert result["thumbnail_url"] is None
@@ -63,3 +64,9 @@ def test_cdn_trailing_whitespace_normalised():
result = _image_to_dict(img, cdn_base="https://cdn.example.com ")
assert result["file_url"] == "https://cdn.example.com/abc123storagekey"
assert result["thumbnail_url"] == "https://cdn.example.com/abc123storagekey-thumb"
def test_short_id_in_response():
img = _make_image()
result = _image_to_dict(img, cdn_base=None)
assert result["short_id"] == "AbCd1234"