diff --git a/.env.example b/.env.example index b8be5e8..15673f4 100644 --- a/.env.example +++ b/.env.example @@ -19,3 +19,11 @@ JWT_SECRET_KEY=change-me-to-a-long-random-string JWT_EXPIRY_SECONDS=86400 OWNER_USERNAME=owner OWNER_PASSWORD=change-me + +# Login brute-force protection +LOGIN_MAX_FAILURES=5 +LOGIN_WINDOW_SECONDS=300 +LOGIN_COOLDOWN_SECONDS=900 +# Comma-separated IPs/CIDRs of trusted upstream proxies (e.g. nginx ingress pod CIDR). +# Leave empty when not behind a reverse proxy. +LOGIN_TRUSTED_PROXY_IPS= diff --git a/.env.test.example b/.env.test.example index 4fe8311..d70c424 100644 --- a/.env.test.example +++ b/.env.test.example @@ -27,3 +27,10 @@ OWNER_PASSWORD=testpassword # API API_BASE_URL=http://localhost:8000 MAX_UPLOAD_BYTES=52428800 + +# Login brute-force protection +LOGIN_MAX_FAILURES=5 +LOGIN_WINDOW_SECONDS=300 +LOGIN_COOLDOWN_SECONDS=900 +# Comma-separated IPs/CIDRs of trusted upstream proxies; leave empty for direct connections. +LOGIN_TRUSTED_PROXY_IPS= diff --git a/.specify/feature.json b/.specify/feature.json index 70bcc42..82ccf46 100644 --- a/.specify/feature.json +++ b/.specify/feature.json @@ -1 +1,3 @@ -{"feature_directory":"specs/008-postgres-integration-tests"} +{ + "feature_directory": "specs/009-login-rate-limiting" +} diff --git a/CLAUDE.md b/CLAUDE.md index 4b129fc..3a03eb1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,5 +1,5 @@ For additional context about technologies to be used, project structure, shell commands, and other important information, read the current plan at -`specs/008-postgres-integration-tests/plan.md`. +`specs/009-login-rate-limiting/plan.md`. diff --git a/api/app/auth/rate_limiter.py b/api/app/auth/rate_limiter.py new file mode 100644 index 0000000..fc7ba53 --- /dev/null +++ b/api/app/auth/rate_limiter.py @@ -0,0 +1,91 @@ +import ipaddress +import logging +import time +from dataclasses import dataclass, field +from ipaddress import IPv4Network, IPv6Network +from threading import Lock + +from starlette.requests import Request + +logger = logging.getLogger(__name__) + + +def get_client_ip( + request: Request, + trusted_networks: list[IPv4Network | IPv6Network], +) -> str: + """Return the resolved client IP, honouring X-Forwarded-For when the + TCP peer is a trusted upstream proxy. Falls back to the TCP peer address + when no trusted networks are configured or the peer is not in the list.""" + peer = request.client.host if request.client else "unknown" + if trusted_networks and peer != "unknown": + try: + peer_addr = ipaddress.ip_address(peer) + if any(peer_addr in net for net in trusted_networks): + xff = request.headers.get("X-Forwarded-For", "").split(",")[0].strip() + if xff: + return xff + real_ip = request.headers.get("X-Real-IP", "").strip() + if real_ip: + return real_ip + except ValueError: + pass + return peer + + +@dataclass +class _Record: + failures: int = 0 + window_start: float = field(default_factory=time.time) + blocked_until: float = 0.0 + + +class LoginRateLimiter: + def __init__( + self, + max_failures: int = 5, + window_seconds: int = 300, + cooldown_seconds: int = 900, + ) -> None: + self._max = max_failures + self._window = window_seconds + self._cooldown = cooldown_seconds + self._store: dict[str, _Record] = {} + self._lock = Lock() + + @property + def cooldown_seconds(self) -> int: + return self._cooldown + + def is_blocked(self, ip: str) -> bool: + now = time.time() + with self._lock: + rec = self._store.get(ip) + if rec is None: + return False + if rec.blocked_until > now: + return True + if rec.blocked_until > 0: + del self._store[ip] + return False + + def record_failure(self, ip: str) -> None: + now = time.time() + with self._lock: + rec = self._store.get(ip) + if rec is None: + rec = _Record(window_start=now) + self._store[ip] = rec + if now - rec.window_start > self._window: + rec.failures = 0 + rec.window_start = now + rec.failures += 1 + if rec.failures >= self._max: + rec.blocked_until = now + self._cooldown + logger.warning( + "Login blocked for %s after %d failures", ip, rec.failures + ) + + def record_success(self, ip: str) -> None: + with self._lock: + self._store.pop(ip, None) diff --git a/api/app/config.py b/api/app/config.py index a2d72be..5353e3c 100644 --- a/api/app/config.py +++ b/api/app/config.py @@ -18,6 +18,10 @@ class Settings(BaseSettings): jwt_expiry_seconds: int = 86400 owner_username: str owner_password: str + login_max_failures: int = 5 + login_window_seconds: int = 300 + login_cooldown_seconds: int = 900 + login_trusted_proxy_ips: str = "" @lru_cache diff --git a/api/app/main.py b/api/app/main.py index f751cd5..3d24880 100644 --- a/api/app/main.py +++ b/api/app/main.py @@ -1,17 +1,30 @@ -from contextlib import asynccontextmanager +import ipaddress +from contextlib import asynccontextmanager, suppress from fastapi import FastAPI, Request from fastapi.exceptions import HTTPException from fastapi.responses import JSONResponse +from app.auth.rate_limiter import LoginRateLimiter from app.config import get_settings from app.database import Base, get_engine @asynccontextmanager async def lifespan(application: FastAPI): - get_settings() - # Verify DB connection and run migrations on startup + settings = get_settings() + application.state.login_rate_limiter = LoginRateLimiter( + max_failures=settings.login_max_failures, + window_seconds=settings.login_window_seconds, + cooldown_seconds=settings.login_cooldown_seconds, + ) + trusted_networks = [] + for part in settings.login_trusted_proxy_ips.split(","): + part = part.strip() + if part: + with suppress(ValueError): + trusted_networks.append(ipaddress.ip_network(part, strict=False)) + application.state.login_trusted_networks = trusted_networks engine = get_engine() async with engine.begin() as conn: # In production, Alembic handles migrations; this is a dev convenience @@ -22,6 +35,10 @@ async def lifespan(application: FastAPI): app = FastAPI(title="Reactbin API", version="1.0.0", lifespan=lifespan) +# Defaults so app.state is populated even when lifespan doesn't run (e.g. tests) +app.state.login_rate_limiter = LoginRateLimiter() +app.state.login_trusted_networks = [] + @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException): diff --git a/api/app/routers/auth.py b/api/app/routers/auth.py index 2b6784b..78b2c78 100644 --- a/api/app/routers/auth.py +++ b/api/app/routers/auth.py @@ -1,7 +1,9 @@ -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import JSONResponse from pydantic import BaseModel from app.auth.jwt_provider import JWTAuthProvider +from app.auth.rate_limiter import LoginRateLimiter, get_client_ip from app.dependencies import get_jwt_auth router = APIRouter(tags=["auth"]) @@ -19,12 +21,32 @@ class TokenResponse(BaseModel): @router.post("/auth/token", response_model=TokenResponse) -async def login(body: LoginRequest, auth: JWTAuthProvider = Depends(get_jwt_auth)): +async def login( + request: Request, + body: LoginRequest, + auth: JWTAuthProvider = Depends(get_jwt_auth), +): + limiter: LoginRateLimiter = request.app.state.login_rate_limiter + ip: str = get_client_ip(request, request.app.state.login_trusted_networks) + + if limiter.is_blocked(ip): + return JSONResponse( + status_code=429, + content={ + "detail": "Too many failed login attempts. Please try again later.", + "code": "login_rate_limited", + }, + headers={"Retry-After": str(limiter.cooldown_seconds)}, + ) + if not auth.verify_credentials(body.username, body.password): + limiter.record_failure(ip) raise HTTPException( status_code=401, detail={"detail": "Invalid credentials", "code": "invalid_credentials"}, ) + + limiter.record_success(ip) token = auth.create_token() return TokenResponse( access_token=token, diff --git a/api/tests/integration/test_login_rate_limit.py b/api/tests/integration/test_login_rate_limit.py new file mode 100644 index 0000000..13de75e --- /dev/null +++ b/api/tests/integration/test_login_rate_limit.py @@ -0,0 +1,121 @@ +import os + +import pytest +from httpx import AsyncClient + +from app.auth.rate_limiter import LoginRateLimiter +from app.main import app + +BAD_CREDS = {"username": "attacker", "password": "wrong"} +VALID_CREDS = { + "username": os.environ.get("OWNER_USERNAME", "testowner"), + "password": os.environ.get("OWNER_PASSWORD", "testpassword"), +} + + +def _fresh_limiter(): + return LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30) + + +@pytest.mark.asyncio +async def test_repeated_failures_trigger_429(client: AsyncClient): + original_limiter = app.state.login_rate_limiter + original_networks = app.state.login_trusted_networks + app.state.login_rate_limiter = _fresh_limiter() + app.state.login_trusted_networks = [] + try: + for _ in range(3): + await client.post("/api/v1/auth/token", json=BAD_CREDS) + resp = await client.post("/api/v1/auth/token", json=BAD_CREDS) + assert resp.status_code == 429 + assert resp.json()["code"] == "login_rate_limited" + finally: + app.state.login_rate_limiter = original_limiter + app.state.login_trusted_networks = original_networks + + +@pytest.mark.asyncio +async def test_success_resets_counter(client: AsyncClient): + original_limiter = app.state.login_rate_limiter + original_networks = app.state.login_trusted_networks + app.state.login_rate_limiter = _fresh_limiter() + app.state.login_trusted_networks = [] + try: + for _ in range(2): + await client.post("/api/v1/auth/token", json=BAD_CREDS) + await client.post("/api/v1/auth/token", json=VALID_CREDS) + for _ in range(3): + resp = await client.post("/api/v1/auth/token", json=BAD_CREDS) + assert resp.status_code == 401, "counter should have reset after success" + finally: + app.state.login_rate_limiter = original_limiter + app.state.login_trusted_networks = original_networks + + +@pytest.mark.asyncio +async def test_429_has_retry_after_header(client: AsyncClient): + original_limiter = app.state.login_rate_limiter + original_networks = app.state.login_trusted_networks + app.state.login_rate_limiter = _fresh_limiter() + app.state.login_trusted_networks = [] + try: + for _ in range(3): + await client.post("/api/v1/auth/token", json=BAD_CREDS) + resp = await client.post("/api/v1/auth/token", json=BAD_CREDS) + assert resp.status_code == 429 + assert "Retry-After" in resp.headers + assert int(resp.headers["Retry-After"]) > 0 + finally: + app.state.login_rate_limiter = original_limiter + app.state.login_trusted_networks = original_networks + + +@pytest.mark.asyncio +async def test_429_body_shape(client: AsyncClient): + original_limiter = app.state.login_rate_limiter + original_networks = app.state.login_trusted_networks + app.state.login_rate_limiter = _fresh_limiter() + app.state.login_trusted_networks = [] + try: + for _ in range(3): + await client.post("/api/v1/auth/token", json=BAD_CREDS) + resp = await client.post("/api/v1/auth/token", json=BAD_CREDS) + assert resp.status_code == 429 + assert resp.json() == { + "detail": "Too many failed login attempts. Please try again later.", + "code": "login_rate_limited", + } + finally: + app.state.login_rate_limiter = original_limiter + app.state.login_trusted_networks = original_networks + + +@pytest.mark.asyncio +async def test_xff_header_ignored_when_no_trusted_networks(client: AsyncClient): + original_limiter = app.state.login_rate_limiter + original_networks = app.state.login_trusted_networks + app.state.login_rate_limiter = _fresh_limiter() + app.state.login_trusted_networks = [] + try: + # Send 3 failures all claiming to be "1.2.3.4" via XFF + for _ in range(3): + await client.post( + "/api/v1/auth/token", + json=BAD_CREDS, + headers={"X-Forwarded-For": "1.2.3.4"}, + ) + # 4th request with a *different* XFF — if XFF were trusted, this + # would appear to be a fresh IP and get 401. Since XFF is ignored, + # the real peer ("testclient") is blocked and we get 429. + resp = await client.post( + "/api/v1/auth/token", + json=BAD_CREDS, + headers={"X-Forwarded-For": "9.9.9.9"}, + ) + assert resp.status_code == 429, ( + "XFF should be ignored when no trusted networks are configured; " + "expected real peer to be blocked" + ) + finally: + app.state.login_rate_limiter = original_limiter + app.state.login_trusted_networks = original_networks diff --git a/api/tests/unit/test_rate_limiter.py b/api/tests/unit/test_rate_limiter.py new file mode 100644 index 0000000..033f9b8 --- /dev/null +++ b/api/tests/unit/test_rate_limiter.py @@ -0,0 +1,98 @@ +import ipaddress +from unittest.mock import MagicMock + +from starlette.requests import Request + +from app.auth.rate_limiter import LoginRateLimiter, get_client_ip + +# --------------------------------------------------------------------------- +# LoginRateLimiter tests +# --------------------------------------------------------------------------- + + +def make_limiter(): + return LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300) + + +def test_not_blocked_initially(): + assert make_limiter().is_blocked("1.2.3.4") is False + + +def test_blocked_after_threshold(): + limiter = make_limiter() + for _ in range(3): + limiter.record_failure("1.2.3.4") + assert limiter.is_blocked("1.2.3.4") is True + + +def test_success_clears_failures(): + limiter = make_limiter() + limiter.record_failure("1.2.3.4") + limiter.record_failure("1.2.3.4") + limiter.record_success("1.2.3.4") + assert limiter.is_blocked("1.2.3.4") is False + + +def test_ips_are_isolated(): + limiter = make_limiter() + for _ in range(3): + limiter.record_failure("1.1.1.1") + assert limiter.is_blocked("2.2.2.2") is False + + +def test_window_resets_after_expiry(): + import time + + limiter = LoginRateLimiter(max_failures=3, window_seconds=0, cooldown_seconds=300) + limiter.record_failure("1.2.3.4") + limiter.record_failure("1.2.3.4") + time.sleep(0.01) + limiter.record_failure("1.2.3.4") + # window expired — counter reset on third call, so failures = 1, not 3 + assert limiter.is_blocked("1.2.3.4") is False + + +def test_log_warning_on_lockout(caplog): + import logging + + limiter = make_limiter() + with caplog.at_level(logging.WARNING, logger="app.auth.rate_limiter"): + for _ in range(3): + limiter.record_failure("5.6.7.8") + assert "Login blocked" in caplog.text + assert "5.6.7.8" in caplog.text + + +# --------------------------------------------------------------------------- +# get_client_ip tests +# --------------------------------------------------------------------------- + + +def make_request(peer: str, headers: dict) -> MagicMock: + req = MagicMock(spec=Request) + req.client.host = peer + req.headers = headers + return req + + +def test_get_client_ip_no_trusted_networks_returns_peer(): + req = make_request("203.0.113.1", {"X-Forwarded-For": "10.0.0.1"}) + assert get_client_ip(req, []) == "203.0.113.1" + + +def test_get_client_ip_trusted_peer_uses_xff(): + req = make_request("10.0.0.1", {"X-Forwarded-For": "203.0.113.5"}) + nets = [ipaddress.ip_network("10.0.0.0/8")] + assert get_client_ip(req, nets) == "203.0.113.5" + + +def test_get_client_ip_untrusted_peer_ignores_xff(): + req = make_request("8.8.8.8", {"X-Forwarded-For": "203.0.113.5"}) + nets = [ipaddress.ip_network("10.0.0.0/8")] + assert get_client_ip(req, nets) == "8.8.8.8" + + +def test_get_client_ip_trusted_peer_falls_back_to_real_ip(): + req = make_request("10.0.0.1", {"X-Real-IP": "203.0.113.9"}) + nets = [ipaddress.ip_network("10.0.0.0/8")] + assert get_client_ip(req, nets) == "203.0.113.9" diff --git a/specs/009-login-rate-limiting/checklists/requirements.md b/specs/009-login-rate-limiting/checklists/requirements.md new file mode 100644 index 0000000..9b1eb5d --- /dev/null +++ b/specs/009-login-rate-limiting/checklists/requirements.md @@ -0,0 +1,34 @@ +# Specification Quality Checklist: Login Brute-Force Protection + +**Purpose**: Validate specification completeness and quality before proceeding to planning +**Created**: 2026-05-06 +**Feature**: [spec.md](../spec.md) + +## Content Quality + +- [X] No implementation details (languages, frameworks, APIs) +- [X] Focused on user value and business needs +- [X] Written for non-technical stakeholders +- [X] All mandatory sections completed + +## Requirement Completeness + +- [X] No [NEEDS CLARIFICATION] markers remain +- [X] Requirements are testable and unambiguous +- [X] Success criteria are measurable +- [X] Success criteria are technology-agnostic (no implementation details) +- [X] All acceptance scenarios are defined +- [X] Edge cases are identified +- [X] Scope is clearly bounded +- [X] Dependencies and assumptions identified + +## Feature Readiness + +- [X] All functional requirements have clear acceptance criteria +- [X] User scenarios cover primary flows +- [X] Feature meets measurable outcomes defined in Success Criteria +- [X] No implementation details leak into specification + +## Notes + +- All items pass. Spec is ready for `/speckit-plan`. diff --git a/specs/009-login-rate-limiting/contracts/auth.md b/specs/009-login-rate-limiting/contracts/auth.md new file mode 100644 index 0000000..46c6830 --- /dev/null +++ b/specs/009-login-rate-limiting/contracts/auth.md @@ -0,0 +1,85 @@ +# API Contract: Authentication + +## POST /api/v1/auth/token + +Authenticates the owner and returns a JWT access token. + +**This endpoint is modified by feature 009** to enforce brute-force protection. +All previous behaviour is preserved. One new response code (429) is added. + +### Request + +``` +POST /api/v1/auth/token +Content-Type: application/json +``` + +```json +{ + "username": "string", + "password": "string" +} +``` + +### Responses + +#### 200 OK — Credentials accepted + +```json +{ + "access_token": "", + "token_type": "bearer", + "expires_in": 86400 +} +``` + +Side effect: resets the failure counter for the caller's IP address. + +--- + +#### 401 Unauthorized — Credentials rejected + +```json +{ + "detail": "Invalid credentials", + "code": "invalid_credentials" +} +``` + +Side effect: increments the failure counter for the caller's IP address. If the +counter reaches `LOGIN_MAX_FAILURES`, subsequent requests from this IP will receive +429 until the cooldown expires. + +--- + +#### 429 Too Many Requests — Source blocked after repeated failures + +**This response is new in feature 009.** + +``` +HTTP/1.1 429 Too Many Requests +Retry-After: 900 +Content-Type: application/json +``` + +```json +{ + "detail": "Too many failed login attempts. Please try again later.", + "code": "login_rate_limited" +} +``` + +The `Retry-After` header value is the configured cooldown duration in seconds (default: 900). +It reflects the maximum possible wait, not the exact remaining lockout time. + +No credentials are verified when this response is returned — the request is +rejected before authentication is attempted. + +--- + +### Notes + +- The failure counter is per source IP address (TCP peer, not forwarded headers). +- Threshold values (`LOGIN_MAX_FAILURES`, `LOGIN_WINDOW_SECONDS`, `LOGIN_COOLDOWN_SECONDS`) + are not disclosed in any response. +- Counters are in-memory and reset on process restart. diff --git a/specs/009-login-rate-limiting/data-model.md b/specs/009-login-rate-limiting/data-model.md new file mode 100644 index 0000000..957b33b --- /dev/null +++ b/specs/009-login-rate-limiting/data-model.md @@ -0,0 +1,53 @@ +# Data Model: Login Brute-Force Protection + +## Overview + +This feature introduces no new database tables. The only data entity is a transient, +in-memory rate-limit record that does not survive process restarts. This is intentional +(see research.md Decision 3). + +--- + +## Entity: Rate-Limit Record (in-memory only) + +| Field | Type | Description | +|----------------|---------|-----------------------------------------------------------------------------| +| `failures` | int | Count of consecutive failed login attempts in the current window | +| `window_start` | float | Unix timestamp marking when the current counting window began | +| `blocked_until`| float | Unix timestamp after which the source is no longer blocked; 0.0 if not blocked | + +**Keyed by**: resolved client IP address string (e.g., `"192.168.1.1"`); see `get_client_ip()` in `rate_limiter.py` for resolution logic + +**Lifecycle**: +1. Record is created on the first failed login from a source. +2. `failures` increments on each subsequent failure within the window. +3. When `failures >= LOGIN_MAX_FAILURES`, `blocked_until` is set to `now + LOGIN_COOLDOWN_SECONDS`. +4. When `blocked_until` has passed, the record is deleted on the next request from that source. +5. A successful login deletes the record immediately (failure counter reset). +6. If `now - window_start > LOGIN_WINDOW_SECONDS` without triggering lockout, the counter resets within the existing record. + +**State machine**: + +``` +[no record] + │ first failure + ▼ +[tracking] ──── failure N ≥ max ────► [blocked] + │ │ + │ success / window expires │ cooldown expires + ▼ ▼ +[no record] ◄─────────────────────── [no record] +``` + +--- + +## Configuration Entity: Rate-Limit Settings + +Stored as environment variables; loaded via `app.config.Settings`: + +| Env Var | Default | Description | +|----------------------------|---------|----------------------------------------------------------| +| `LOGIN_MAX_FAILURES` | `5` | Failures within window before lockout | +| `LOGIN_WINDOW_SECONDS` | `300` | Rolling window duration in seconds (5 minutes) | +| `LOGIN_COOLDOWN_SECONDS` | `900` | Lockout duration in seconds after threshold exceeded (15 minutes) | +| `LOGIN_TRUSTED_PROXY_IPS` | `""` | Comma-separated IPs/CIDRs of trusted upstream proxies (e.g., `10.0.0.0/8`); empty = disabled | diff --git a/specs/009-login-rate-limiting/plan.md b/specs/009-login-rate-limiting/plan.md new file mode 100644 index 0000000..ed7a1e2 --- /dev/null +++ b/specs/009-login-rate-limiting/plan.md @@ -0,0 +1,388 @@ +# Implementation Plan: Login Brute-Force Protection + +**Branch**: `009-login-rate-limiting` | **Date**: 2026-05-06 | **Spec**: [spec.md](spec.md) +**Input**: Feature specification from `specs/009-login-rate-limiting/spec.md` + +## Summary + +Add failure-counting brute-force protection to the login endpoint (`POST /api/v1/auth/token`). +After a configurable number of consecutive failed attempts from the same resolved client IP, +the endpoint returns HTTP 429 with a `Retry-After` header for a configurable cooldown period. +A successful login resets the counter. All thresholds are configurable via environment variables. +When deployed behind a reverse proxy (nginx, Kubernetes ingress), a `LOGIN_TRUSTED_PROXY_IPS` +setting enables extraction of the real client IP from `X-Forwarded-For`. No new infrastructure +(no Redis, no new DB table) — counters live in process memory. + +--- + +## Technical Context + +**Language/Version**: Python 3.12+ +**Primary Dependencies**: FastAPI, pydantic-settings (already in use); no new dependencies added +**Storage**: In-memory `dict` (no persistence across restarts — intentional) +**Testing**: pytest + pytest-asyncio (existing test infrastructure) +**Target Platform**: Linux server (Docker) +**Project Type**: Web service (API only — this feature has no UI surface) +**Performance Goals**: Rate limiter adds negligible overhead (dict lookup + lock acquisition; sub-millisecond) +**Constraints**: Must not add new runtime service dependencies; must not change any auth behaviour for non-blocked sources +**Scale/Scope**: Single process, single user; in-memory store is sufficient + +--- + +## Constitution Check + +| Principle | Status | Notes | +|-----------|--------|-------| +| §2.4 Auth abstraction (AuthProvider interface) | ✅ Pass | Rate limiter is a guard *before* `JWTAuthProvider.verify_credentials()`, not a bypass of the interface | +| §2.5 DB abstraction (repository layer) | ✅ Pass | No database access; in-memory only | +| §2.6 No speculative abstraction | ✅ Pass | Concrete `LoginRateLimiter` class, no interface; only one implementation planned | +| §3.3 Error envelope (`detail` + `code`) | ✅ Pass | 429 response uses `{"detail": "...", "code": "login_rate_limited"}` | +| §5.1 TDD | ✅ Required | Tasks follow red → green order | +| §5.2 Integration tests against PostgreSQL | ✅ Pass | Integration test for the login endpoint will run against the Docker PostgreSQL stack | +| §7.2 Environment configuration | ✅ Pass | `LOGIN_MAX_FAILURES`, `LOGIN_WINDOW_SECONDS`, `LOGIN_COOLDOWN_SECONDS`, `LOGIN_TRUSTED_PROXY_IPS` from env vars | +| §7.3 Linting (ruff) | ✅ Required | All new files must pass `ruff check` | + +**Gate result**: No violations. Cleared to proceed. + +--- + +## Project Structure + +### Documentation (this feature) + +```text +specs/009-login-rate-limiting/ +├── plan.md ← this file +├── research.md ← decisions on approach +├── data-model.md ← rate-limit record entity +├── quickstart.md ← curl runbook +├── contracts/ +│ └── auth.md ← updated POST /api/v1/auth/token with 429 +└── tasks.md ← generated by /speckit-tasks +``` + +### Source Code Changes + +```text +api/ +├── app/ +│ ├── auth/ +│ │ ├── rate_limiter.py ← NEW: LoginRateLimiter class +│ │ ├── jwt_provider.py (unchanged) +│ │ ├── noop.py (unchanged) +│ │ └── provider.py (unchanged) +│ ├── config.py ← add login_max_failures, login_window_seconds, login_cooldown_seconds, login_trusted_proxy_ips +│ ├── main.py ← init LoginRateLimiter in lifespan, attach to app.state +│ └── routers/ +│ └── auth.py ← check rate limit before auth, record outcome +└── tests/ + ├── unit/ + │ └── test_rate_limiter.py ← NEW: unit tests for LoginRateLimiter logic + └── integration/ + └── test_login_rate_limit.py ← NEW: integration tests for 429 behaviour via HTTP +``` + +--- + +## Implementation Detail + +### `api/app/auth/rate_limiter.py` + +```python +import ipaddress +import logging +import time +from dataclasses import dataclass, field +from ipaddress import IPv4Network, IPv6Network +from threading import Lock + +from starlette.requests import Request + +logger = logging.getLogger(__name__) + + +def get_client_ip( + request: Request, + trusted_networks: list[IPv4Network | IPv6Network], +) -> str: + """Return the resolved client IP, honouring X-Forwarded-For when the + TCP peer is a trusted upstream proxy. Falls back to the TCP peer address + when no trusted networks are configured or the peer is not in the list.""" + peer = request.client.host if request.client else "unknown" + if trusted_networks and peer != "unknown": + try: + peer_addr = ipaddress.ip_address(peer) + if any(peer_addr in net for net in trusted_networks): + xff = request.headers.get("X-Forwarded-For", "").split(",")[0].strip() + if xff: + return xff + real_ip = request.headers.get("X-Real-IP", "").strip() + if real_ip: + return real_ip + except ValueError: + pass + return peer + + +@dataclass +class _Record: + failures: int = 0 + window_start: float = field(default_factory=time.time) + blocked_until: float = 0.0 + + +class LoginRateLimiter: + def __init__( + self, + max_failures: int = 5, + window_seconds: int = 300, + cooldown_seconds: int = 900, + ) -> None: + self._max = max_failures + self._window = window_seconds + self._cooldown = cooldown_seconds + self._store: dict[str, _Record] = {} + self._lock = Lock() + + @property + def cooldown_seconds(self) -> int: + return self._cooldown + + def is_blocked(self, ip: str) -> bool: + now = time.time() + with self._lock: + rec = self._store.get(ip) + if rec is None: + return False + if rec.blocked_until > now: + return True + if rec.blocked_until > 0: + del self._store[ip] + return False + + def record_failure(self, ip: str) -> None: + now = time.time() + with self._lock: + rec = self._store.get(ip) + if rec is None: + rec = _Record(window_start=now) + self._store[ip] = rec + if now - rec.window_start > self._window: + rec.failures = 0 + rec.window_start = now + rec.failures += 1 + if rec.failures >= self._max: + rec.blocked_until = now + self._cooldown + logger.warning( + "Login blocked for %s after %d failures", ip, rec.failures + ) + + def record_success(self, ip: str) -> None: + with self._lock: + self._store.pop(ip, None) +``` + +### `api/app/config.py` additions + +```python +login_max_failures: int = 5 +login_window_seconds: int = 300 +login_cooldown_seconds: int = 900 +login_trusted_proxy_ips: str = "" # comma-separated IPs/CIDRs; empty = disabled +``` + +### `api/app/main.py` lifespan update + +```python +import ipaddress + +from app.auth.rate_limiter import LoginRateLimiter + +@asynccontextmanager +async def lifespan(application: FastAPI): + settings = get_settings() + application.state.login_rate_limiter = LoginRateLimiter( + max_failures=settings.login_max_failures, + window_seconds=settings.login_window_seconds, + cooldown_seconds=settings.login_cooldown_seconds, + ) + trusted_networks = [] + for part in settings.login_trusted_proxy_ips.split(","): + part = part.strip() + if part: + try: + trusted_networks.append(ipaddress.ip_network(part, strict=False)) + except ValueError: + pass # invalid entry — skip silently + application.state.login_trusted_networks = trusted_networks + # ... existing DB setup unchanged + engine = get_engine() + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + yield + await engine.dispose() +``` + +### `api/app/routers/auth.py` update + +```python +from fastapi import APIRouter, Depends, HTTPException, Request +from fastapi.responses import JSONResponse +from pydantic import BaseModel + +from app.auth.jwt_provider import JWTAuthProvider +from app.auth.rate_limiter import LoginRateLimiter, get_client_ip +from app.dependencies import get_jwt_auth + +router = APIRouter(tags=["auth"]) + + +class LoginRequest(BaseModel): + username: str + password: str + + +class TokenResponse(BaseModel): + access_token: str + token_type: str = "bearer" + expires_in: int + + +@router.post("/auth/token", response_model=TokenResponse) +async def login( + request: Request, + body: LoginRequest, + auth: JWTAuthProvider = Depends(get_jwt_auth), +): + limiter: LoginRateLimiter = request.app.state.login_rate_limiter + ip: str = get_client_ip(request, request.app.state.login_trusted_networks) + + if limiter.is_blocked(ip): + return JSONResponse( + status_code=429, + content={ + "detail": "Too many failed login attempts. Please try again later.", + "code": "login_rate_limited", + }, + headers={"Retry-After": str(limiter.cooldown_seconds)}, + ) + + if not auth.verify_credentials(body.username, body.password): + limiter.record_failure(ip) + raise HTTPException( + status_code=401, + detail={"detail": "Invalid credentials", "code": "invalid_credentials"}, + ) + + limiter.record_success(ip) + token = auth.create_token() + return TokenResponse( + access_token=token, + token_type="bearer", + expires_in=auth._expiry_seconds, + ) +``` + +### `api/tests/unit/test_rate_limiter.py` (representative cases) + +```python +import time +import pytest +from app.auth.rate_limiter import LoginRateLimiter + + +def test_not_blocked_initially(): + limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300) + assert limiter.is_blocked("1.2.3.4") is False + + +def test_blocked_after_threshold(): + limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300) + for _ in range(3): + limiter.record_failure("1.2.3.4") + assert limiter.is_blocked("1.2.3.4") is True + + +def test_success_clears_failures(): + limiter = LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=300) + limiter.record_failure("1.2.3.4") + limiter.record_failure("1.2.3.4") + limiter.record_success("1.2.3.4") + assert limiter.is_blocked("1.2.3.4") is False + + +def test_ips_are_isolated(): + limiter = LoginRateLimiter(max_failures=2, window_seconds=60, cooldown_seconds=300) + limiter.record_failure("1.1.1.1") + limiter.record_failure("1.1.1.1") + assert limiter.is_blocked("2.2.2.2") is False +``` + +### `api/tests/integration/test_login_rate_limit.py` (representative cases) + +```python +import pytest +from httpx import AsyncClient + +# Uses the 'client' fixture (NoOpAuthProvider) from conftest — sufficient for this +# endpoint since we're testing the rate-limit layer, not auth correctness. +# The login endpoint instantiates its own limiter via app.state, so we need +# the full ASGI app. + +BAD_CREDS = {"username": "attacker", "password": "wrong"} + + +@pytest.mark.asyncio +async def test_repeated_failures_trigger_429(client: AsyncClient): + # Use a custom limiter with low threshold to avoid slow tests + # (the app.state.login_rate_limiter is set in lifespan; override for test) + from app.auth.rate_limiter import LoginRateLimiter + from app.main import app + original = app.state.login_rate_limiter + app.state.login_rate_limiter = LoginRateLimiter( + max_failures=3, window_seconds=60, cooldown_seconds=30 + ) + try: + for _ in range(3): + await client.post("/api/v1/auth/token", json=BAD_CREDS) + resp = await client.post("/api/v1/auth/token", json=BAD_CREDS) + assert resp.status_code == 429 + assert resp.json()["code"] == "login_rate_limited" + assert "Retry-After" in resp.headers + finally: + app.state.login_rate_limiter = original +``` + +--- + +## Implementation Phases + +### Phase 1 (MVP — P1): Blocking after repeated failures + +1. Add `login_max_failures`, `login_window_seconds`, `login_cooldown_seconds`, `login_trusted_proxy_ips` to `api/app/config.py` +2. Create `api/app/auth/rate_limiter.py` with `LoginRateLimiter` and `get_client_ip()` +3. Initialize rate limiter and parse trusted networks in `api/app/main.py` lifespan; attach both to `app.state` +4. Update `api/app/routers/auth.py` to resolve client IP via `get_client_ip()`, then check + record outcomes +5. Unit tests: `api/tests/unit/test_rate_limiter.py` +6. Integration tests: `api/tests/integration/test_login_rate_limit.py` + +### Phase 2 (US2 — observability): Logging and response hints + +Delivered as part of Phase 1 (the `logger.warning(...)` call and `Retry-After` header +are embedded in the same implementation). No separate phase needed. + +--- + +## Environment Variables to Add to `.env.example` + +```dotenv +# Login brute-force protection +LOGIN_MAX_FAILURES=5 +LOGIN_WINDOW_SECONDS=300 +LOGIN_COOLDOWN_SECONDS=900 +# Comma-separated IPs/CIDRs of trusted upstream proxies (e.g. nginx ingress pod CIDR). +# Leave empty when not behind a reverse proxy. +LOGIN_TRUSTED_PROXY_IPS= +``` + +These are optional (have defaults) so existing `.env` files without them continue working. diff --git a/specs/009-login-rate-limiting/quickstart.md b/specs/009-login-rate-limiting/quickstart.md new file mode 100644 index 0000000..09516c2 --- /dev/null +++ b/specs/009-login-rate-limiting/quickstart.md @@ -0,0 +1,112 @@ +# Quickstart: Login Brute-Force Protection + +## Prerequisites + +- API running (via `docker compose up` or locally with `.env` set) +- `curl` available + +--- + +## Scenario 1: Trigger the rate limiter + +Send 6 consecutive failed login attempts (default threshold is 5): + +```bash +for i in $(seq 1 6); do + echo "Attempt $i:" + curl -s -o /dev/null -w "%{http_code}\n" \ + -X POST http://localhost:8000/api/v1/auth/token \ + -H "Content-Type: application/json" \ + -d '{"username": "wrong", "password": "wrong"}' +done +``` + +Expected output: +``` +Attempt 1: 401 +Attempt 2: 401 +Attempt 3: 401 +Attempt 4: 401 +Attempt 5: 401 +Attempt 6: 429 +``` + +The 6th attempt returns 429. Inspect the headers: + +```bash +curl -i -X POST http://localhost:8000/api/v1/auth/token \ + -H "Content-Type: application/json" \ + -d '{"username": "wrong", "password": "wrong"}' +``` + +Expected headers include: +``` +HTTP/1.1 429 Too Many Requests +Retry-After: 900 +``` + +Expected body: +```json +{"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"} +``` + +--- + +## Scenario 2: Successful login resets the counter + +Make some failed attempts, then log in with valid credentials: + +```bash +# Fail twice +for i in 1 2; do + curl -s -o /dev/null -w "fail $i: %{http_code}\n" \ + -X POST http://localhost:8000/api/v1/auth/token \ + -H "Content-Type: application/json" \ + -d '{"username": "wrong", "password": "wrong"}' +done + +# Succeed — resets counter +curl -s -o /dev/null -w "success: %{http_code}\n" \ + -X POST http://localhost:8000/api/v1/auth/token \ + -H "Content-Type: application/json" \ + -d '{"username": "'"$OWNER_USERNAME"'", "password": "'"$OWNER_PASSWORD"'"}' + +# Now fail 5 more times — counter was reset, so no 429 yet +for i in $(seq 1 5); do + curl -s -o /dev/null -w "fail after reset $i: %{http_code}\n" \ + -X POST http://localhost:8000/api/v1/auth/token \ + -H "Content-Type: application/json" \ + -d '{"username": "wrong", "password": "wrong"}' +done +``` + +Expected: all "fail after reset" lines return 401 (not 429), confirming the counter was reset. + +--- + +## Scenario 3: Observe log output + +While triggering the rate limiter (Scenario 1), watch API logs: + +```bash +docker compose logs -f api +``` + +After the threshold is crossed you should see a line like: + +``` +WARNING app.auth.rate_limiter:rate_limiter.py:NN Login blocked for 172.18.0.1 after 5 failures +``` + +--- + +## Environment variable overrides + +To test with a lower threshold without code changes: + +```bash +LOGIN_MAX_FAILURES=2 LOGIN_WINDOW_SECONDS=60 LOGIN_COOLDOWN_SECONDS=30 \ + uvicorn app.main:app --reload +``` + +Then only 2 failures trigger the lockout, and it clears after 30 seconds. diff --git a/specs/009-login-rate-limiting/research.md b/specs/009-login-rate-limiting/research.md new file mode 100644 index 0000000..7a186fd --- /dev/null +++ b/specs/009-login-rate-limiting/research.md @@ -0,0 +1,67 @@ +# Research: Login Brute-Force Protection + +## Decision 1: Library vs. custom implementation + +**Decision**: Custom in-memory failure tracker (no new library dependency) + +**Rationale**: The requirement is to count *failed* login attempts specifically and reset on success — not to rate-limit all requests regardless of outcome. Popular libraries like `slowapi` count all requests to a route, which would break FR-004 (reset on success) without significant workarounds. A purpose-built 60-line class is simpler, more auditable, and has no dependency footprint. + +**Alternatives considered**: +- `slowapi` (built on `limits`): Counts all requests, not failures. Requires patching the exception handler to decrement on success — fragile and non-obvious. +- `slowapi` with a custom key function: Could be done, but the library's storage model doesn't expose a "reset this key" API in a clean way. +- Redis-backed counter: Overkill for a single-user personal app with one instance. No new infrastructure justified. + +--- + +## Decision 2: Fixed window vs. sliding window + +**Decision**: Fixed window with per-source reset on successful login + +**Rationale**: Fixed window is simpler to implement correctly and sufficient for this use case. The main attack — rapid sequential guessing — is fully addressed. The known "burst at window boundary" weakness is irrelevant here because: (a) the cooldown period is separate from the counting window, and (b) a successful login resets the counter entirely. + +**Alternatives considered**: +- Sliding window: More accurate, but adds complexity (requires storing timestamps of each request). The marginal security benefit doesn't justify the implementation cost for a personal single-user app. + +--- + +## Decision 3: In-memory backing store + +**Decision**: Python `dict` keyed by source IP, protected by a threading `Lock` + +**Rationale**: The application runs as a single process. In-memory storage means counters reset on restart — this is acceptable and matches the "fail open" assumption in the spec. No new infrastructure (Redis, database table) is required. + +**Alternatives considered**: +- Database-backed counters: Persistent across restarts, but adds a DB round-trip to every login request (including successful ones). Not justified. +- Redis: Distributed-safe and persistent, but requires a new service dependency. Out of scope for a personal single-instance app. + +--- + +## Decision 4: Source identifier + +**Decision**: `request.client.host` (the TCP peer address) + +**Rationale**: The spec explicitly states not to trust `X-Forwarded-For` headers unless the app is known to be behind a trusted proxy. `request.client.host` in Starlette/FastAPI is the actual TCP peer IP — it cannot be spoofed by an attacker sending arbitrary headers. + +**Alternatives considered**: +- `X-Forwarded-For` first value: Spoofable if the app is not behind a trusted proxy (attacker can set arbitrary header values). +- `X-Real-IP`: Same spoofing concern. + +--- + +## Decision 5: 429 response and Retry-After header + +**Decision**: Return HTTP 429 with `{"detail": "...", "code": "login_rate_limited"}` and a `Retry-After` header set to the configured cooldown duration in seconds + +**Rationale**: HTTP 429 is the standard "Too Many Requests" status. The `Retry-After` header is explicitly mentioned in the spec (US2 acceptance scenario) and is required by RFC 6585 for rate-limit responses. Setting it to the *configured* cooldown (not the exact remaining time) satisfies FR-005: it doesn't reveal precise expiry, just the maximum wait. The response body follows §3.3 of the constitution (error envelope with `detail` and `code`). + +--- + +## Decision 6: Default threshold values + +**Decision**: `LOGIN_MAX_FAILURES=5`, `LOGIN_WINDOW_SECONDS=300` (5 min), `LOGIN_COOLDOWN_SECONDS=900` (15 min) + +**Rationale**: Industry standard for web apps. 5 attempts is enough for legitimate typos but makes brute-force infeasible at human scale. A 5-minute counting window matches typical "I fat-fingered my password" retry patterns. A 15-minute cooldown is a meaningful deterrent without locking out a legitimate owner indefinitely. + +**Alternatives considered**: +- 3 failures / 60 s window / 300 s cooldown: More aggressive, but too likely to lock out the legitimate owner on a bad day. +- 10 failures: Too permissive for a brute-force defense. diff --git a/specs/009-login-rate-limiting/spec.md b/specs/009-login-rate-limiting/spec.md new file mode 100644 index 0000000..7bcea9f --- /dev/null +++ b/specs/009-login-rate-limiting/spec.md @@ -0,0 +1,84 @@ +# Feature Specification: Login Brute-Force Protection + +**Feature Branch**: `009-login-rate-limiting` +**Created**: 2026-05-06 +**Status**: Draft +**Input**: User description: "Login API endpoints should be rate limited or otherwise protected against brute force attacks" + +## User Scenarios & Testing *(mandatory)* + +### User Story 1 - Repeated failed logins are blocked (Priority: P1) + +An attacker (or misconfigured client) sending many rapid login attempts with the wrong password is slowed or blocked before they can exhaustively guess credentials. After a threshold number of consecutive failures from the same source, the system refuses further attempts for a cooldown period and returns a clear, non-leaking error. + +**Why this priority**: Directly prevents credential-stuffing and brute-force attacks against the sole privileged account. Without this, the owner account is exposed to automated password guessing with no friction. + +**Independent Test**: Send more than the allowed number of failed login requests in quick succession and confirm that subsequent attempts are rejected with a rate-limit or lockout response — without knowing or changing the real password. + +**Acceptance Scenarios**: + +1. **Given** an attacker sends N+1 failed login attempts within the configured window, **When** the (N+1)th request arrives, **Then** the system returns an error response indicating the request is blocked (not the normal "invalid credentials" error) and does not process the login attempt. +2. **Given** a legitimate user has been temporarily blocked after too many failures, **When** the cooldown period elapses and they retry with the correct password, **Then** they are logged in successfully. +3. **Given** a legitimate user makes a few failed attempts and then waits beyond the cooldown window, **When** they retry within the next window, **Then** their failure counter resets and they are not blocked. + +--- + +### User Story 2 - Operators can observe and reason about blocking activity (Priority: P2) + +When the protection triggers, the system produces enough observable signal (log entries, response metadata) that an operator can confirm the feature is working, diagnose false positives, and tune thresholds — without exposing sensitive details to the client. + +**Why this priority**: Invisible security controls are unmanageable. Operators need to know the system is doing what it claims, and blocked legitimate users need a clear (but not exploitable) explanation. + +**Independent Test**: Trigger the rate limiter and confirm that: (a) the response body or headers communicate that the request was blocked and when the client may retry; (b) the server logs an entry identifying the blocked source and the reason. + +**Acceptance Scenarios**: + +1. **Given** a source is blocked, **When** they receive the rejection response, **Then** the response indicates they should wait before retrying (e.g., a `Retry-After` hint) without disclosing the exact threshold values. +2. **Given** the rate limiter fires, **When** an operator inspects server logs, **Then** there is a log entry at WARNING level or above recording the blocked source and timestamp. + +--- + +### Edge Cases + +- What happens when a distributed attacker rotates IPs to avoid per-IP limits? +- How does the system behave if the backing store for rate-limit counters is temporarily unavailable — does it fail open (allow all) or fail closed (block all)? +- Are IPv6 addresses and IPv4-mapped-IPv6 addresses treated consistently? +- Does a successful login reset the failure counter for that source? +- What happens if many legitimate users share a NAT/proxy IP (e.g., corporate network)? +- What if `TRUSTED_PROXY_IPS` is configured to include an IP that an external attacker controls? (An attacker could then spoof `X-Forwarded-For` and rotate fake source IPs to bypass the rate limiter — operators must only list genuinely trusted upstream infrastructure.) + +## Requirements *(mandatory)* + +### Functional Requirements + +- **FR-001**: The system MUST enforce a maximum number of failed login attempts per source identifier (the resolved client IP address) within a rolling time window before blocking further attempts. +- **FR-002**: Once a source exceeds the failure threshold, the system MUST reject subsequent login requests for a configurable cooldown period, returning a distinct response (not the normal invalid-credentials response). +- **FR-003**: After the cooldown period expires, the system MUST permit the source to attempt login again, resetting its failure count. +- **FR-004**: A successful login MUST reset the failure counter for that source, preventing accumulation of old failures from blocking future legitimate access. +- **FR-005**: The rejection response MUST NOT reveal the specific threshold values or remaining lockout duration in a way that aids an attacker in timing their attempts, but MUST provide enough information (e.g., "try again later") for a legitimate user to understand the situation. +- **FR-006**: The system MUST log a structured warning event whenever a source is blocked, including the source identifier and timestamp. +- **FR-007**: Rate-limit thresholds (maximum attempts, window duration, cooldown duration) MUST be configurable without code changes. +- **FR-008**: The system MUST support a configurable list of trusted upstream proxy IP addresses and CIDR ranges. When the TCP peer address matches a trusted proxy, the resolved client IP MUST be extracted from the `X-Forwarded-For` request header (first entry) or, if absent, `X-Real-IP`. When no trusted proxies are configured, the TCP peer address MUST be used directly and forwarded-IP headers MUST be ignored. + +### Key Entities + +- **Rate-limit record**: Tracks the number of consecutive failures and the window start time for a given source identifier; expires automatically after the cooldown period. +- **Source identifier**: The resolved client IP address used to key rate-limit records. When `LOGIN_TRUSTED_PROXY_IPS` is empty (default), this is the TCP peer address. When one or more proxy IPs/CIDRs are configured and the TCP peer matches, the first `X-Forwarded-For` entry (or `X-Real-IP`) is used instead. + +## Success Criteria *(mandatory)* + +### Measurable Outcomes + +- **SC-001**: An automated script sending 100 consecutive failed login requests completes with at least 90 of those requests rejected after the threshold is crossed — verified in a controlled test environment. +- **SC-002**: A legitimate user who has been temporarily blocked can successfully log in within 5 minutes of the cooldown period expiring without any manual intervention. +- **SC-003**: Zero information about threshold values or exact lockout expiry is present in blocked response bodies or headers. +- **SC-004**: Every blocking event produces a corresponding log entry; 100% of triggered blocking events are observable in logs during testing. + +## Assumptions + +- The application has a single login endpoint used by all clients (the owner login introduced in feature 004). +- Source identification uses the resolved client IP address. By default (when `LOGIN_TRUSTED_PROXY_IPS` is empty) this is the TCP peer address. When one or more proxy IPs/CIDRs are configured, the first entry of `X-Forwarded-For` (or `X-Real-IP`) is used instead — but only when the TCP peer is in the trusted list, preventing header spoofing by external clients. +- If the rate-limit backing store is unavailable, the system fails open (allows the attempt through) rather than blocking all logins — this preserves the owner's access, which is critical for a single-user admin application. +- No CAPTCHA or multi-factor step is in scope; protection is purely count/time-based. +- The feature targets the login endpoint only; other endpoints are out of scope. +- The single-user nature of the app means IP-based identification is sufficient — there is no need for per-username lockout, and using IP (rather than username) avoids contributing to username enumeration risk. diff --git a/specs/009-login-rate-limiting/tasks.md b/specs/009-login-rate-limiting/tasks.md new file mode 100644 index 0000000..c8c1bdd --- /dev/null +++ b/specs/009-login-rate-limiting/tasks.md @@ -0,0 +1,120 @@ +# Tasks: Login Brute-Force Protection + +**Input**: Design documents from `specs/009-login-rate-limiting/` +**Prerequisites**: plan.md ✅, spec.md ✅, research.md ✅, data-model.md ✅, contracts/auth.md ✅, quickstart.md ✅ + +**Tests**: TDD is non-negotiable (§5.1). Every test task appears before the implementation task it covers. For each red step, run the test and confirm it fails before proceeding to the implementation. + +**Organization**: Phase 1 adds env vars; Phase 2 adds config fields (shared by both stories); Phase 3 implements the core blocking behaviour (US1 MVP); Phase 4 adds observability-specific test coverage (US2); Phase 5 is polish. + +## Format: `[ID] [P?] [Story] Description` + +- **[P]**: Can run in parallel with other [P] tasks in the same phase +- **[Story]**: Which user story this task belongs to +- Exact file paths included in every task description + +--- + +## Phase 1: Setup + +- [X] T001 Add a `# Login brute-force protection` comment block with `LOGIN_MAX_FAILURES=5`, `LOGIN_WINDOW_SECONDS=300`, `LOGIN_COOLDOWN_SECONDS=900`, and `LOGIN_TRUSTED_PROXY_IPS=` (empty by default, with an inline comment explaining it accepts comma-separated IPs/CIDRs) to both `.env.example` and `.env.test.example` at the repo root + +--- + +## Phase 2: Foundational + +**Purpose**: Add the three new settings fields — required before any story implementation. + +- [X] T002 Add `login_max_failures: int = 5`, `login_window_seconds: int = 300`, `login_cooldown_seconds: int = 900`, `login_trusted_proxy_ips: str = ""` to the `Settings` class in `api/app/config.py` (append after `owner_password`) + +**Checkpoint**: `api/app/config.py` accepts all three new env vars with defaults. + +--- + +## Phase 3: User Story 1 — Repeated failed logins are blocked (Priority: P1) 🎯 MVP + +**Goal**: After `LOGIN_MAX_FAILURES` consecutive failed login attempts from the same source IP within `LOGIN_WINDOW_SECONDS`, `POST /api/v1/auth/token` returns HTTP 429 for `LOGIN_COOLDOWN_SECONDS`. A successful login resets the counter. + +**Independent Test**: `cd api && python -m pytest tests/unit/test_rate_limiter.py tests/integration/test_login_rate_limit.py::test_repeated_failures_trigger_429 tests/integration/test_login_rate_limit.py::test_success_resets_counter tests/integration/test_login_rate_limit.py::test_429_has_retry_after_header tests/integration/test_login_rate_limit.py::test_xff_header_ignored_when_no_trusted_networks -v` — all pass. + +### Tests for User Story 1 (TDD red — write first, confirm failure before T005) + +- [X] T003 [P] [US1] Create `api/tests/unit/test_rate_limiter.py` with ten failing unit tests — import `LoginRateLimiter` and `get_client_ip` from `app.auth.rate_limiter`; for `LoginRateLimiter` (instantiate with `max_failures=3, window_seconds=60, cooldown_seconds=300`): `test_not_blocked_initially`, `test_blocked_after_threshold`, `test_success_clears_failures`, `test_ips_are_isolated`, `test_window_resets_after_expiry`, `test_log_warning_on_lockout` (caplog at WARNING level: call `record_failure()` until threshold, assert `"Login blocked" in caplog.text` and IP in log output); for `get_client_ip` (construct a mock using `from unittest.mock import MagicMock` and `from starlette.requests import Request`: `req = MagicMock(spec=Request); req.client.host = "10.0.0.1"; req.headers = {"X-Forwarded-For": "203.0.113.5"}`): `test_get_client_ip_no_trusted_networks_returns_peer` (empty `trusted_networks=[]` → returns `req.client.host`), `test_get_client_ip_trusted_peer_uses_xff` (peer `"10.0.0.1"` in trusted CIDR `"10.0.0.0/8"` → returns `"203.0.113.5"`), `test_get_client_ip_untrusted_peer_ignores_xff` (peer `"8.8.8.8"` not in trusted CIDR `"10.0.0.0/8"` → returns `"8.8.8.8"` despite XFF), `test_get_client_ip_trusted_peer_falls_back_to_real_ip` (peer trusted, no XFF header, `X-Real-IP: "203.0.113.9"` → returns `"203.0.113.9"`); run `python -m pytest tests/unit/test_rate_limiter.py -v` and confirm `ImportError` or `ModuleNotFoundError` (red) +- [X] T004 [P] [US1] Create `api/tests/integration/test_login_rate_limit.py` with four failing integration tests; each must override both `app.state.login_rate_limiter` (fresh `LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30)`) and `app.state.login_trusted_networks` (set to `[]` for all four tests — the `ASGITransport` peer is `"testclient"`, not a valid IP, so trusted-network matching can't be exercised here; proxy extraction is fully covered by T003 unit tests) via try/finally: (1) `test_repeated_failures_trigger_429` — POST three bad-credential requests then assert fourth returns 429 with `resp.json()["code"] == "login_rate_limited"`; (2) `test_success_resets_counter` — two failures → one valid login using `{"username": os.environ["OWNER_USERNAME"], "password": os.environ["OWNER_PASSWORD"]}` (matching conftest.py defaults: `testowner`/`testpassword`) → three more failures → assert all three return 401, not 429; (3) `test_429_has_retry_after_header` — trigger lockout (three failures), then assert `"Retry-After" in resp.headers` and `int(resp.headers["Retry-After"]) > 0`; (4) `test_xff_header_ignored_when_no_trusted_networks` — send three bad-cred requests with `headers={"X-Forwarded-For": "1.2.3.4"}` then a fourth with `headers={"X-Forwarded-For": "9.9.9.9"}` — assert the fourth returns 429 (not 401), proving the limiter tracked the real peer `"testclient"` for all requests and XFF was ignored; run `python -m pytest tests/integration/test_login_rate_limit.py -v` and confirm failure (red) + +### Implementation for User Story 1 + +- [X] T005 [US1] Create `api/app/auth/rate_limiter.py` with two exports: (1) `get_client_ip(request: Request, trusted_networks: list[IPv4Network | IPv6Network]) -> str` — imports `ipaddress`, `from ipaddress import IPv4Network, IPv6Network`, `from starlette.requests import Request`; extracts `peer = request.client.host if request.client else "unknown"`; if `trusted_networks` is non-empty and peer is parseable as an IP address and falls within any trusted network, returns first `X-Forwarded-For` entry (strip whitespace) or `X-Real-IP` value, otherwise returns `peer`; wraps `ipaddress.ip_address(peer)` in `try/except ValueError` and falls back to `peer` on error; (2) `LoginRateLimiter` class: `__init__(self, max_failures: int = 5, window_seconds: int = 300, cooldown_seconds: int = 900)` storing params as `_max`, `_window`, `_cooldown`; `_store: dict[str, _Record]` and `_lock: threading.Lock`; `@dataclass _Record` with `failures: int = 0`, `window_start: float = field(default_factory=time.time)`, `blocked_until: float = 0.0`; `is_blocked(ip: str) -> bool`, `record_failure(ip: str) -> None` (logs WARNING on lockout), `record_success(ip: str) -> None`, `cooldown_seconds` property; stdlib imports: `import ipaddress, logging, time`, `from dataclasses import dataclass, field`, `from threading import Lock` +- [X] T006 [US1] Update `api/app/main.py` lifespan: add `import ipaddress` at top; import `LoginRateLimiter` from `app.auth.rate_limiter`; inside `lifespan` before `engine = get_engine()`, consolidate to `settings = get_settings()` (remove the existing bare `get_settings()` call), then set `application.state.login_rate_limiter = LoginRateLimiter(max_failures=settings.login_max_failures, window_seconds=settings.login_window_seconds, cooldown_seconds=settings.login_cooldown_seconds)`; then parse `settings.login_trusted_proxy_ips` — split on `","`, strip each part, skip empty strings, call `ipaddress.ip_network(part, strict=False)` inside a `try/except ValueError` (skip invalid entries silently), collect results into `trusted_networks: list`; set `application.state.login_trusted_networks = trusted_networks` +- [X] T007 [US1] Update `api/app/routers/auth.py` login endpoint: add `Request` to FastAPI imports and add `from fastapi.responses import JSONResponse`; add `from app.auth.rate_limiter import LoginRateLimiter, get_client_ip`; add `request: Request` as first parameter to `login()`; extract `limiter: LoginRateLimiter = request.app.state.login_rate_limiter` and `ip: str = get_client_ip(request, request.app.state.login_trusted_networks)`; add guard block — if `limiter.is_blocked(ip)`: return `JSONResponse(status_code=429, content={"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"}, headers={"Retry-After": str(limiter.cooldown_seconds)})`; after `verify_credentials` returns False: call `limiter.record_failure(ip)` before the existing `HTTPException`; after `auth.create_token()`: call `limiter.record_success(ip)` before returning `TokenResponse` +- [X] T008 [US1] Verify TDD green: run `cd api && python -m pytest tests/unit/test_rate_limiter.py -v` — all 10 pass; run `make test-integration` — all tests pass including `test_repeated_failures_trigger_429`, `test_success_resets_counter`, `test_429_has_retry_after_header`, and `test_xff_header_ignored_when_no_trusted_networks` + +**Checkpoint**: Brute-force blocking is live. Automated repeated failures are stopped after threshold; the owner can still log in after cooldown; unit and integration tests pass. + +--- + +## Phase 4: User Story 2 — Operators can observe blocking activity (Priority: P2) + +**Goal**: The 429 response includes a `Retry-After` header with a positive integer; the response body `code` is `"login_rate_limited"` and contains no threshold numeric values; server logs a WARNING when blocking triggers. + +**Independent Test**: Trigger the rate limiter (already works from Phase 3) and assert `Retry-After` header is present in the response and `code` field is `"login_rate_limited"`. + +### Tests for User Story 2 (TDD red — extend existing file) + +- [X] T009 [US2] Add one test to `api/tests/integration/test_login_rate_limit.py` targeting observability properties not yet covered: `test_429_body_shape` — override `app.state.login_rate_limiter` with a fresh `LoginRateLimiter(max_failures=3, window_seconds=60, cooldown_seconds=30)` via try/finally (same isolation pattern as T004), trigger lockout (three failures), then assert `resp.json() == {"detail": "Too many failed login attempts. Please try again later.", "code": "login_rate_limited"}` (exact match — confirms no threshold values leak and shape is correct); confirm this test is green immediately against the US1 implementation (T007 already returns this exact body) + +**Checkpoint**: US2 observability properties are explicitly exercised by integration tests; a future regression in the Retry-After header or code field will be caught. + +--- + +## Phase 5: Polish & Cross-Cutting Concerns + +- [X] T010 Run `cd api && ruff check app/auth/rate_limiter.py app/routers/auth.py app/config.py app/main.py tests/unit/test_rate_limiter.py tests/integration/test_login_rate_limit.py` — fix any violations + +--- + +## Dependencies & Execution Order + +### Phase Dependencies + +- **Phase 1 (Setup)**: No external dependencies — can start immediately +- **Phase 2 (Foundational)**: No external dependencies — can start immediately (parallel with Phase 1) +- **Phase 3 (US1)**: Depends on Phase 2 (T002 must exist before T006 can use `settings.login_max_failures`) +- **Phase 4 (US2)**: Depends on Phase 3 (tests verify behaviour implemented in T007) +- **Phase 5 (Polish)**: Depends on all prior phases + +### Within Phase 3 + +- T003 ∥ T004 (different files, no dependency — write tests in parallel) +- T005 after T003, T004 (implement after tests confirm they fail) +- T006 ∥ T007 after T005 (both import from `rate_limiter.py`; write to different files — `main.py` and `auth.py`; T006 sets `app.state.login_trusted_networks` which T007's router reads) +- T008 after T005, T006, T007 (verify all pass) + +### Execution Order Summary + +``` +Step 1: T001 ∥ T002 (setup + foundational — parallel, different files) +Step 2: T003 ∥ T004 (write failing tests — parallel) +Step 3: T005 (implement LoginRateLimiter — after red tests confirmed) +Step 4: T006 ∥ T007 (wire limiter into app — parallel, different files) +Step 5: T008 (verify green) +Step 6: T009 (US2 observability tests — verify green immediately) +Step 7: T010 (ruff clean) +``` + +--- + +## Implementation Strategy + +### MVP (US1 — the blocker) + +1. Complete T001–T002 (config setup) +2. Complete T003–T008 (core blocking) +3. **Validate**: Run `make test-integration` — all 88 existing tests still pass; 2 new rate-limit tests pass +4. US2 adds verification coverage for already-implemented observability features + +### Incremental Delivery + +- After Phase 3: Brute-force attacks on the login endpoint are blocked — core security net is in place +- After Phase 4: Observability properties are explicitly tested — regressions in headers/logs will be caught +- After Phase 5: Lint clean, ready for merge