"""HTTP routes for managing proxy sources and listing proxies.

Exposes two routers:

* ``router`` (``/sources``) -- CRUD for ``ProxySource`` records plus a
  manual scrape trigger.
* ``proxy_router`` (``/proxies``) -- read-only listing and lookup of
  discovered proxies.
"""

from datetime import datetime
from uuid import UUID

import httpx
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlalchemy.ext.asyncio import AsyncSession

from proxy_pool.common.dependencies import get_db, get_registry
from proxy_pool.config import get_settings
from proxy_pool.plugins.registry import PluginRegistry
from proxy_pool.proxy.models import Proxy, ProxySource, ProxyStatus
from proxy_pool.proxy.schemas import (
    ProxyListParams,
    ProxyListResponse,
    ProxyResponse,
    ProxySourceCreate,
    ProxySourceResponse,
    ProxySourceUpdate,
)
from proxy_pool.proxy.service import query_proxies

router = APIRouter(prefix="/sources", tags=["sources"])
proxy_router = APIRouter(prefix="/proxies", tags=["proxies"])


def _require_parser(registry: PluginRegistry, parser_name: str) -> None:
    """Raise a 422 if *parser_name* is not registered in *registry*.

    The registry's own exception is suppressed deliberately (``from None``)
    so clients see only the HTTP-level validation error.
    """
    try:
        registry.get_parser(parser_name)
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"No parser registered with name '{parser_name}'",
        ) from None


async def _get_source_or_404(db: AsyncSession, source_id: UUID) -> ProxySource:
    """Fetch a ``ProxySource`` by primary key or raise a 404."""
    source = await db.get(ProxySource, source_id)
    if source is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Source not found",
        )
    return source


@router.get("", response_model=list[ProxySourceResponse])
async def list_sources(
    is_active: bool | None = None,
    db: AsyncSession = Depends(get_db),
) -> list[ProxySourceResponse]:
    """List all sources, newest first, optionally filtered by active flag."""
    query = select(ProxySource)
    if is_active is not None:
        query = query.where(ProxySource.is_active == is_active)
    query = query.order_by(ProxySource.created_at.desc())
    result = await db.execute(query)
    sources = result.scalars().all()
    return [ProxySourceResponse.model_validate(s) for s in sources]


@router.post(
    "",
    response_model=ProxySourceResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_source(
    body: ProxySourceCreate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Create a new scrape source; 422 if the named parser is unknown."""
    _require_parser(registry, body.parser_name)

    source = ProxySource(
        url=body.url,
        parser_name=body.parser_name,
        cron_schedule=body.cron_schedule,
        default_protocol=body.default_protocol,
    )
    db.add(source)
    await db.commit()
    await db.refresh(source)
    return ProxySourceResponse.model_validate(source)


@router.get("/{source_id}", response_model=ProxySourceResponse)
async def get_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxySourceResponse:
    """Return a single source by id, or 404 if it does not exist."""
    source = await _get_source_or_404(db, source_id)
    return ProxySourceResponse.model_validate(source)


@router.patch("/{source_id}", response_model=ProxySourceResponse)
async def update_source(
    source_id: UUID,
    body: ProxySourceUpdate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Partially update a source; only fields present in the body change."""
    source = await _get_source_or_404(db, source_id)

    update_data = body.model_dump(exclude_unset=True)
    if "parser_name" in update_data:
        _require_parser(registry, update_data["parser_name"])

    for field, value in update_data.items():
        setattr(source, field, value)

    await db.commit()
    await db.refresh(source)
    return ProxySourceResponse.model_validate(source)


@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a source by id; 404 if it does not exist."""
    source = await _get_source_or_404(db, source_id)
    await db.delete(source)
    await db.commit()


@router.post("/{source_id}/scrape")
async def trigger_scrape(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
):
    """Fetch the source URL now, parse it, and upsert discovered proxies.

    Returns a summary dict with the source id and the number of proxies the
    parser produced. Raises 404 for an unknown source, 422 for a missing
    parser, and 502 when the upstream fetch fails.
    """
    source = await _get_source_or_404(db, source_id)

    try:
        parser = registry.get_parser(source.parser_name)
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"Parser '{source.parser_name}' not registered",
        ) from None

    settings = get_settings()
    try:
        async with httpx.AsyncClient(
            timeout=settings.proxy.scrape_timeout_seconds,
            headers={"User-Agent": settings.proxy.scrape_user_agent},
        ) as client:
            response = await client.get(str(source.url))
            response.raise_for_status()
    except httpx.HTTPError as err:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch source: {err}",
        ) from None

    discovered = await parser.parse(
        raw=response.content,
        source_url=str(source.url),
        source_id=source.id,
        default_protocol=source.default_protocol.value,
    )

    if discovered:
        values = [
            {
                "ip": p.ip,
                "port": p.port,
                "protocol": p.protocol,
                "source_id": source.id,
                "status": ProxyStatus.UNCHECKED,
            }
            for p in discovered
        ]
        # Upsert on (ip, port, protocol): a re-discovered proxy keeps its
        # existing row (and accumulated stats) but is re-attributed to the
        # source that most recently produced it.
        stmt = pg_insert(Proxy).values(values)
        stmt = stmt.on_conflict_do_update(
            index_elements=["ip", "port", "protocol"],
            set_={"source_id": stmt.excluded.source_id},
        )
        await db.execute(stmt)

    # NOTE(review): naive local timestamp — if the column is timezone-aware,
    # this should be datetime.now(timezone.utc). Kept as-is pending schema
    # confirmation.
    source.last_scraped_at = datetime.now()
    await db.commit()

    return {
        "source_id": str(source.id),
        "proxies_discovered": len(discovered),
    }


@proxy_router.get("", response_model=ProxyListResponse)
async def list_proxies(
    params: ProxyListParams = Depends(),
    db: AsyncSession = Depends(get_db),
) -> ProxyListResponse:
    """List proxies with filtering, sorting, and pagination via query params."""
    proxies, total = await query_proxies(
        db,
        status=params.status,
        protocol=params.protocol,
        anonymity=params.anonymity,
        country=params.country,
        min_score=params.min_score,
        max_latency_ms=params.max_latency_ms,
        min_uptime_pct=params.min_uptime_pct,
        verified_within_minutes=params.verified_within_minutes,
        sort_by=params.sort_by,
        sort_order=params.sort_order,
        limit=params.limit,
        offset=params.offset,
    )
    return ProxyListResponse(
        items=[ProxyResponse.model_validate(p) for p in proxies],
        total_count=total,
        limit=params.limit,
        offset=params.offset,
    )
@proxy_router.get("/{proxy_id}", response_model=ProxyResponse)
async def get_proxy(
    proxy_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxyResponse:
    """Return a single proxy by id, or 404 if it does not exist."""
    proxy = await db.get(Proxy, proxy_id)
    if proxy is None:
        # Fix: dropped a spurious `from None` — there is no active exception
        # context outside an `except` block, so the clause had no effect and
        # only obscured intent.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Proxy not found",
        )
    return ProxyResponse.model_validate(proxy)