# File viewer metadata (not part of the program): 254 lines, 7.4 KiB, Python

from datetime import datetime
from uuid import UUID
import httpx
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from proxy_pool.common.dependencies import get_db, get_registry
from proxy_pool.config import get_settings
from proxy_pool.plugins.registry import PluginRegistry
from proxy_pool.proxy.models import Proxy, ProxySource
from proxy_pool.proxy.schemas import (
ProxyListParams,
ProxyListResponse,
ProxyResponse,
ProxySourceCreate,
ProxySourceResponse,
ProxySourceUpdate,
)
from proxy_pool.proxy.service import query_proxies
# Router for proxy-source endpoints; every route is mounted under "/sources".
router = APIRouter(prefix="/sources", tags=["sources"])
# Router for proxy endpoints; every route is mounted under "/proxies".
proxy_router = APIRouter(prefix="/proxies", tags=["proxies"])
@router.get("", response_model=list[ProxySourceResponse])
async def list_sources(
    is_active: bool | None = None,
    db: AsyncSession = Depends(get_db),
) -> list[ProxySourceResponse]:
    """List proxy sources, newest first.

    Args:
        is_active: When given, only sources whose ``is_active`` flag equals
            this value are returned; when omitted, no filter is applied.
        db: Database session.

    Returns:
        The matching sources ordered by ``created_at`` descending.
    """
    stmt = select(ProxySource).order_by(ProxySource.created_at.desc())
    if is_active is not None:
        stmt = stmt.where(ProxySource.is_active == is_active)

    rows = (await db.execute(stmt)).scalars().all()

    responses: list[ProxySourceResponse] = []
    for row in rows:
        responses.append(ProxySourceResponse.model_validate(row))
    return responses
@router.post(
    "",
    response_model=ProxySourceResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_source(
    body: ProxySourceCreate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Create a new proxy source.

    Args:
        body: Fields of the source to create.
        db: Database session.
        registry: Plugin registry used to check that the parser exists.

    Returns:
        The persisted source, refreshed from the database after commit.

    Raises:
        HTTPException: 422 if the registry lookup for ``body.parser_name``
            fails.
    """
    parser_name = body.parser_name

    # Reject the request up front if the registry cannot resolve the parser.
    try:
        registry.get_parser(parser_name)
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"No parser registered with name '{parser_name}'",
        ) from None

    columns = {
        "url": body.url,
        "parser_name": parser_name,
        "cron_schedule": body.cron_schedule,
        "default_protocol": body.default_protocol,
    }
    new_source = ProxySource(**columns)

    db.add(new_source)
    await db.commit()
    # Reload the row so the response is built from post-commit state.
    await db.refresh(new_source)

    return ProxySourceResponse.model_validate(new_source)
@router.get("/{source_id}", response_model=ProxySourceResponse)
async def get_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxySourceResponse:
    """Return a single proxy source by its identifier.

    Args:
        source_id: Primary key of the source to look up.
        db: Database session.

    Returns:
        The matching source.

    Raises:
        HTTPException: 404 if no source with ``source_id`` exists.
    """
    found = await db.get(ProxySource, source_id)
    if found is not None:
        return ProxySourceResponse.model_validate(found)

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Source not found",
    )
@router.patch("/{source_id}", response_model=ProxySourceResponse)
async def update_source(
    source_id: UUID,
    body: ProxySourceUpdate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Partially update a proxy source.

    Only the fields explicitly present in the request body are applied.

    Args:
        source_id: Primary key of the source to update.
        body: Fields to change.
        db: Database session.
        registry: Plugin registry used to check a new parser name.

    Returns:
        The updated source, refreshed from the database after commit.

    Raises:
        HTTPException: 404 if the source does not exist; 422 if
            ``parser_name`` is being changed and the registry lookup for
            the new name fails.
    """
    record = await db.get(ProxySource, source_id)
    if record is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Source not found",
        )

    # exclude_unset keeps only the fields the client actually sent.
    changes = body.model_dump(exclude_unset=True)

    if "parser_name" in changes:
        requested_parser = changes["parser_name"]
        try:
            registry.get_parser(requested_parser)
        except Exception:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
                detail=f"No parser registered with name '{requested_parser}'",
            ) from None

    for attr_name, new_value in changes.items():
        setattr(record, attr_name, new_value)

    await db.commit()
    await db.refresh(record)
    return ProxySourceResponse.model_validate(record)
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a proxy source.

    Args:
        source_id: Primary key of the source to delete.
        db: Database session.

    Raises:
        HTTPException: 404 if no source with ``source_id`` exists.
    """
    target = await db.get(ProxySource, source_id)
    if target is not None:
        await db.delete(target)
        await db.commit()
        return

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Source not found",
    )
@router.post("/{source_id}/scrape")
async def trigger_scrape(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
):
    """Scrape a source immediately and upsert the proxies it lists.

    The source URL is fetched over HTTP, the body is handed to the source's
    registered parser, and every discovered proxy is inserted with status
    ``UNCHECKED``. Rows that already exist for the same
    ``(ip, port, protocol)`` only have their ``source_id`` updated.

    Args:
        source_id: Primary key of the source to scrape.
        db: Database session.
        registry: Plugin registry used to resolve the source's parser.

    Returns:
        A dict with ``source_id`` (as a string) and ``proxies_discovered``
        (the number of entries returned by the parser, duplicates included).

    Raises:
        HTTPException: 404 if the source does not exist; 422 if its parser
            is not registered; 502 if fetching the source URL fails or
            returns an error status.
    """
    source = await db.get(ProxySource, source_id)
    if source is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Source not found",
        )

    try:
        parser = registry.get_parser(source.parser_name)
    except Exception:
        # Same 422 constant as the create/update endpoints in this module.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"Parser '{source.parser_name}' not registered",
        ) from None

    settings = get_settings()
    source_url = str(source.url)

    try:
        async with httpx.AsyncClient(
            timeout=settings.proxy.scrape_timeout_seconds,
            headers={"User-Agent": settings.proxy.scrape_user_agent},
        ) as client:
            response = await client.get(source_url)
            response.raise_for_status()
    except httpx.HTTPError as err:
        # Transport failures and non-2xx responses both surface as 502.
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch source: {err}",
        ) from None

    discovered = await parser.parse(
        raw=response.content,
        source_url=source_url,
        source_id=source.id,
        default_protocol=source.default_protocol.value,
    )

    if discovered:
        from sqlalchemy.dialects.postgresql import insert as pg_insert

        from proxy_pool.proxy.models import ProxyStatus

        # PostgreSQL rejects an INSERT ... ON CONFLICT DO UPDATE whose VALUES
        # list contains the same conflict key twice, so collapse duplicates
        # (keeping first-seen order) before building the statement.
        unique_keys = dict.fromkeys(
            (p.ip, p.port, p.protocol) for p in discovered
        )
        values = [
            {
                "ip": ip,
                "port": port,
                "protocol": protocol,
                "source_id": source.id,
                "status": ProxyStatus.UNCHECKED,
            }
            for ip, port, protocol in unique_keys
        ]

        stmt = pg_insert(Proxy).values(values)
        stmt = stmt.on_conflict_do_update(
            index_elements=["ip", "port", "protocol"],
            set_={"source_id": stmt.excluded.source_id},
        )
        await db.execute(stmt)

    # NOTE(review): datetime.now() is naive local time; confirm whether the
    # last_scraped_at column expects a timezone-aware (UTC) value.
    source.last_scraped_at = datetime.now()
    await db.commit()

    return {
        "source_id": str(source.id),
        "proxies_discovered": len(discovered),
    }
@proxy_router.get("", response_model=ProxyListResponse)
async def list_proxies(
    params: ProxyListParams = Depends(),
    db: AsyncSession = Depends(get_db),
) -> ProxyListResponse:
    """Return one page of proxies matching the given filters.

    Args:
        params: Filter, sort and pagination parameters.
        db: Database session.

    Returns:
        The matching proxies together with the total count reported by
        ``query_proxies`` and the ``limit``/``offset`` that were requested.
    """
    # Every one of these parameters is forwarded to query_proxies unchanged.
    forwarded = (
        "status",
        "protocol",
        "anonymity",
        "country",
        "min_score",
        "max_latency_ms",
        "min_uptime_pct",
        "verified_within_minutes",
        "sort_by",
        "sort_order",
        "limit",
        "offset",
    )
    query_kwargs = {name: getattr(params, name) for name in forwarded}

    proxies, total = await query_proxies(db, **query_kwargs)

    items = [ProxyResponse.model_validate(row) for row in proxies]
    return ProxyListResponse(
        items=items,
        total_count=total,
        limit=params.limit,
        offset=params.offset,
    )
@proxy_router.get("/{proxy_id}", response_model=ProxyResponse)
async def get_proxy(
    proxy_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxyResponse:
    """Return a single proxy by its identifier.

    Args:
        proxy_id: Primary key of the proxy to look up.
        db: Database session.

    Returns:
        The matching proxy.

    Raises:
        HTTPException: 404 if no proxy with ``proxy_id`` exists.
    """
    proxy = await db.get(Proxy, proxy_id)
    if proxy is None:
        # Raised outside any ``except`` block, so there is no exception
        # context to suppress and no ``from None`` is needed.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Proxy not found",
        )
    return ProxyResponse.model_validate(proxy)