"""FastAPI routes for managing proxy sources and querying the proxy pool."""
from datetime import datetime
|
|
from uuid import UUID
|
|
|
|
import httpx
|
|
from fastapi import APIRouter, Depends, HTTPException, status
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from proxy_pool.common.dependencies import get_db, get_registry
|
|
from proxy_pool.config import get_settings
|
|
from proxy_pool.plugins.registry import PluginRegistry
|
|
from proxy_pool.proxy.models import Proxy, ProxySource
|
|
from proxy_pool.proxy.schemas import (
|
|
ProxyListParams,
|
|
ProxyListResponse,
|
|
ProxyResponse,
|
|
ProxySourceCreate,
|
|
ProxySourceResponse,
|
|
ProxySourceUpdate,
|
|
)
|
|
from proxy_pool.proxy.service import query_proxies
|
|
|
|
# Router for CRUD and scrape-trigger endpoints on proxy sources (/sources).
router = APIRouter(prefix="/sources", tags=["sources"])

# Read-only router for querying the proxy pool itself (/proxies).
proxy_router = APIRouter(prefix="/proxies", tags=["proxies"])
|
|
|
|
|
|
@router.get("", response_model=list[ProxySourceResponse])
async def list_sources(
    is_active: bool | None = None,
    db: AsyncSession = Depends(get_db),
) -> list[ProxySourceResponse]:
    """Return all proxy sources, newest first, optionally filtered by active flag."""
    stmt = select(ProxySource).order_by(ProxySource.created_at.desc())
    if is_active is not None:
        stmt = stmt.where(ProxySource.is_active == is_active)

    rows = (await db.execute(stmt)).scalars().all()
    return [ProxySourceResponse.model_validate(row) for row in rows]
|
|
|
|
|
|
@router.post(
    "",
    response_model=ProxySourceResponse,
    status_code=status.HTTP_201_CREATED,
)
async def create_source(
    body: ProxySourceCreate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Create a new proxy source after verifying its parser is registered."""
    # Reject early if the named parser is unknown to the plugin registry.
    try:
        registry.get_parser(body.parser_name)
    except Exception:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"No parser registered with name '{body.parser_name}'",
        ) from None

    new_source = ProxySource(
        url=body.url,
        parser_name=body.parser_name,
        cron_schedule=body.cron_schedule,
        default_protocol=body.default_protocol,
    )
    db.add(new_source)
    await db.commit()
    # Refresh to pick up DB-generated fields (id, created_at) for the response.
    await db.refresh(new_source)
    return ProxySourceResponse.model_validate(new_source)
|
|
|
|
|
|
@router.get("/{source_id}", response_model=ProxySourceResponse)
async def get_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxySourceResponse:
    """Fetch a single proxy source by primary key, or fail with 404."""
    found = await db.get(ProxySource, source_id)
    if found is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Source not found"
        )
    return ProxySourceResponse.model_validate(found)
|
|
|
|
|
|
@router.patch("/{source_id}", response_model=ProxySourceResponse)
async def update_source(
    source_id: UUID,
    body: ProxySourceUpdate,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
) -> ProxySourceResponse:
    """Partially update a proxy source; validates parser_name when provided."""
    source = await db.get(ProxySource, source_id)
    if source is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Source not found"
        )

    # Only fields the client actually sent are applied.
    changes = body.model_dump(exclude_unset=True)

    if "parser_name" in changes:
        # A renamed parser must still resolve in the plugin registry.
        try:
            registry.get_parser(changes["parser_name"])
        except Exception:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
                detail=f"No parser registered with name '{changes['parser_name']}'",
            ) from None

    for attr, new_value in changes.items():
        setattr(source, attr, new_value)

    await db.commit()
    await db.refresh(source)
    return ProxySourceResponse.model_validate(source)
|
|
|
|
|
|
@router.delete("/{source_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_source(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> None:
    """Delete a proxy source by primary key; 404 when it does not exist."""
    doomed = await db.get(ProxySource, source_id)
    if doomed is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Source not found"
        )

    await db.delete(doomed)
    await db.commit()
|
|
|
|
|
|
@router.post("/{source_id}/scrape")
async def trigger_scrape(
    source_id: UUID,
    db: AsyncSession = Depends(get_db),
    registry: PluginRegistry = Depends(get_registry),
):
    """Fetch a source's URL, parse proxies from the response, and upsert them.

    Returns a summary dict with the source id and the number of proxies the
    parser discovered (before de-duplication by the upsert).

    Raises:
        HTTPException 404: unknown source id.
        HTTPException 422: the source references an unregistered parser.
        HTTPException 502: the remote fetch failed.
    """
    source = await db.get(ProxySource, source_id)
    if source is None:
        # Plain raise: not inside an except block, so `from None` is
        # meaningless here (removed; matches get_source/delete_source).
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Source not found",
        )

    try:
        parser = registry.get_parser(source.parser_name)
    except Exception:
        raise HTTPException(
            # HTTP_422_UNPROCESSABLE_CONTENT for consistency with the other
            # handlers in this module (ENTITY is the deprecated alias).
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail=f"Parser '{source.parser_name}' not registered",
        ) from None

    settings = get_settings()

    # Fetch the raw source body; any transport/HTTP error maps to 502.
    try:
        async with httpx.AsyncClient(
            timeout=settings.proxy.scrape_timeout_seconds,
            headers={"User-Agent": settings.proxy.scrape_user_agent},
        ) as client:
            response = await client.get(str(source.url))
            response.raise_for_status()
    except httpx.HTTPError as err:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Failed to fetch source: {err}",
        ) from None

    discovered = await parser.parse(
        raw=response.content,
        source_url=str(source.url),
        source_id=source.id,
        default_protocol=source.default_protocol.value,
    )

    if discovered:
        # Local imports keep the PostgreSQL-dialect dependency out of module
        # scope; Proxy itself is already imported at the top of the file, so
        # the redundant re-import was dropped.
        from sqlalchemy.dialects.postgresql import insert as pg_insert

        from proxy_pool.proxy.models import ProxyStatus

        values = [
            {
                "ip": p.ip,
                "port": p.port,
                "protocol": p.protocol,
                "source_id": source.id,
                "status": ProxyStatus.UNCHECKED,
            }
            for p in discovered
        ]

        # On an (ip, port, protocol) collision, re-attribute the existing row
        # to this source instead of failing the whole batch.
        stmt = pg_insert(Proxy).values(values)
        stmt = stmt.on_conflict_do_update(
            index_elements=["ip", "port", "protocol"],
            set_={"source_id": stmt.excluded.source_id},
        )
        await db.execute(stmt)

    # NOTE(review): naive local timestamp — confirm the column expects naive
    # datetimes; otherwise prefer datetime.now(timezone.utc).
    source.last_scraped_at = datetime.now()
    await db.commit()

    return {
        "source_id": str(source.id),
        "proxies_discovered": len(discovered),
    }
|
|
|
|
|
|
@proxy_router.get("", response_model=ProxyListResponse)
async def list_proxies(
    params: ProxyListParams = Depends(),
    db: AsyncSession = Depends(get_db),
) -> ProxyListResponse:
    """List proxies matching the query-string filters, with pagination info."""
    # Fan the query params out as keyword arguments to the service layer.
    filter_fields = (
        "status",
        "protocol",
        "anonymity",
        "country",
        "min_score",
        "max_latency_ms",
        "min_uptime_pct",
        "verified_within_minutes",
        "sort_by",
        "sort_order",
        "limit",
        "offset",
    )
    rows, total = await query_proxies(
        db, **{field: getattr(params, field) for field in filter_fields}
    )

    return ProxyListResponse(
        items=[ProxyResponse.model_validate(row) for row in rows],
        total_count=total,
        limit=params.limit,
        offset=params.offset,
    )
|
|
|
|
|
|
@proxy_router.get("/{proxy_id}", response_model=ProxyResponse)
async def get_proxy(
    proxy_id: UUID,
    db: AsyncSession = Depends(get_db),
) -> ProxyResponse:
    """Fetch a single proxy by primary key.

    Raises:
        HTTPException 404: no proxy with the given id exists.
    """
    proxy = await db.get(Proxy, proxy_id)
    if proxy is None:
        # `from None` dropped: this raise is not inside an except block, so
        # there is no exception context to suppress (matches get_source).
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Proxy not found",
        )

    return ProxyResponse.model_validate(proxy)
|