Implement URL unshortening for t.co links

This commit is contained in:
agatha 2024-04-13 19:53:50 -04:00
parent 73df28f14b
commit 53bfc1b6e1
2 changed files with 24 additions and 4 deletions

View File

@ -1,2 +1,3 @@
fastapi fastapi
uvicorn[standard] uvicorn[standard]
requests

View File

@ -1,3 +1,5 @@
import re
import requests
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from typing import Optional from typing import Optional
@ -18,6 +20,21 @@ app.add_middleware(
) )
# Compiled once at import time instead of on every call.
# IGNORECASE / DOTALL let the pattern match upper-case <TITLE> tags and titles
# that span several lines; [^>]* tolerates attributes on the opening tag.
_TITLE_PATTERN = re.compile(
    r"<title\b[^>]*>(.*?)<\/title>",
    re.IGNORECASE | re.DOTALL,
)


def unshorten_url(url: str, timeout: float = 10.0) -> Optional[str]:
    """Fetch *url* and return the text of the response's ``<title>`` element.

    The caller treats the returned title as the unshortened URL.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Returns:
        The title text with surrounding whitespace removed, or ``None`` when
        the response has no ``<title>`` element or the title is empty.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(
        url=url,
        # Browser-like User-Agent; presumably needed so the shortener answers
        # with an HTML page whose title carries the target -- TODO confirm.
        headers={"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0"},
        timeout=timeout,
    )
    match = _TITLE_PATTERN.search(response.text)
    if match is None:
        return None
    title = match.group(1).strip()
    # An empty title carries no usable result; report it like a missing one.
    return title or None
@app.get('/') @app.get('/')
async def receive_url(url: Optional[str] = None): async def receive_url(url: Optional[str] = None):
if url is None: if url is None:
@ -25,7 +42,9 @@ async def receive_url(url: Optional[str] = None):
domain = urlparse(url).netloc domain = urlparse(url).netloc
if domain in SHORTEN_DOMAINS: if domain not in SHORTEN_DOMAINS:
return {"result": "shortened url"}
else:
return {"error": f"cannot shorten {url}"} return {"error": f"cannot shorten {url}"}
unshortened = unshorten_url(url)
return {"result": unshortened}