From 53bfc1b6e11ea4f7e85eda291108e14384205d95 Mon Sep 17 00:00:00 2001
From: agatha
Date: Sat, 13 Apr 2024 19:53:50 -0400
Subject: [PATCH] implement url unshortening for t.co links

---
 server/requirements.txt |  3 ++-
 server/src/main.py      | 39 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/server/requirements.txt b/server/requirements.txt
index 8e0578a..4bf6b8c 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,2 +1,3 @@
 fastapi
-uvicorn[standard]
\ No newline at end of file
+uvicorn[standard]
+requests
\ No newline at end of file
diff --git a/server/src/main.py b/server/src/main.py
index 62a3674..91792ba 100644
--- a/server/src/main.py
+++ b/server/src/main.py
@@ -1,3 +1,6 @@
+import re
+import requests
 from fastapi import FastAPI
+from fastapi.concurrency import run_in_threadpool
 from fastapi.middleware.cors import CORSMiddleware
 from typing import Optional
@@ -18,6 +21,33 @@ app.add_middleware(
 )
 
 
+# The destination URL is read from the <title> of the shortener's page.
+TITLE_PATTERN = re.compile(r"<title>(.*?)<\/title>")
+USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0"
+# Seconds to wait for the shortener before giving up.
+REQUEST_TIMEOUT = 10
+
+
+def unshorten_url(url: str) -> Optional[str]:
+    """Return the destination of a shortened URL, or None if unavailable.
+
+    The page is fetched with a browser-like User-Agent and the text of
+    its <title> element is returned. None is returned when the request
+    fails or the page has no <title>.
+    """
+    try:
+        response = requests.get(
+            url=url,
+            headers={"User-Agent": USER_AGENT},
+            timeout=REQUEST_TIMEOUT,
+        )
+    except requests.RequestException:
+        return None
+
+    match = TITLE_PATTERN.search(response.text)
+    return match.group(1) if match else None
+
+
 @app.get('/')
 async def receive_url(url: Optional[str] = None):
     if url is None:
@@ -25,7 +55,10 @@ async def receive_url(url: Optional[str] = None):
 
     domain = urlparse(url).netloc
 
-    if domain in SHORTEN_DOMAINS:
-        return {"result": "shortened url"}
-    else:
+    if domain not in SHORTEN_DOMAINS:
         return {"error": f"cannot shorten {url}"}
+
+    # requests is blocking; run it in a worker thread so the event loop stays free.
+    unshortened = await run_in_threadpool(unshorten_url, url)
+
+    return {"result": unshortened}