Compare commits

...

4 Commits

Author SHA1 Message Date
5ac63e1fb5 feat: write proxies to database 2024-09-22 14:15:01 -04:00
d634e51cdf fix __repr__ 2024-09-22 14:13:06 -04:00
e124aea332 update .gitignore to ignore sqlite db 2024-09-22 14:11:50 -04:00
336cd61a8c add init_db 2024-09-22 14:11:17 -04:00
5 changed files with 37 additions and 9 deletions

3
.gitignore vendored
View File

@ -6,3 +6,6 @@ __pycache__/
# proxies dev results # proxies dev results
proxies/ proxies/
# sqlite database
*.db

View File

@ -1,9 +1,5 @@
# harvester/db/models.py
from sqlalchemy import Column, Integer, String, DateTime from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declarative_base from .schema import Base
Base = declarative_base()
class Proxy(Base): class Proxy(Base):
@ -21,5 +17,5 @@ class Proxy(Base):
def __repr__(self): def __repr__(self):
return ( return (
f'Proxy(id={self.id}, host={self.host}, port={self.port}, ' f'Proxy(id={self.id}, host={self.host}, port={self.port}, '
f'egress={self.egress}, created_at={self.created_at})' f'egress_ip={self.egress_ip}, date_added={self.created_at})'
) )

10
harvester/db/schema.py Normal file
View File

@ -0,0 +1,10 @@
from sqlalchemy import MetaData, create_engine
from sqlalchemy.ext.declarative import declarative_base
# Shared MetaData instance; it is handed to the declarative base below.
metadata = MetaData()
# Declarative base class for the ORM models; init_db() reads the table
# definitions from Base.metadata.
Base = declarative_base(metadata=metadata)
def init_db(engine_url):
    """Create the tables registered on ``Base.metadata``.

    Args:
        engine_url: Database URL passed straight to ``create_engine``.

    The engine built here is used only for schema creation and is not
    returned, so its connection pool is disposed before leaving the
    function instead of being left for garbage collection.
    """
    engine = create_engine(engine_url)
    try:
        Base.metadata.create_all(engine)
    finally:
        # Release pooled connections even if table creation fails.
        engine.dispose()

View File

@ -1,5 +1,3 @@
# harvester/db/session.py
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
@ -25,4 +23,4 @@ def get_session_factory(engine_url):
def get_session(engine_url): def get_session(engine_url):
return get_session_factory(engine_url).create_session() return get_session_factory(engine_url).create_session()

21
main.py
View File

@ -5,8 +5,17 @@ import time
import concurrent.futures import concurrent.futures
import logging import logging
import os import os
from datetime import datetime
from harvester.db.models import Proxy
from harvester.db.schema import init_db
from harvester.db.session import SessionFactory
from harvester.proxy import fetch_all, validate_socks from harvester.proxy import fetch_all, validate_socks
# Database URL: read from the DATABASE_URL environment variable, falling back
# to the SQLite URL below when the variable is not set.
DATABASE_URL = os.environ.get('DATABASE_URL', 'sqlite:///proxies.db')
# NOTE: this runs at import time. init_db() builds an engine for the URL and
# calls Base.metadata.create_all() on it.
init_db(DATABASE_URL)
# Module-level factory; main() calls session_factory.create_session() when
# saving a validated proxy.
session_factory = SessionFactory(DATABASE_URL)
def read_file(path): def read_file(path):
with open(path, 'r', encoding='utf-8') as file: with open(path, 'r', encoding='utf-8') as file:
@ -48,6 +57,18 @@ def main():
valid.append(proxy) valid.append(proxy)
print(f'{proxy} -> {ip}') print(f'{proxy} -> {ip}')
# Save to DB
session = session_factory.create_session()
proxy = Proxy(
host=proxy.split(':')[0],
port=int(proxy.split(':')[1]),
egress_ip=ip,
date_added=datetime.now(),
date_validated=datetime.now(),
)
session.add(proxy)
session.commit()
# Create output directory if it does not exist # Create output directory if it does not exist
if not os.path.exists('proxies'): if not os.path.exists('proxies'):
os.makedirs('proxies') os.makedirs('proxies')