Compare commits
No commits in common. "db" and "master" have entirely different histories.
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,6 +6,3 @@ __pycache__/
|
|||||||
|
|
||||||
# proxies dev results
|
# proxies dev results
|
||||||
proxies/
|
proxies/
|
||||||
|
|
||||||
# sqlite database
|
|
||||||
*.db
|
|
||||||
|
116
alembic.ini
116
alembic.ini
@ -1,116 +0,0 @@
|
|||||||
# A generic, single database configuration.
|
|
||||||
|
|
||||||
[alembic]
|
|
||||||
# path to migration scripts
|
|
||||||
# Use forward slashes (/) also on windows to provide an os agnostic path
|
|
||||||
script_location = migrations
|
|
||||||
|
|
||||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
|
||||||
# Uncomment the line below if you want the files to be prepended with date and time
|
|
||||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
|
||||||
# for all available tokens
|
|
||||||
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
|
||||||
|
|
||||||
# sys.path path, will be prepended to sys.path if present.
|
|
||||||
# defaults to the current working directory.
|
|
||||||
prepend_sys_path = .
|
|
||||||
|
|
||||||
# timezone to use when rendering the date within the migration file
|
|
||||||
# as well as the filename.
|
|
||||||
# If specified, requires Python >= 3.9 or the backports.zoneinfo library.
|
|
||||||
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
|
||||||
# string value is passed to ZoneInfo()
|
|
||||||
# leave blank for localtime
|
|
||||||
# timezone =
|
|
||||||
|
|
||||||
# max length of characters to apply to the "slug" field
|
|
||||||
# truncate_slug_length = 40
|
|
||||||
|
|
||||||
# set to 'true' to run the environment during
|
|
||||||
# the 'revision' command, regardless of autogenerate
|
|
||||||
# revision_environment = false
|
|
||||||
|
|
||||||
# set to 'true' to allow .pyc and .pyo files without
|
|
||||||
# a source .py file to be detected as revisions in the
|
|
||||||
# versions/ directory
|
|
||||||
# sourceless = false
|
|
||||||
|
|
||||||
# version location specification; This defaults
|
|
||||||
# to migrations/versions. When using multiple version
|
|
||||||
# directories, initial revisions must be specified with --version-path.
|
|
||||||
# The path separator used here should be the separator specified by "version_path_separator" below.
|
|
||||||
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
|
|
||||||
|
|
||||||
# version path separator; As mentioned above, this is the character used to split
|
|
||||||
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
|
||||||
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
|
||||||
# Valid values for version_path_separator are:
|
|
||||||
#
|
|
||||||
# version_path_separator = :
|
|
||||||
# version_path_separator = ;
|
|
||||||
# version_path_separator = space
|
|
||||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
|
||||||
|
|
||||||
# set to 'true' to search source files recursively
|
|
||||||
# in each "version_locations" directory
|
|
||||||
# new in Alembic version 1.10
|
|
||||||
# recursive_version_locations = false
|
|
||||||
|
|
||||||
# the output encoding used when revision files
|
|
||||||
# are written from script.py.mako
|
|
||||||
# output_encoding = utf-8
|
|
||||||
|
|
||||||
sqlalchemy.url = sqlite:///proxies.db
|
|
||||||
|
|
||||||
|
|
||||||
[post_write_hooks]
|
|
||||||
# post_write_hooks defines scripts or Python functions that are run
|
|
||||||
# on newly generated revision scripts. See the documentation for further
|
|
||||||
# detail and examples
|
|
||||||
|
|
||||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
|
||||||
# hooks = black
|
|
||||||
# black.type = console_scripts
|
|
||||||
# black.entrypoint = black
|
|
||||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
|
||||||
|
|
||||||
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
|
||||||
# hooks = ruff
|
|
||||||
# ruff.type = exec
|
|
||||||
# ruff.executable = %(here)s/.venv/bin/ruff
|
|
||||||
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
|
||||||
|
|
||||||
# Logging configuration
|
|
||||||
[loggers]
|
|
||||||
keys = root,sqlalchemy,alembic
|
|
||||||
|
|
||||||
[handlers]
|
|
||||||
keys = console
|
|
||||||
|
|
||||||
[formatters]
|
|
||||||
keys = generic
|
|
||||||
|
|
||||||
[logger_root]
|
|
||||||
level = WARN
|
|
||||||
handlers = console
|
|
||||||
qualname =
|
|
||||||
|
|
||||||
[logger_sqlalchemy]
|
|
||||||
level = WARN
|
|
||||||
handlers =
|
|
||||||
qualname = sqlalchemy.engine
|
|
||||||
|
|
||||||
[logger_alembic]
|
|
||||||
level = INFO
|
|
||||||
handlers =
|
|
||||||
qualname = alembic
|
|
||||||
|
|
||||||
[handler_console]
|
|
||||||
class = StreamHandler
|
|
||||||
args = (sys.stderr,)
|
|
||||||
level = NOTSET
|
|
||||||
formatter = generic
|
|
||||||
|
|
||||||
[formatter_generic]
|
|
||||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
|
||||||
datefmt = %H:%M:%S
|
|
@ -1,21 +0,0 @@
|
|||||||
from sqlalchemy import Column, Integer, String, DateTime
|
|
||||||
from .schema import Base
|
|
||||||
|
|
||||||
|
|
||||||
class Proxy(Base):
    """ORM model mapped to the ``proxies`` table."""

    __tablename__ = 'proxies'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Proxy endpoint; both parts are mandatory.
    host = Column(String, nullable=False)
    port = Column(Integer, nullable=False)
    # Optional metadata columns (nullable).
    asn = Column(String)
    egress_ip = Column(String)
    egress_asn = Column(String)
    # Mandatory timestamp of when the row was added.
    date_added = Column(DateTime, nullable=False)
    # Optional timestamp of the latest validation.
    date_validated = Column(DateTime)

    def __repr__(self):
        """Return a short debugging representation of this proxy."""
        # The timestamp column is ``date_added``; the model defines no
        # ``created_at`` attribute, so reading that name would raise
        # AttributeError whenever a Proxy is printed or logged.
        return (
            f'Proxy(id={self.id}, host={self.host}, port={self.port}, '
            f'egress_ip={self.egress_ip}, date_added={self.date_added})'
        )
|
|
@ -1,10 +0,0 @@
|
|||||||
from sqlalchemy import MetaData, create_engine
|
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
|
||||||
|
|
||||||
# MetaData object handed to the declarative base below.
metadata = MetaData()
Base = declarative_base(metadata=metadata)


def init_db(engine_url):
    """Create the tables registered on ``Base.metadata``.

    An engine is built from ``engine_url`` and passed straight to
    ``create_all``; nothing is returned.
    """
    Base.metadata.create_all(create_engine(engine_url))
|
|
@ -1,26 +0,0 @@
|
|||||||
from sqlalchemy import create_engine
|
|
||||||
from sqlalchemy.orm import sessionmaker
|
|
||||||
|
|
||||||
|
|
||||||
class SessionFactory:
    """Bundle an engine with a ``sessionmaker`` bound to it."""

    def __init__(self, engine_url):
        """Build the engine for ``engine_url`` and bind a session class to it."""
        db_engine = create_engine(engine_url)
        self.engine = db_engine
        self.Session = sessionmaker(bind=db_engine)

    def create_session(self):
        """Return a new session produced by the bound session class."""
        new_session = self.Session()
        return new_session
|
|
||||||
|
|
||||||
|
|
||||||
# Placeholder for the shared SessionFactory; it stays None until
# get_session_factory() is called for the first time.
session_factory = None


def get_session_factory(engine_url):
    """Return the shared SessionFactory, creating it on first use.

    ``engine_url`` is only consulted by the call that creates the factory;
    once a factory exists it is returned unchanged, whatever URL is passed.
    """
    global session_factory
    if session_factory is not None:
        return session_factory
    session_factory = SessionFactory(engine_url)
    return session_factory
|
|
||||||
|
|
||||||
|
|
||||||
def get_session(engine_url):
    """Return a new session obtained from the shared session factory."""
    factory = get_session_factory(engine_url)
    return factory.create_session()
|
|
42
main.py
42
main.py
@ -5,17 +5,8 @@ import time
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
|
||||||
from harvester.db.models import Proxy
|
|
||||||
from harvester.db.schema import init_db
|
|
||||||
from harvester.db.session import SessionFactory
|
|
||||||
from harvester.proxy import fetch_all, validate_socks
|
from harvester.proxy import fetch_all, validate_socks
|
||||||
|
|
||||||
DATABASE_URL = os.environ.get('DATABASE_URL', 'sqlite:///proxies.db')
|
|
||||||
|
|
||||||
init_db(DATABASE_URL)
|
|
||||||
session_factory = SessionFactory(DATABASE_URL)
|
|
||||||
|
|
||||||
|
|
||||||
def read_file(path):
|
def read_file(path):
|
||||||
with open(path, 'r', encoding='utf-8') as file:
|
with open(path, 'r', encoding='utf-8') as file:
|
||||||
@ -29,11 +20,10 @@ def write_file(path, data):
|
|||||||
file.write(data)
|
file.write(data)
|
||||||
|
|
||||||
|
|
||||||
def validate_proxies():
|
def main():
|
||||||
pass
|
"""Main entry point."""
|
||||||
|
logging.basicConfig(level=logging.WARN)
|
||||||
|
|
||||||
|
|
||||||
def gather_proxies():
|
|
||||||
# Load proxy source list and fetch proxies
|
# Load proxy source list and fetch proxies
|
||||||
urls = read_file('data/proxy-sources.txt')
|
urls = read_file('data/proxy-sources.txt')
|
||||||
proxies = fetch_all(urls)
|
proxies = fetch_all(urls)
|
||||||
@ -58,26 +48,6 @@ def gather_proxies():
|
|||||||
valid.append(proxy)
|
valid.append(proxy)
|
||||||
print(f'{proxy} -> {ip}')
|
print(f'{proxy} -> {ip}')
|
||||||
|
|
||||||
# Save to DB
|
|
||||||
session = session_factory.create_session()
|
|
||||||
existing_proxy = session.query(Proxy).filter_by(host=proxy.split(':')[0],
|
|
||||||
port=int(proxy.split(':')[1])).first()
|
|
||||||
|
|
||||||
if existing_proxy:
|
|
||||||
existing_proxy.date_validated = datetime.now()
|
|
||||||
existing_proxy.egress_ip = ip
|
|
||||||
else:
|
|
||||||
new_proxy = Proxy(
|
|
||||||
host=proxy.split(':')[0],
|
|
||||||
port=int(proxy.split(':')[1]),
|
|
||||||
egress_ip=ip,
|
|
||||||
date_added=datetime.now(),
|
|
||||||
date_validated=datetime.now(),
|
|
||||||
)
|
|
||||||
session.add(new_proxy)
|
|
||||||
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
# Create output directory if it does not exist
|
# Create output directory if it does not exist
|
||||||
if not os.path.exists('proxies'):
|
if not os.path.exists('proxies'):
|
||||||
os.makedirs('proxies')
|
os.makedirs('proxies')
|
||||||
@ -96,11 +66,5 @@ def gather_proxies():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main entry point."""
|
|
||||||
logging.basicConfig(level=logging.WARN)
|
|
||||||
gather_proxies()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -1 +0,0 @@
|
|||||||
Generic single-database configuration.
|
|
@ -1,80 +0,0 @@
|
|||||||
from logging.config import fileConfig
|
|
||||||
|
|
||||||
from sqlalchemy import engine_from_config
|
|
||||||
from sqlalchemy import pool
|
|
||||||
|
|
||||||
from alembic import context
|
|
||||||
|
|
||||||
from harvester.db.models import Base
|
|
||||||
|
|
||||||
# this is the Alembic Config object, which provides
|
|
||||||
# access to the values within the .ini file in use.
|
|
||||||
config = context.config
|
|
||||||
|
|
||||||
# Interpret the config file for Python logging.
|
|
||||||
# This line sets up loggers basically.
|
|
||||||
if config.config_file_name is not None:
|
|
||||||
fileConfig(config.config_file_name)
|
|
||||||
|
|
||||||
# add your model's MetaData object here
|
|
||||||
# for 'autogenerate' support
|
|
||||||
# from myapp import mymodel
|
|
||||||
# target_metadata = mymodel.Base.metadata
|
|
||||||
target_metadata = Base.metadata
|
|
||||||
|
|
||||||
# other values from the config, defined by the needs of env.py,
|
|
||||||
# can be acquired:
|
|
||||||
# my_important_option = config.get_main_option("my_important_option")
|
|
||||||
# ... etc.
|
|
||||||
|
|
||||||
|
|
||||||
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The migration context is configured with only the ``sqlalchemy.url``
    option from the config file, not with an Engine or a connection, and
    the migrations are then run inside ``context.begin_transaction()``.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()
|
|
||||||
|
|
||||||
|
|
||||||
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    config file's main section, a connection is opened on it, and the
    migrations run against that connection.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
        )

        with context.begin_transaction():
            context.run_migrations()
|
|
||||||
|
|
||||||
|
|
||||||
# Pick the migration runner that matches the mode Alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
|
@ -1,26 +0,0 @@
|
|||||||
"""${message}
|
|
||||||
|
|
||||||
Revision ID: ${up_revision}
|
|
||||||
Revises: ${down_revision | comma,n}
|
|
||||||
Create Date: ${create_date}
|
|
||||||
|
|
||||||
"""
|
|
||||||
from typing import Sequence, Union
|
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
|
||||||
${imports if imports else ""}
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
|
||||||
revision: str = ${repr(up_revision)}
|
|
||||||
down_revision: Union[str, None] = ${repr(down_revision)}
|
|
||||||
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
|
||||||
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
|
||||||
${upgrades if upgrades else "pass"}
|
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
|
|
||||||
${downgrades if downgrades else "pass"}
|
|
@ -1,30 +0,0 @@
|
|||||||
"""Initial migration
|
|
||||||
|
|
||||||
Revision ID: 2ef68467c443
|
|
||||||
Revises:
|
|
||||||
Create Date: 2024-09-22 15:06:36.590091
|
|
||||||
|
|
||||||
"""
|
|
||||||
from typing import Sequence, Union
|
|
||||||
|
|
||||||
from alembic import op
|
|
||||||
import sqlalchemy as sa
|
|
||||||
|
|
||||||
|
|
||||||
# revision identifiers, used by Alembic.
|
|
||||||
revision: str = '2ef68467c443'
|
|
||||||
down_revision: Union[str, None] = None
|
|
||||||
branch_labels: Union[str, Sequence[str], None] = None
|
|
||||||
depends_on: Union[str, Sequence[str], None] = None
|
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
    """Apply this revision; it currently performs no schema operations."""
|
|
||||||
|
|
||||||
|
|
||||||
def downgrade() -> None:
    """Revert this revision; it currently performs no schema operations."""
|
|
@ -1,4 +1,2 @@
|
|||||||
requests
|
requests
|
||||||
requests[socks]
|
requests[socks]
|
||||||
sqlalchemy
|
|
||||||
alembic
|
|
Loading…
Reference in New Issue
Block a user