71 lines
2.1 KiB
Python
71 lines
2.1 KiB
Python
"""Harvester: Proxy collection tool
|
|
Inspired by https://github.com/acidvegas/proxytools
|
|
"""
|
|
import time
|
|
import concurrent.futures
|
|
import logging
|
|
import os
|
|
from harvester.proxy import fetch_all, validate_socks
|
|
|
|
|
|
def read_file(path):
    """Return the lines of the text file at *path*, whitespace-stripped.

    The file is decoded as UTF-8. Every line is kept, so a blank line in the
    file becomes an empty string in the returned list.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        # Iterating the handle yields the same lines readlines() would.
        return list(map(str.strip, handle))
|
|
|
|
|
|
def write_file(path, data):
    """Write the string *data* to *path* as UTF-8.

    The file is opened in ``'w'`` mode, so existing content is replaced.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        handle.write(data)
|
|
|
|
|
|
def main():
    """Fetch, validate, and save SOCKS proxies.

    Steps:
      1. Read source URLs from ``data/proxy-sources.txt`` and pass them to
         ``fetch_all`` to obtain candidate proxies.
      2. Submit every candidate to ``validate_socks`` on a thread pool and
         keep those whose result passes ``raise_for_status()``.
      3. Write two files under ``proxies/`` that share one timestamp: a
         newline-separated list of the validated proxies, and a proxychains
         configuration made of ``templates/proxychains.conf`` followed by
         one ``socks5`` line per validated proxy.
    """
    # NOTE: with the level at WARN, the logging.info() call below is
    # suppressed unless the level is lowered.
    logging.basicConfig(level=logging.WARN)

    # Load proxy source list and fetch proxies
    urls = read_file('data/proxy-sources.txt')
    proxies = fetch_all(urls)
    print(f'Fetched {len(proxies)} proxies!')

    # Concurrently validate proxies with ThreadPoolExecutor
    valid = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
        # Map each future back to its proxy so results can be attributed
        # as they complete, in whatever order that happens.
        validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies}
        for future in concurrent.futures.as_completed(validate_futures):
            proxy = validate_futures[future]

            try:
                response = future.result()
                response.raise_for_status()
            except Exception as exception:
                # TODO: Handle exceptions differently. See https://git.juggalol.com/agatha/harvester/issues/1.
                logging.info(str(exception))
                continue

            # Presumably the response body is the exit IP seen through the
            # proxy -- confirm against validate_socks.
            ip = response.text.strip()
            valid.append(proxy)
            print(f'{proxy} -> {ip}')

    # Create output directory if it does not exist; exist_ok avoids the
    # race between checking for the directory and creating it.
    os.makedirs('proxies', exist_ok=True)

    # Compute the timestamp once so both output files of a run share it.
    timestamp = time.strftime("%Y%m%d%H%M%S")

    # Write validated proxies to file with timestamp
    write_file(
        path=f'proxies/valid-socks-{timestamp}.txt',
        data='\n'.join(valid)
    )

    # Write proxychains conf. Only validated proxies are listed; the
    # unvalidated candidates in `proxies` would fill the config with
    # entries that failed or were never confirmed to work.
    proxychains_template = read_file('templates/proxychains.conf')
    proxychains_data = [f'socks5 {proxy.replace(":", " ")}' for proxy in valid]
    write_file(
        path=f'proxies/proxychains-{timestamp}.conf',
        data='\n'.join(proxychains_template + proxychains_data)
    )
|
|
|
|
|
|
# Run the harvester only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
|