From 1c47b53b4c02e0db523fd9315132dfa7722a06c0 Mon Sep 17 00:00:00 2001 From: agatha Date: Tue, 7 Nov 2023 18:53:07 -0500 Subject: [PATCH] Implement simple validation --- harvester/proxy.py | 23 +++++++++-- main.py | 101 +++++++++++++++++---------------------------- requirements.txt | 3 +- 3 files changed, 60 insertions(+), 67 deletions(-) diff --git a/harvester/proxy.py b/harvester/proxy.py index d7b5c81..f0eabfb 100644 --- a/harvester/proxy.py +++ b/harvester/proxy.py @@ -23,7 +23,7 @@ def fetch_list(url): with WARNING and an empty list will be returned. """ try: - response = requests.get(url) + response = requests.get(url, timeout=5) response.raise_for_status() except requests.RequestException: logging.warning(f'Error fetching proxies from {url}') @@ -63,6 +63,21 @@ def fetch_all(urls, max_workers=8): return proxies -def validate(proxy, type='http'): - # Check regex - return None +def validate_socks(proxy): + """Validate a SOCKS proxy. + + Args: + proxy (str): Proxy connection string. [username:password]@server:port. + + Returns: + requests.Response: Response object. + + Raises: + requests.Exception and subclasses. + """ + response = requests.get( + 'https://icanhazip.com', + proxies={'http': f'socks5://{proxy}', 'https': f'socks5://{proxy}'}, + timeout=10 + ) + return response diff --git a/main.py b/main.py index 8482e85..2b15b7a 100644 --- a/main.py +++ b/main.py @@ -1,74 +1,51 @@ """Harvester: Proxy collection tool Inspired by https://github.com/acidvegas/proxytools """ +import concurrent.futures import logging -from harvester.proxy import fetch_all +from harvester.proxy import fetch_all, validate_socks -URLS = [ - 'https://api.openproxylist.xyz/socks4.txt', - 'https://api.openproxylist.xyz/socks5.txt', - 'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4', - 'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4', - 'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5', - 'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5', - 'https://proxy-list.download/api/v1/get?type=socks4', - 'https://proxy-list.download/api/v1/get?type=socks5', - 'https://proxyscan.io/download?type=socks4', - 'https://proxyscan.io/download?type=socks5', - 'https://proxyspace.pro/socks4.txt', - 'https://proxyspace.pro/socks5.txt', - 'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt', - 'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt', - 'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt', - 'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt', - 'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt', - 'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt', - 'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt', - 'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt', - 'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt', - 'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt', - 'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt', - 'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt', - 'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt', - 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt', - 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt', - 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt', - 'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt', - 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt', - 'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt', - 'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt', - 'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt', - 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt', - 'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt', - 'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt', - 'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt', - 'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt', - 'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt', - 'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt', - 'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt', - 'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt', - 'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt', - 'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt', - 'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt', - 'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt', - 'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt', - 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt', - 'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt', - 'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt', - 'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt', - 'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt', - 'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt', - 'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt', - 'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt', - 'https://spys.me/socks.txt', - 'https://spys.one/en/socks-proxy-list/' -] +def load_urls(path): + with open(path, 'r', encoding='utf-8') as file: + urls = [line.strip() for line in file.readlines()] + + return urls + def main(): """Main entry point.""" - logging.basicConfig(level=logging.INFO) - proxies = fetch_all(URLS) + logging.basicConfig(level=logging.WARN) + + # Load proxy source list and fetch proxies + urls = load_urls('data/proxy-sources.txt') + proxies = fetch_all(urls) + print(f'Fetched {len(proxies)} proxies!') + + # Concurrently validate proxies with ThreadPoolExecutor + valid = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor: + validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies} + for future in concurrent.futures.as_completed(validate_futures): + proxy = validate_futures[future] + + try: + response = future.result() + response.raise_for_status() + except Exception as exception: + # TODO: Handle exceptions differently. See issues. + logging.info(str(exception)) + continue + + ip = response.text.strip() + valid.append(proxy) + print(f'{proxy} -> {ip}') + + with open('data/valid-socks.txt', 'w', encoding='utf-8') as file: + file.write('\n'.join(valid)) + + for proxy in valid: + print(proxy) if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index 663bd1f..10e8710 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -requests \ No newline at end of file +requests +requests[socks] \ No newline at end of file