Implement simple validation

This commit is contained in:
agatha 2023-11-07 18:53:07 -05:00
parent 9048aaac19
commit 1c47b53b4c
3 changed files with 60 additions and 67 deletions

View File

@ -23,7 +23,7 @@ def fetch_list(url):
with WARNING and an empty list will be returned. with WARNING and an empty list will be returned.
""" """
try: try:
response = requests.get(url) response = requests.get(url, timeout=5)
response.raise_for_status() response.raise_for_status()
except requests.RequestException: except requests.RequestException:
logging.warning(f'Error fetching proxies from {url}') logging.warning(f'Error fetching proxies from {url}')
@ -63,6 +63,21 @@ def fetch_all(urls, max_workers=8):
return proxies return proxies
def validate(proxy, type='http'): def validate_socks(proxy):
# Check regex """Validate a SOCKS proxy.
return None
Args:
proxy (str): Proxy connection string in the form [username:password@]server:port.
Returns:
requests.Response: Response from https://icanhazip.com, requested through the proxy.
Raises:
requests.RequestException and subclasses.
"""
response = requests.get(
'https://icanhazip.com',
proxies={'http': f'socks5://{proxy}', 'https': f'socks5://{proxy}'},
timeout=10
)
return response

101
main.py
View File

@ -1,74 +1,51 @@
"""Harvester: Proxy collection tool """Harvester: Proxy collection tool
Inspired by https://github.com/acidvegas/proxytools Inspired by https://github.com/acidvegas/proxytools
""" """
import concurrent.futures
import logging import logging
from harvester.proxy import fetch_all from harvester.proxy import fetch_all, validate_socks
URLS = [ def load_urls(path):
'https://api.openproxylist.xyz/socks4.txt', with open(path, 'r', encoding='utf-8') as file:
'https://api.openproxylist.xyz/socks5.txt', urls = [line.strip() for line in file.readlines()]
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4', return urls
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5',
'https://proxy-list.download/api/v1/get?type=socks4',
'https://proxy-list.download/api/v1/get?type=socks5',
'https://proxyscan.io/download?type=socks4',
'https://proxyscan.io/download?type=socks5',
'https://proxyspace.pro/socks4.txt',
'https://proxyspace.pro/socks5.txt',
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt',
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt',
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt',
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt',
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt',
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt',
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt',
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt',
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt',
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt',
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt',
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
'https://spys.me/socks.txt',
'https://spys.one/en/socks-proxy-list/'
]
def main(): def main():
"""Main entry point.""" """Main entry point."""
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.WARN)
proxies = fetch_all(URLS)
# Load proxy source list and fetch proxies
urls = load_urls('data/proxy-sources.txt')
proxies = fetch_all(urls)
print(f'Fetched {len(proxies)} proxies!')
# Concurrently validate proxies with ThreadPoolExecutor
valid = []
with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies}
for future in concurrent.futures.as_completed(validate_futures):
proxy = validate_futures[future]
try:
response = future.result()
response.raise_for_status()
except Exception as exception:
# TODO: Handle exceptions differently. See issues.
logging.info(str(exception))
continue
ip = response.text.strip()
valid.append(proxy)
print(f'{proxy} -> {ip}')
with open('data/valid-socks.txt', 'w', encoding='utf-8') as file:
file.write('\n'.join(valid))
for proxy in valid:
print(proxy)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -1 +1,2 @@
requests requests
requests[socks]