Implement simple validation
This commit is contained in:
parent
9048aaac19
commit
1c47b53b4c
@ -23,7 +23,7 @@ def fetch_list(url):
|
||||
with WARNING and an empty list will be returned.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(url)
|
||||
response = requests.get(url, timeout=5)
|
||||
response.raise_for_status()
|
||||
except requests.RequestException:
|
||||
logging.warning(f'Error fetching proxies from {url}')
|
||||
@ -63,6 +63,21 @@ def fetch_all(urls, max_workers=8):
|
||||
return proxies
|
||||
|
||||
|
||||
def validate(proxy, type='http'):
    """Validate a proxy connection string (not implemented yet).

    Args:
        proxy: Proxy connection string. Currently unused.
        type: Proxy type, 'http' by default. Currently unused. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        None: Always, until the check below is implemented.
    """
    # TODO: Check regex
    return
|
||||
def validate_socks(proxy):
    """Send a test request to https://icanhazip.com through a SOCKS5 proxy.

    Args:
        proxy (str): Proxy connection string, e.g. ``server:port`` or
            ``username:password@server:port``. It is appended to the
            ``socks5://`` scheme as-is.

    Returns:
        requests.Response: Response object. The HTTP status is not checked
            here, so callers should inspect it themselves.

    Raises:
        requests.RequestException: Or one of its subclasses, when the
            request fails or does not complete within 10 seconds.
    """
    # The same SOCKS5 endpoint handles both plain and TLS traffic.
    proxy_url = f'socks5://{proxy}'
    return requests.get(
        'https://icanhazip.com',
        proxies=dict.fromkeys(('http', 'https'), proxy_url),
        timeout=10,
    )
|
||||
|
101
main.py
101
main.py
@ -1,74 +1,51 @@
|
||||
"""Harvester: Proxy collection tool
|
||||
Inspired by https://github.com/acidvegas/proxytools
|
||||
"""
|
||||
import concurrent.futures
|
||||
import logging
|
||||
from harvester.proxy import fetch_all
|
||||
from harvester.proxy import fetch_all, validate_socks
|
||||
|
||||
|
||||
URLS = [
|
||||
'https://api.openproxylist.xyz/socks4.txt',
|
||||
'https://api.openproxylist.xyz/socks5.txt',
|
||||
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
|
||||
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4',
|
||||
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5',
|
||||
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5',
|
||||
'https://proxy-list.download/api/v1/get?type=socks4',
|
||||
'https://proxy-list.download/api/v1/get?type=socks5',
|
||||
'https://proxyscan.io/download?type=socks4',
|
||||
'https://proxyscan.io/download?type=socks5',
|
||||
'https://proxyspace.pro/socks4.txt',
|
||||
'https://proxyspace.pro/socks5.txt',
|
||||
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt',
|
||||
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt',
|
||||
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt',
|
||||
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt',
|
||||
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
|
||||
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt',
|
||||
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt',
|
||||
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt',
|
||||
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt',
|
||||
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt',
|
||||
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
|
||||
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
|
||||
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
|
||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
|
||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
|
||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt',
|
||||
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt',
|
||||
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
|
||||
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
|
||||
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt',
|
||||
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
|
||||
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
|
||||
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
|
||||
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt',
|
||||
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt',
|
||||
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt',
|
||||
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt',
|
||||
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
|
||||
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
|
||||
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt',
|
||||
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt',
|
||||
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt',
|
||||
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt',
|
||||
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
|
||||
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
|
||||
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
|
||||
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
|
||||
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt',
|
||||
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
|
||||
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
|
||||
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
|
||||
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt',
|
||||
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
|
||||
'https://spys.me/socks.txt',
|
||||
'https://spys.one/en/socks-proxy-list/'
|
||||
]
|
||||
def load_urls(path):
    """Load proxy source URLs from a text file, one URL per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so that empty strings are never handed on as URLs.

    Args:
        path (str): Path to a UTF-8 encoded text file.

    Returns:
        list: Non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'r', encoding='utf-8') as file:
        # Iterate the file lazily instead of materialising readlines().
        stripped = (line.strip() for line in file)
        return [url for url in stripped if url]
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Loads the proxy source URLs from ``data/proxy-sources.txt``, fetches the
    proxies they list, checks every proxy concurrently with
    ``validate_socks`` and writes the ones that answered successfully to
    ``data/valid-socks.txt``.
    """
    logging.basicConfig(level=logging.WARNING)

    # Load proxy source list and fetch proxies
    urls = load_urls('data/proxy-sources.txt')
    proxies = fetch_all(urls)
    print(f'Fetched {len(proxies)} proxies!')

    # Concurrently validate proxies with ThreadPoolExecutor
    valid = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
        # Map each future back to the proxy it is testing.
        validate_futures = {
            executor.submit(validate_socks, proxy): proxy for proxy in proxies
        }
        for future in concurrent.futures.as_completed(validate_futures):
            proxy = validate_futures[future]

            try:
                response = future.result()
                response.raise_for_status()
            except Exception as exception:
                # TODO: Handle exceptions differently. See issues.
                # A failing proxy must not abort the run, so it is logged
                # and skipped. These records are below the WARNING level
                # configured above.
                logging.info('%s', exception)
                continue

            ip = response.text.strip()
            valid.append(proxy)
            print(f'{proxy} -> {ip}')

    with open('data/valid-socks.txt', 'w', encoding='utf-8') as file:
        file.write('\n'.join(valid))

    for proxy in valid:
        print(proxy)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1 +1,2 @@
|
||||
requests
|
||||
requests
|
||||
requests[socks]
|
Loading…
Reference in New Issue
Block a user