2023-11-06 19:41:05 +00:00
|
|
|
"""Harvester: Proxy collection tool

Inspired by https://github.com/acidvegas/proxytools
"""
|
2023-11-07 23:53:07 +00:00
|
|
|
import concurrent.futures
|
2023-11-06 21:47:22 +00:00
|
|
|
import logging
|
2023-11-07 23:53:07 +00:00
|
|
|
from harvester.proxy import fetch_all, validate_socks
|
2023-11-06 19:41:05 +00:00
|
|
|
|
|
|
|
|
2023-11-07 23:53:07 +00:00
|
|
|
def load_urls(path):
    """Load proxy source URLs from a text file.

    Args:
        path: Path to a UTF-8 text file containing one URL per line.

    Returns:
        A list of URL strings with surrounding whitespace stripped.
        Blank lines are skipped so downstream fetchers never receive
        an empty URL.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'r', encoding='utf-8') as file:
        # Iterate the file object directly instead of readlines():
        # avoids materializing the whole file as an extra list.
        return [line.strip() for line in file if line.strip()]
|
|
|
|
|
2023-11-06 19:41:05 +00:00
|
|
|
|
|
|
|
def main():
    """Main entry point."""
    logging.basicConfig(level=logging.WARN)

    # Pull candidate proxies from every configured source URL.
    source_urls = load_urls('data/proxy-sources.txt')
    candidates = fetch_all(source_urls)
    print(f'Fetched {len(candidates)} proxies!')

    # Fan validation out across a thread pool; a candidate survives
    # only when its validation request completes with a 2xx status.
    valid = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
        future_to_proxy = {
            executor.submit(validate_socks, candidate): candidate
            for candidate in candidates
        }
        for future in concurrent.futures.as_completed(future_to_proxy):
            proxy = future_to_proxy[future]
            try:
                response = future.result()
                response.raise_for_status()
            except Exception as exception:
                # TODO: Handle exceptions differently. See issues.
                logging.info(str(exception))
                continue
            ip = response.text.strip()
            valid.append(proxy)
            print(f'{proxy} -> {ip}')

    # Persist the working proxies, one per line.
    with open('data/valid-socks.txt', 'w', encoding='utf-8') as file:
        file.write('\n'.join(valid))

    # Echo the surviving proxies once more for easy copy/paste.
    for proxy in valid:
        print(proxy)
|
2023-11-06 19:41:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|