"""Harvester: Proxy collection tool Inspired by https://github.com/acidvegas/proxytools """ import time import concurrent.futures import logging from harvester.proxy import fetch_all, validate_socks def load_urls(path): with open(path, 'r', encoding='utf-8') as file: urls = [line.strip() for line in file.readlines()] return urls def write_file(path, data): with open(path, 'w', encoding='utf-8') as file: file.write(data) def main(): """Main entry point.""" logging.basicConfig(level=logging.WARN) # Load proxy source list and fetch proxies urls = load_urls('data/proxy-sources.txt') proxies = fetch_all(urls) print(f'Fetched {len(proxies)} proxies!') # Concurrently validate proxies with ThreadPoolExecutor valid = [] with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor: validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies} for future in concurrent.futures.as_completed(validate_futures): proxy = validate_futures[future] try: response = future.result() response.raise_for_status() except Exception as exception: # TODO: Handle exceptions differently. See https://git.juggalol.com/agatha/harvester/issues/1. logging.info(str(exception)) continue ip = response.text.strip() valid.append(proxy) print(f'{proxy} -> {ip}') # Write to file with timestamp write_file( path=f'proxies/valid-socks-{time.strftime("%Y%m%d%H%M%S")}.txt', data='\n'.join(valid) ) for proxy in valid: print(proxy) if __name__ == '__main__': main()