"""Harvester: Proxy collection tool Inspired by https://github.com/acidvegas/proxytools """ import concurrent.futures import logging from harvester.proxy import fetch_all, validate_socks def load_urls(path): with open(path, 'r', encoding='utf-8') as file: urls = [line.strip() for line in file.readlines()] return urls def main(): """Main entry point.""" logging.basicConfig(level=logging.WARN) # Load proxy source list and fetch proxies urls = load_urls('data/proxy-sources.txt') proxies = fetch_all(urls) print(f'Fetched {len(proxies)} proxies!') # Concurrently validate proxies with ThreadPoolExecutor valid = [] with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor: validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies} for future in concurrent.futures.as_completed(validate_futures): proxy = validate_futures[future] try: response = future.result() response.raise_for_status() except Exception as exception: # TODO: Handle exceptions differently. See issues. logging.info(str(exception)) continue ip = response.text.strip() valid.append(proxy) print(f'{proxy} -> {ip}') with open('data/valid-socks.txt', 'w', encoding='utf-8') as file: file.write('\n'.join(valid)) for proxy in valid: print(proxy) if __name__ == '__main__': main()