"""Harvester: Proxy collection tool Inspired by https://github.com/acidvegas/proxytools """ import time import concurrent.futures import logging from harvester.proxy import fetch_all, validate_socks def read_file(path): with open(path, 'r', encoding='utf-8') as file: data = [line.strip() for line in file.readlines()] return data def write_file(path, data): with open(path, 'w', encoding='utf-8') as file: file.write(data) def main(): """Main entry point.""" logging.basicConfig(level=logging.WARN) # Load proxy source list and fetch proxies urls = read_file('data/proxy-sources.txt') proxies = fetch_all(urls) print(f'Fetched {len(proxies)} proxies!') # Concurrently validate proxies with ThreadPoolExecutor valid = [] with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor: validate_futures = {executor.submit(validate_socks, proxy): proxy for proxy in proxies} for future in concurrent.futures.as_completed(validate_futures): proxy = validate_futures[future] try: response = future.result() response.raise_for_status() except Exception as exception: # TODO: Handle exceptions differently. See https://git.juggalol.com/agatha/harvester/issues/1. logging.info(str(exception)) continue ip = response.text.strip() valid.append(proxy) print(f'{proxy} -> {ip}') # Write to file with timestamp write_file( path=f'proxies/valid-socks-{time.strftime("%Y%m%d%H%M%S")}.txt', data='\n'.join(valid) ) # Write proxychains conf proxychains_template = read_file('templates/proxychains.conf') proxychains_data = [f'socks5 {proxy.replace(":", " ")}' for proxy in proxies] write_file( path=f'proxies/proxychains-{time.strftime("%Y%m%d%H%M%S")}.conf', data='\n'.join(proxychains_template + proxychains_data) ) if __name__ == '__main__': main()