diff --git a/README.md b/README.md index eb287f5..c4fdd1c 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Python package for harvesting commonly available data, such as free proxy server ## Modules ### Proxy +#### fetch_list The `proxy` module will harvest proxies from URLs with the `fetch_list` function. It functions by running a regular expression against the HTTP response, looking for @@ -32,6 +33,33 @@ if __name__ == '__main__': ``` +#### fetch_all +Proxies can be fetched from multiple source URLs by using the `fetch_all` function. + +It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from +the source URLs concurrently using a `ThreadPoolExecutor`: + +```python +from harvester.proxy import fetch_all + + +URLS = [ + 'https://api.openproxylist.xyz/socks4.txt', + 'https://api.openproxylist.xyz/socks5.txt', + 'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4', +] + + +def main(): + """Main entry point.""" + proxies = fetch_all(URLS) + print(proxies) + + +if __name__ == '__main__': + main() +``` + ## Testing ``` pip install -r requirements.txt diff --git a/main.py b/main.py index 14ea066..154567a 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ """Harvester: Proxy collection tool Inspired by https://github.com/acidvegas/proxytools """ -from harvester.proxy import fetch_list +from harvester.proxy import fetch_all URLS = [ @@ -12,9 +12,9 @@ URLS = [ def main(): """Main entry point.""" - for url in URLS: - proxies = fetch_list(url) - print(proxies) + proxies = fetch_all(URLS) + print(proxies) + print(len(proxies)) if __name__ == '__main__':