Update README.md and main.py

agatha 2023-11-06 16:29:44 -05:00
parent 91ec19f659
commit f7777c9b35
2 changed files with 32 additions and 4 deletions

README.md

@@ -3,6 +3,7 @@ Python package for harvesting commonly available data, such as free proxy server
## Modules
### Proxy
#### fetch_list
The `proxy` module will harvest proxies from URLs with the `fetch_list` function.
It functions by running a regular expression against the HTTP response, looking for
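A regex-based harvester of this kind can be sketched as follows (a minimal illustration; `PROXY_RE`, `parse_proxies`, and the exact pattern are assumptions, not the package's actual implementation):

```python
import re
import urllib.request

# Matches IPv4 host:port pairs such as 1.2.3.4:8080 (assumed pattern).
PROXY_RE = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d{1,5}\b')

def parse_proxies(text):
    """Extract proxy addresses from a raw HTTP response body."""
    return PROXY_RE.findall(text)

def fetch_list(url, timeout=10):
    """Download a source URL and return the proxies found in it."""
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        body = resp.read().decode('utf-8', errors='replace')
    return parse_proxies(body)
```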
@@ -32,6 +33,33 @@ if __name__ == '__main__':
```
#### fetch_all
Proxies can be fetched from multiple source URLs by using the `fetch_all` function.
It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from
the source URLs concurrently using a `ThreadPoolExecutor`:
```python
from harvester.proxy import fetch_all

URLS = [
    'https://api.openproxylist.xyz/socks4.txt',
    'https://api.openproxylist.xyz/socks5.txt',
    'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
]

def main():
    """Main entry point."""
    proxies = fetch_all(URLS)
    print(proxies)

if __name__ == '__main__':
    main()
```
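Internally, a `ThreadPoolExecutor`-based `fetch_all` might look like the sketch below (illustrative only; the injectable `fetch` parameter is an assumption added here so the sketch can be exercised without network access, and is not part of the documented API):

```python
from concurrent.futures import ThreadPoolExecutor

def fetch_all(urls, max_workers=10, fetch=None):
    """Fetch proxies from every source URL concurrently.

    `fetch` is a single-URL fetcher; injectable here purely for
    illustration and testing (assumed parameter).
    """
    if fetch is None:
        # Assumed real fetcher from the package.
        from harvester.proxy import fetch_list as fetch
    proxies = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map preserves input order while fetching in parallel.
        for result in executor.map(fetch, urls):
            proxies.extend(result)
    return proxies
```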
## Testing
```
pip install -r requirements.txt
```

main.py

@@ -1,7 +1,7 @@
 """Harvester: Proxy collection tool
 Inspired by https://github.com/acidvegas/proxytools
 """
-from harvester.proxy import fetch_list
+from harvester.proxy import fetch_all
 URLS = [
@@ -12,9 +12,9 @@ URLS = [
 def main():
     """Main entry point."""
-    for url in URLS:
-        proxies = fetch_list(url)
-        print(proxies)
+    proxies = fetch_all(URLS)
+    print(proxies)
+    print(len(proxies))
 if __name__ == '__main__':