Update README.md and main.py

This commit is contained in:
agatha 2023-11-06 16:29:44 -05:00
parent 91ec19f659
commit f7777c9b35
2 changed files with 32 additions and 4 deletions

View File

@@ -3,6 +3,7 @@ Python package for harvesting commonly available data, such as free proxy server
## Modules ## Modules
### Proxy ### Proxy
#### fetch_list
The `proxy` module will harvest proxies from URLs with the `fetch_list` function. The `proxy` module will harvest proxies from URLs with the `fetch_list` function.
It functions by running a regular expression against the HTTP response, looking for It functions by running a regular expression against the HTTP response, looking for
@@ -32,6 +33,33 @@ if __name__ == '__main__':
``` ```
#### fetch_all
Proxies can be fetched from multiple source URLs by using the `fetch_all` function.
It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from
the source URLs concurrently using a `ThreadPoolExecutor`:
```python
from harvester.proxy import fetch_all
URLS = [
    'https://api.openproxylist.xyz/socks4.txt',
    'https://api.openproxylist.xyz/socks5.txt',
    'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
]
def main():
    """Main entry point."""
    proxies = fetch_all(URLS)
    print(proxies)
if __name__ == '__main__':
    main()
```
## Testing ## Testing
``` ```
pip install -r requirements.txt pip install -r requirements.txt

View File

@@ -1,7 +1,7 @@
"""Harvester: Proxy collection tool """Harvester: Proxy collection tool
Inspired by https://github.com/acidvegas/proxytools Inspired by https://github.com/acidvegas/proxytools
""" """
from harvester.proxy import fetch_list from harvester.proxy import fetch_all
URLS = [ URLS = [
@@ -12,9 +12,9 @@ URLS = [
def main(): def main():
"""Main entry point.""" """Main entry point."""
for url in URLS: proxies = fetch_all(URLS)
proxies = fetch_list(url) print(proxies)
print(proxies) print(len(proxies))
if __name__ == '__main__': if __name__ == '__main__':