Update README.md and main.py
parent 91ec19f659
commit f7777c9b35
README.md (28 changed lines)
@@ -3,6 +3,7 @@ Python package for harvesting commonly available data, such as free proxy server
 
 ## Modules
 ### Proxy
+#### fetch_list
 The `proxy` module will harvest proxies from URLs with the `fetch_list` function.
 
 It functions by running a regular expression against the HTTP response, looking for
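For context, the regex-driven harvesting described above can be sketched roughly as follows. This is a minimal illustration only, not the package's actual code: the `PROXY_RE` pattern and the exact `fetch_list` signature shown here are assumptions.

```python
# Minimal sketch of a regex-based proxy harvester (assumed shape, not the real harvester.proxy code).
import re
import urllib.request

# Assumed pattern: an IPv4 address followed by a port, e.g. 203.0.113.7:1080.
PROXY_RE = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d{1,5}\b')


def fetch_list(url, timeout=10):
    """Fetch one source URL and return every proxy-looking match in the response body."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read().decode('utf-8', errors='replace')
    return PROXY_RE.findall(body)
```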
@@ -32,6 +33,33 @@ if __name__ == '__main__':
 
 ```
 
+#### fetch_all
+Proxies can be fetched from multiple source URLs by using the `fetch_all` function.
+
+It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from
+the source URLs concurrently using a `ThreadPoolExecutor`:
+
+```python
+from harvester.proxy import fetch_all
+
+
+URLS = [
+    'https://api.openproxylist.xyz/socks4.txt',
+    'https://api.openproxylist.xyz/socks5.txt',
+    'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
+]
+
+
+def main():
+    """Main entry point."""
+    proxies = fetch_all(URLS)
+    print(proxies)
+
+
+if __name__ == '__main__':
+    main()
+```
+
 ## Testing
 ```
 pip install -r requirements.txt
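The `ThreadPoolExecutor` usage the new README text describes could look roughly like this under the hood. This is a sketch under assumptions (the default `max_workers` value, a flat list return, and reuse of `fetch_list`), not necessarily how `harvester.proxy` implements `fetch_all`:

```python
# Assumed sketch of a fetch_all built on top of fetch_list with a thread pool.
from concurrent.futures import ThreadPoolExecutor

from harvester.proxy import fetch_list


def fetch_all(urls, max_workers=8):
    """Fetch proxies from every source URL concurrently and return the combined list."""
    proxies = []
    # The default of 8 workers is an arbitrary assumption; the README only says max_workers is optional.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields each URL's result in input order once its worker finishes.
        for batch in executor.map(fetch_list, urls):
            proxies.extend(batch)
    return proxies
```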
main.py (6 changed lines)
@@ -1,7 +1,7 @@
 """Harvester: Proxy collection tool
 Inspired by https://github.com/acidvegas/proxytools
 """
-from harvester.proxy import fetch_list
+from harvester.proxy import fetch_all
 
 
 URLS = [
@@ -12,9 +12,9 @@ URLS = [
 
 def main():
     """Main entry point."""
-    for url in URLS:
-        proxies = fetch_list(url)
+    proxies = fetch_all(URLS)
     print(proxies)
+    print(len(proxies))
 
 
 if __name__ == '__main__':
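As a usage note: since the README documents an optional `max_workers` parameter, the updated `main()` could also set the concurrency explicitly. A small hypothetical variation (the value 4 is arbitrary):

```python
def main():
    """Main entry point."""
    # Passing max_workers is optional per the README; 4 is an arbitrary illustrative value.
    proxies = fetch_all(URLS, max_workers=4)
    print(proxies)
    print(len(proxies))
```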