Compare commits
3 Commits
7538f37596
...
668a77ade0
Author | SHA1 | Date | |
---|---|---|---|
668a77ade0 | |||
1af8a63717 | |||
2fa30b1d13 |
5
.gitignore
vendored
5
.gitignore
vendored
@ -2,4 +2,7 @@
|
|||||||
venv/
|
venv/
|
||||||
|
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
||||||
|
# proxies dev results
|
||||||
|
proxies/
|
||||||
|
63
README.md
63
README.md
@ -1,6 +1,16 @@
|
|||||||
# Harvester
|
# Harvester
|
||||||
Python package for harvesting commonly available data, such as free proxy servers.
|
Python package for harvesting commonly available data, such as free proxy servers.
|
||||||
|
|
||||||
|
## Running the Demo
|
||||||
|
If you just want proxies, run the demo code in [main.py](main.py):
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone https://git.juggalol.com/agatha/harvester
|
||||||
|
pip install -r requirements.txt
|
||||||
|
mkdir proxies
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
## Modules
|
## Modules
|
||||||
### Proxy
|
### Proxy
|
||||||
#### fetch_list
|
#### fetch_list
|
||||||
@ -10,56 +20,11 @@ It functions by running a regular expression against the HTTP response, looking
|
|||||||
strings that match a `username:password@ip:port` pattern where username and password
|
strings that match a `username:password@ip:port` pattern where username and password
|
||||||
are optional.
|
are optional.
|
||||||
|
|
||||||
```python
|
|
||||||
from harvester.proxy import fetch_list
|
|
||||||
|
|
||||||
|
|
||||||
URLS = [
|
|
||||||
'https://api.openproxylist.xyz/socks4.txt',
|
|
||||||
'https://api.openproxylist.xyz/socks5.txt',
|
|
||||||
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main entry point."""
|
|
||||||
for url in URLS:
|
|
||||||
proxies = fetch_list(url)
|
|
||||||
print(proxies)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
#### fetch_all
|
#### fetch_all
|
||||||
Proxies can be fetched from multiple source URLs by using the `fetch_all` function.
|
Proxies can be fetched from multiple source URLs by using the `fetch_all` function.
|
||||||
|
|
||||||
It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from
|
It takes a list of URLs and an optional `max_workers` parameter. Proxies will be fetched from
|
||||||
the source URLs concurrently using a `ThreadPoolExecutor`:
|
the source URLs concurrently using a `ThreadPoolExecutor`.
|
||||||
|
|
||||||
```python
|
|
||||||
from harvester.proxy import fetch_all
|
|
||||||
|
|
||||||
|
|
||||||
URLS = [
|
|
||||||
'https://api.openproxylist.xyz/socks4.txt',
|
|
||||||
'https://api.openproxylist.xyz/socks5.txt',
|
|
||||||
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
"""Main entry point."""
|
|
||||||
proxies = fetch_all(URLS)
|
|
||||||
print(proxies)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
#### validate_socks
|
#### validate_socks
|
||||||
SOCKS5 proxies can be tested with the `validate_socks` method. The method takes a proxy
|
SOCKS5 proxies can be tested with the `validate_socks` method. The method takes a proxy
|
||||||
@ -69,8 +34,14 @@ with no issues, otherwise it will raise an exception and the caller can decide h
|
|||||||
For an example implementation, see [main.py](main.py).
|
For an example implementation, see [main.py](main.py).
|
||||||
|
|
||||||
## Testing
|
## Testing
|
||||||
|
I was trying to get into the habit of writing unit tests, but god damn I hate them. There are
|
||||||
|
a few, but I don't plan on continuing any time soon.
|
||||||
```
|
```
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
pip install -r requirement-dev.txt
|
pip install -r requirement-dev.txt
|
||||||
pytest -v
|
pytest -v
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Greets
|
||||||
|
Shoutouts to [acidvegas](https://git.supernets.org/acidvegas/). This project was inspired by
|
||||||
|
[proxytools](https://git.supernets.org/acidvegas/proxytools).
|
16
main.py
16
main.py
@ -1,6 +1,7 @@
|
|||||||
"""Harvester: Proxy collection tool
|
"""Harvester: Proxy collection tool
|
||||||
Inspired by https://github.com/acidvegas/proxytools
|
Inspired by https://github.com/acidvegas/proxytools
|
||||||
"""
|
"""
|
||||||
|
import time
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import logging
|
import logging
|
||||||
from harvester.proxy import fetch_all, validate_socks
|
from harvester.proxy import fetch_all, validate_socks
|
||||||
@ -13,6 +14,11 @@ def load_urls(path):
|
|||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
||||||
|
def write_file(path, data):
|
||||||
|
with open(path, 'w', encoding='utf-8') as file:
|
||||||
|
file.write(data)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Main entry point."""
|
"""Main entry point."""
|
||||||
logging.basicConfig(level=logging.WARN)
|
logging.basicConfig(level=logging.WARN)
|
||||||
@ -33,7 +39,7 @@ def main():
|
|||||||
response = future.result()
|
response = future.result()
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
except Exception as exception:
|
except Exception as exception:
|
||||||
# TODO: Handle exceptions differently. See issues.
|
# TODO: Handle exceptions differently. See https://git.juggalol.com/agatha/harvester/issues/1.
|
||||||
logging.info(str(exception))
|
logging.info(str(exception))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -41,9 +47,11 @@ def main():
|
|||||||
valid.append(proxy)
|
valid.append(proxy)
|
||||||
print(f'{proxy} -> {ip}')
|
print(f'{proxy} -> {ip}')
|
||||||
|
|
||||||
with open('data/valid-socks.txt', 'w', encoding='utf-8') as file:
|
# Write to file with timestamp
|
||||||
file.write('\n'.join(valid))
|
write_file(
|
||||||
|
path=f'proxies/valid-socks-{time.strftime("%Y%m%d%H%M%S")}.txt',
|
||||||
|
data='\n'.join(valid)
|
||||||
|
)
|
||||||
for proxy in valid:
|
for proxy in valid:
|
||||||
print(proxy)
|
print(proxy)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user