Initial commit
This commit is contained in:
commit
80ea0b32be
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
.idea/
|
||||
venv/
|
||||
|
||||
__pycache__/
|
||||
*.py[cod]
|
7
README.md
Normal file
7
README.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Harvester
|
||||
Python package for harvesting commonly available data, such as free proxy servers.
|
||||
|
||||
## Testing
|
||||
```
|
||||
pytest -v
|
||||
```
|
0
harvester/__init__.py
Normal file
0
harvester/__init__.py
Normal file
21
harvester/proxy.py
Normal file
21
harvester/proxy.py
Normal file
@ -0,0 +1,21 @@
|
||||
"""Proxy harvester module"""
|
||||
import re
|
||||
import requests
|
||||
|
||||
|
||||
def fetch_list(url, timeout=10):
    """Download *url* and return every proxy address found in the response.

    Args:
        url: Address of a plain-text proxy list.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` would wait indefinitely on a host that
            never answers.

    Returns:
        A list of ``ip:port`` strings, each optionally prefixed with
        ``user:password@``, in the order they appear in the response body.
        An empty list is returned when the request fails, times out, or
        the server replies with an HTTP error status.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Best effort: a source that cannot be fetched yields no proxies.
        return []

    # Optional "user:password@" credentials, then a dotted-quad address
    # and a port number.
    proxy_regex = r"(?:\b(?:[\S]+:)?(?:[\S]+)?@\b)?(?:\d{1,3}\.){3}\d{1,3}:\d+"
    return re.findall(proxy_regex, response.text)
|
||||
|
||||
|
||||
def validate(proxy, type='http'):
    """Validate *proxy* for the given proxy *type*.

    Not implemented yet: neither argument is inspected and the result is
    always ``None``.
    """
    # TODO: check the proxy string against the address regex.
    result = None
    return result
|
21
main.py
Normal file
21
main.py
Normal file
@ -0,0 +1,21 @@
|
||||
"""Harvester: Proxy collection tool
|
||||
Inspired by https://github.com/acidvegas/proxytools
|
||||
"""
|
||||
from harvester.proxy import fetch_list
|
||||
|
||||
|
||||
URLS = [
|
||||
'https://api.openproxylist.xyz/socks4.txt',
|
||||
'https://api.openproxylist.xyz/socks5.txt',
|
||||
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
|
||||
]
|
||||
|
||||
def main():
    """Fetch each list in ``URLS`` and print the proxies found in it."""
    for source in URLS:
        print(fetch_list(source))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
1
requirements-dev.txt
Normal file
1
requirements-dev.txt
Normal file
@ -0,0 +1 @@
|
||||
pytest
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
requests
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
4
tests/data/proxies.txt
Normal file
4
tests/data/proxies.txt
Normal file
@ -0,0 +1,4 @@
|
||||
127.0.0.1:9000
|
||||
garbage line lol
|
||||
127.0.0.1:9001
|
||||
username:pa$$@word@127.0.0.1:9002
|
32
tests/test_harvester.py
Normal file
32
tests/test_harvester.py
Normal file
@ -0,0 +1,32 @@
|
||||
import subprocess
|
||||
import time
|
||||
import pytest
|
||||
from harvester.proxy import fetch_list
|
||||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
def start_web_server():
    """Serve ``tests/data`` over HTTP on port 8888 for the whole session.

    The server runs in a child process started with the interpreter that
    is running the tests, so it does not depend on a ``python`` executable
    being on ``PATH``. The child is always terminated and reaped when the
    session ends, even if setup is interrupted.
    """
    import socket
    import sys

    server_process = subprocess.Popen(
        [sys.executable, '-m', 'http.server', '8888'],
        cwd='tests/data',
    )
    try:
        # Wait until the port accepts connections instead of sleeping for
        # a fixed time; give up after a few seconds and let the tests
        # report the failure themselves.
        deadline = time.monotonic() + 5
        while time.monotonic() < deadline:
            try:
                with socket.create_connection(('localhost', 8888), timeout=0.2):
                    break
            except OSError:
                time.sleep(0.05)

        yield
    finally:
        server_process.terminate()
        # Reap the child so it does not linger as a zombie process.
        server_process.wait()
|
||||
|
||||
|
||||
def test_fetch_list():
    """The local fixture file yields its three proxies in file order."""
    fetched = fetch_list('http://localhost:8888/proxies.txt')
    assert fetched == [
        '127.0.0.1:9000',
        '127.0.0.1:9001',
        'username:pa$$@word@127.0.0.1:9002',
    ]
|
||||
|
||||
|
||||
def test_fetch_list_fail():
    """A request that cannot be completed produces an empty list."""
    # Presumably nothing listens on port 12345, so the request fails.
    fetched = fetch_list('http://localhost:12345/proxies.txt')
    assert fetched == []
|
||||
|
||||
|
||||
def test_fetch_list_only_valid():
    """Lines that are not proxy addresses are left out of the result."""
    valid_only = [
        '127.0.0.1:9000',
        '127.0.0.1:9001',
        'username:pa$$@word@127.0.0.1:9002',
    ]
    assert fetch_list('http://localhost:8888/proxies.txt') == valid_only
|
Loading…
Reference in New Issue
Block a user