Compare commits

...

2 Commits

Author SHA1 Message Date
e813199501 Update example to use logger 2023-11-06 16:47:22 -05:00
fe02eaea6f Fetching functions log info and warnings 2023-11-06 16:47:12 -05:00
2 changed files with 60 additions and 2 deletions

View File

@ -1,5 +1,6 @@
"""Proxy harvester module""" """Proxy harvester module"""
import concurrent.futures import concurrent.futures
import logging
import re import re
import requests import requests
@ -9,10 +10,12 @@ def fetch_list(url):
response = requests.get(url) response = requests.get(url)
response.raise_for_status() response.raise_for_status()
except requests.RequestException: except requests.RequestException:
logging.warning(f'Error fetching proxies from {url}')
return [] return []
proxy_regex = r"(?:\b(?:[\S]+:)?(?:[\S]+)?@\b)?(?:\d{1,3}\.){3}\d{1,3}:\d+" proxy_regex = r"(?:\b(?:[\S]+:)?(?:[\S]+)?@\b)?(?:\d{1,3}\.){3}\d{1,3}:\d+"
proxies = re.findall(proxy_regex, response.text) proxies = re.findall(proxy_regex, response.text)
logging.info(f'Fetched {len(proxies)} proxies from {url}')
return proxies return proxies
@ -30,6 +33,7 @@ def fetch_all(urls, max_workers=8):
except: except:
pass pass
logging.info(f'Fetched {len(proxies)} proxies in total')
return proxies return proxies

58
main.py
View File

@ -1,6 +1,7 @@
"""Harvester: Proxy collection tool """Harvester: Proxy collection tool
Inspired by https://github.com/acidvegas/proxytools Inspired by https://github.com/acidvegas/proxytools
""" """
import logging
from harvester.proxy import fetch_all from harvester.proxy import fetch_all
@ -8,13 +9,66 @@ URLS = [
'https://api.openproxylist.xyz/socks4.txt', 'https://api.openproxylist.xyz/socks4.txt',
'https://api.openproxylist.xyz/socks5.txt', 'https://api.openproxylist.xyz/socks5.txt',
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4', 'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks4',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4',
'https://api.proxyscrape.com/?request=displayproxies&proxytype=socks5',
'https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5',
'https://proxy-list.download/api/v1/get?type=socks4',
'https://proxy-list.download/api/v1/get?type=socks5',
'https://proxyscan.io/download?type=socks4',
'https://proxyscan.io/download?type=socks5',
'https://proxyspace.pro/socks4.txt',
'https://proxyspace.pro/socks5.txt',
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks4.txt',
'https://raw.githubusercontent.com/ALIILAPRO/Proxy/main/socks5.txt',
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS4.txt',
'https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/SOCKS5.txt',
'https://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt',
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/HyperBeats/proxy-list/main/socks5.txt',
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks4.txt',
'https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-socks5.txt',
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS4.txt',
'https://raw.githubusercontent.com/manuGMG/proxy-365/main/SOCKS5.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks4.txt',
'https://raw.githubusercontent.com/mmpx12/proxy-list/master/socks5.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks4.txt',
'https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/socks5.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks4.txt',
'https://raw.githubusercontent.com/MuRongPIG/Proxy-Master/main/socks5.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks4.txt',
'https://raw.githubusercontent.com/officialputuid/KangProxy/KangProxy/socks5/socks5.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/prxchk/proxy-list/main/socks5.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks4.txt',
'https://raw.githubusercontent.com/rdavydov/proxy-list/main/proxies_anonymous/socks5.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS4_RAW.txt',
'https://raw.githubusercontent.com/roosterkid/openproxylist/main/SOCKS5_RAW.txt',
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks4.txt',
'https://raw.githubusercontent.com/RX4096/proxy-list/main/online/socks5.txt',
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks4.txt',
'https://raw.githubusercontent.com/saschazesiger/Free-Proxies/master/proxies/socks5.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt',
'https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt',
'https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt',
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks4.txt',
'https://raw.githubusercontent.com/UptimerBot/proxy-list/main/proxies/socks5.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks4.txt',
'https://raw.githubusercontent.com/Zaeem20/FREE_PROXIES_LIST/master/socks5.txt',
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks4.txt',
'https://raw.githubusercontent.com/zevtyardt/proxy-list/main/socks5.txt',
'https://spys.me/socks.txt',
'https://spys.one/en/socks-proxy-list/'
] ]
def main(): def main():
"""Main entry point.""" """Main entry point."""
logging.basicConfig(level=logging.INFO)
proxies = fetch_all(URLS) proxies = fetch_all(URLS)
print(proxies)
print(len(proxies))
if __name__ == '__main__': if __name__ == '__main__':