Compare commits

..

1 Commit

Author SHA1 Message Date
fe139fc9ff Post meta is fetched when updating paste list 2023-09-12 17:44:38 -04:00
2 changed files with 24 additions and 14 deletions

View File

@ -1,23 +1,23 @@
import time import time
from pastebin.client import PastebinClient from pastebin.client import PastebinClient
from pastebin.util import search_by_language from pastebin.util import search_by_category
def main(): def main():
pastebin = PastebinClient() pastebin = PastebinClient(debug=True)
lang_filters = ['Email', 'Python'] filters = ['Gaming']
while True: while True:
new_pastes = pastebin.update_paste_list()
print('Updating pastes...') print('Updating pastes...')
new_pastes = pastebin.update_paste_list()
for lang in lang_filters: for filter_ in filters:
pastes = search_by_language(new_pastes, lang) pastes = search_by_category(new_pastes, filter_)
for paste in pastes: for paste in pastes:
print(f'New {lang} paste: {paste}') print(f'New {filter_} paste: {paste}')
time.sleep(60) time.sleep(300)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -63,7 +63,7 @@ class PastebinAPI:
return response return response
def get_public_paste_list(self): def get_paste_list(self):
endpoint = '/archive' endpoint = '/archive'
try: try:
@ -77,9 +77,14 @@ class PastebinAPI:
pastes = [self._parse_paste(paste_html) for paste_html in pastes_table] pastes = [self._parse_paste(paste_html) for paste_html in pastes_table]
# Sketchy way of getting the metadata each time
# TODO: This should be using ThreadPoolExecutor here
for i, paste in enumerate(pastes):
pastes[i] = self.get_paste_meta(paste)
return pastes return pastes
def get_paste(self, paste): def get_paste_meta(self, paste):
endpoint = f'/{paste.href}' endpoint = f'/{paste.href}'
try: try:
@ -93,10 +98,15 @@ class PastebinAPI:
paste = self._parse_paste_metadata(soup, paste) paste = self._parse_paste_metadata(soup, paste)
# Get paste text # Get paste text
paste = self._fetch_paste_text(paste) # paste = self._fetch_paste_text(paste)
return paste return paste
def download_paste(self, paste):
data = self._fetch_paste_text(paste)
with open(paste.href, 'w') as f:
f.write(data)
def _parse_paste(self, paste_html): def _parse_paste(self, paste_html):
paste_title = paste_html.find('a').text paste_title = paste_html.find('a').text
paste_link = paste_html.find('a')['href'].lstrip('/') paste_link = paste_html.find('a')['href'].lstrip('/')
@ -130,7 +140,7 @@ class PastebinClient(PastebinAPI):
self.pastes = [] self.pastes = []
def update_paste_list(self): def update_paste_list(self):
fetched_pastes = set(self.get_public_paste_list()) fetched_pastes = set(self.get_paste_list())
existing_pastes = set(self.pastes) existing_pastes = set(self.pastes)
new_pastes = fetched_pastes - existing_pastes new_pastes = fetched_pastes - existing_pastes
@ -141,9 +151,9 @@ class PastebinClient(PastebinAPI):
def fetch_paste(self, paste_index): def fetch_paste(self, paste_index):
if paste_index >= len(self.pastes): if paste_index >= len(self.pastes):
return None return None
return self.get_paste(self.pastes[paste_index]) return self.get_paste_meta(self.pastes[paste_index])
def get_paste_list(self): def paste_list(self):
return self.pastes return self.pastes
def total_pastes(self): def total_pastes(self):