Paste metadata is fetched when updating the paste list

This commit is contained in:
agatha 2023-09-12 17:44:38 -04:00
parent 2f0b324603
commit fe139fc9ff
2 changed files with 24 additions and 14 deletions

View File

@ -1,23 +1,23 @@
import time
from pastebin.client import PastebinClient
from pastebin.util import search_by_language
from pastebin.util import search_by_category
def main():
pastebin = PastebinClient()
lang_filters = ['Email', 'Python']
pastebin = PastebinClient(debug=True)
filters = ['Gaming']
while True:
new_pastes = pastebin.update_paste_list()
print('Updating pastes...')
new_pastes = pastebin.update_paste_list()
for lang in lang_filters:
pastes = search_by_language(new_pastes, lang)
for filter_ in filters:
pastes = search_by_category(new_pastes, filter_)
for paste in pastes:
print(f'New {lang} paste: {paste}')
print(f'New {filter_} paste: {paste}')
time.sleep(60)
time.sleep(300)
if __name__ == '__main__':

View File

@ -63,7 +63,7 @@ class PastebinAPI:
return response
def get_public_paste_list(self):
def get_paste_list(self):
endpoint = '/archive'
try:
@ -77,9 +77,14 @@ class PastebinAPI:
pastes = [self._parse_paste(paste_html) for paste_html in pastes_table]
# Stopgap: get_paste_meta is called once per paste, sequentially, on every list refresh.
# TODO: Run these get_paste_meta calls concurrently with ThreadPoolExecutor.
for i, paste in enumerate(pastes):
pastes[i] = self.get_paste_meta(paste)
return pastes
def get_paste(self, paste):
def get_paste_meta(self, paste):
endpoint = f'/{paste.href}'
try:
@ -93,10 +98,15 @@ class PastebinAPI:
paste = self._parse_paste_metadata(soup, paste)
# Get paste text
paste = self._fetch_paste_text(paste)
# paste = self._fetch_paste_text(paste)
return paste
def download_paste(self, paste):
    """Fetch a paste's text and write it to a file in the current directory.

    The file is named after the final path component of ``paste.href`` and
    is written as UTF-8.

    Args:
        paste: Paste object exposing an ``href`` attribute.
    """
    # Function-scope import so this method does not depend on the module's
    # import block.
    import os

    # NOTE(review): the previously active call in get_paste_meta assigned the
    # result of _fetch_paste_text back to `paste`; confirm that it returns the
    # paste text as a str, since it is passed straight to f.write below.
    data = self._fetch_paste_text(paste)

    # href appears to be taken from scraped HTML (see _parse_paste), so keep
    # only the last path component to avoid writing outside the current
    # directory.
    filename = os.path.basename(paste.href)

    # Explicit encoding: paste text may contain any Unicode character, and the
    # platform default encoding may be unable to represent it.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(data)
def _parse_paste(self, paste_html):
paste_title = paste_html.find('a').text
paste_link = paste_html.find('a')['href'].lstrip('/')
@ -130,7 +140,7 @@ class PastebinClient(PastebinAPI):
self.pastes = []
def update_paste_list(self):
fetched_pastes = set(self.get_public_paste_list())
fetched_pastes = set(self.get_paste_list())
existing_pastes = set(self.pastes)
new_pastes = fetched_pastes - existing_pastes
@ -141,9 +151,9 @@ class PastebinClient(PastebinAPI):
def fetch_paste(self, paste_index):
if paste_index >= len(self.pastes):
return None
return self.get_paste(self.pastes[paste_index])
return self.get_paste_meta(self.pastes[paste_index])
def get_paste_list(self):
def paste_list(self):
return self.pastes
def total_pastes(self):