fix: update get_seasons method

closes issue #1
This commit is contained in:
agatha 2025-07-26 12:18:50 -04:00
parent d90349c85d
commit c332b1552f

View File

@ -108,23 +108,46 @@ class RomhackRaceScraper:
response = self.rate_limited_request(self.base_url) response = self.rate_limited_request(self.base_url)
soup = BeautifulSoup(response.text, 'html.parser') soup = BeautifulSoup(response.text, 'html.parser')
season_div = soup.find('div', class_='info leaders', style=lambda s: s and '300px' in s) seasons = set() # use a set to avoid duplicates
if not season_div:
self.debug_print("Could not find season navigation div")
return []
season_links = season_div.find_all('a') # Find the season navigation section - looking for text that starts with "Season"
seasons = [] season_text = soup.find(text=lambda t: t and t.strip().startswith('Season'))
for link in season_links: if season_text:
season_num = re.search(r'season=(\d+)', link['href']) parent = season_text.parent
if season_num:
seasons.append(int(season_num.group(1)))
if 1 not in seasons: # Get all links and the bold span that follow "Season"
seasons.append(1) elements = parent.find_all(['a', 'span'])
for element in elements:
if element.name == 'a':
try:
seasons.add(int(element.text))
except ValueError:
continue
elif element.name == 'span' and element.get('style') == 'font-weight:bold;':
try:
seasons.add(int(element.text))
except ValueError:
continue
self.debug_print(f"Found seasons: {seasons}") if not seasons:
return sorted(seasons) self.debug_print("Warning: No seasons found in normal parsing")
# Fallback: try to find any numbers in season links
season_links = soup.find_all('a', href=lambda h: h and 'season=' in h)
for link in season_links:
match = re.search(r'season=(\d+)', link['href'])
if match:
seasons.add(int(match.group(1)))
# Always include season 1
seasons.add(1)
sorted_seasons = sorted(list(seasons))
self.debug_print(f"Found seasons: {sorted_seasons}")
if len(sorted_seasons) < 2:
self.debug_print("Warning: Found unusually few seasons, might indicate parsing error")
return sorted_seasons
def test_parse(self): def test_parse(self):
"""Test parsing on the first page""" """Test parsing on the first page"""