diff --git a/downloader.py b/downloader.py
index 0d4d34a..24b2ea6 100644
--- a/downloader.py
+++ b/downloader.py
@@ -108,23 +108,46 @@ class RomhackRaceScraper:
         response = self.rate_limited_request(self.base_url)
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        season_div = soup.find('div', class_='info leaders', style=lambda s: s and '300px' in s)
-        if not season_div:
-            self.debug_print("Could not find season navigation div")
-            return []
+        seasons = set()  # use a set to avoid duplicates
+
+        # Find the season navigation section - looking for text that starts with "Season"
+        season_text = soup.find(text=lambda t: t and t.strip().startswith('Season'))
+        if season_text:
+            parent = season_text.parent
 
-        season_links = season_div.find_all('a')
-        seasons = []
-        for link in season_links:
-            season_num = re.search(r'season=(\d+)', link['href'])
-            if season_num:
-                seasons.append(int(season_num.group(1)))
+            # Get all links and the bold span that follow "Season"
+            elements = parent.find_all(['a', 'span'])
+            for element in elements:
+                if element.name == 'a':
+                    try:
+                        seasons.add(int(element.text))
+                    except ValueError:
+                        continue
+                elif element.name == 'span' and element.get('style') == 'font-weight:bold;':
+                    try:
+                        seasons.add(int(element.text))
+                    except ValueError:
+                        continue
 
-        if 1 not in seasons:
-            seasons.append(1)
+        if not seasons:
+            self.debug_print("Warning: No seasons found in normal parsing")
+            # Fallback: try to find any numbers in season links
+            season_links = soup.find_all('a', href=lambda h: h and 'season=' in h)
+            for link in season_links:
+                match = re.search(r'season=(\d+)', link['href'])
+                if match:
+                    seasons.add(int(match.group(1)))
 
-        self.debug_print(f"Found seasons: {seasons}")
-        return sorted(seasons)
+        # Always include season 1
+        seasons.add(1)
+
+        sorted_seasons = sorted(list(seasons))
+        self.debug_print(f"Found seasons: {sorted_seasons}")
+
+        if len(sorted_seasons) < 2:
+            self.debug_print("Warning: Found unusually few seasons, might indicate parsing error")
+
+        return sorted_seasons
 
     def test_parse(self):
         """Test parsing on the first page"""