parent
d90349c85d
commit
c332b1552f
@ -108,23 +108,46 @@ class RomhackRaceScraper:
|
||||
response = self.rate_limited_request(self.base_url)
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
season_div = soup.find('div', class_='info leaders', style=lambda s: s and '300px' in s)
|
||||
if not season_div:
|
||||
self.debug_print("Could not find season navigation div")
|
||||
return []
|
||||
seasons = set() # use a set to avoid duplicates
|
||||
|
||||
# Find the season navigation section - looking for text that starts with "Season"
|
||||
season_text = soup.find(text=lambda t: t and t.strip().startswith('Season'))
|
||||
if season_text:
|
||||
parent = season_text.parent
|
||||
|
||||
season_links = season_div.find_all('a')
|
||||
seasons = []
|
||||
for link in season_links:
|
||||
season_num = re.search(r'season=(\d+)', link['href'])
|
||||
if season_num:
|
||||
seasons.append(int(season_num.group(1)))
|
||||
# Get all links and the bold span that follow "Season"
|
||||
elements = parent.find_all(['a', 'span'])
|
||||
for element in elements:
|
||||
if element.name == 'a':
|
||||
try:
|
||||
seasons.add(int(element.text))
|
||||
except ValueError:
|
||||
continue
|
||||
elif element.name == 'span' and element.get('style') == 'font-weight:bold;':
|
||||
try:
|
||||
seasons.add(int(element.text))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if 1 not in seasons:
|
||||
seasons.append(1)
|
||||
if not seasons:
|
||||
self.debug_print("Warning: No seasons found in normal parsing")
|
||||
# Fallback: try to find any numbers in season links
|
||||
season_links = soup.find_all('a', href=lambda h: h and 'season=' in h)
|
||||
for link in season_links:
|
||||
match = re.search(r'season=(\d+)', link['href'])
|
||||
if match:
|
||||
seasons.add(int(match.group(1)))
|
||||
|
||||
self.debug_print(f"Found seasons: {seasons}")
|
||||
return sorted(seasons)
|
||||
# Always include season 1
|
||||
seasons.add(1)
|
||||
|
||||
sorted_seasons = sorted(list(seasons))
|
||||
self.debug_print(f"Found seasons: {sorted_seasons}")
|
||||
|
||||
if len(sorted_seasons) < 2:
|
||||
self.debug_print("Warning: Found unusually few seasons, might indicate parsing error")
|
||||
|
||||
return sorted_seasons
|
||||
|
||||
def test_parse(self):
|
||||
"""Test parsing on the first page"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user