parent
d90349c85d
commit
c332b1552f
@ -108,23 +108,46 @@ class RomhackRaceScraper:
|
|||||||
response = self.rate_limited_request(self.base_url)
|
response = self.rate_limited_request(self.base_url)
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
soup = BeautifulSoup(response.text, 'html.parser')
|
||||||
|
|
||||||
season_div = soup.find('div', class_='info leaders', style=lambda s: s and '300px' in s)
|
seasons = set() # use a set to avoid duplicates
|
||||||
if not season_div:
|
|
||||||
self.debug_print("Could not find season navigation div")
|
|
||||||
return []
|
|
||||||
|
|
||||||
season_links = season_div.find_all('a')
|
# Find the season navigation section - looking for text that starts with "Season"
|
||||||
seasons = []
|
season_text = soup.find(text=lambda t: t and t.strip().startswith('Season'))
|
||||||
|
if season_text:
|
||||||
|
parent = season_text.parent
|
||||||
|
|
||||||
|
# Get all links and the bold span that follow "Season"
|
||||||
|
elements = parent.find_all(['a', 'span'])
|
||||||
|
for element in elements:
|
||||||
|
if element.name == 'a':
|
||||||
|
try:
|
||||||
|
seasons.add(int(element.text))
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
elif element.name == 'span' and element.get('style') == 'font-weight:bold;':
|
||||||
|
try:
|
||||||
|
seasons.add(int(element.text))
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not seasons:
|
||||||
|
self.debug_print("Warning: No seasons found in normal parsing")
|
||||||
|
# Fallback: try to find any numbers in season links
|
||||||
|
season_links = soup.find_all('a', href=lambda h: h and 'season=' in h)
|
||||||
for link in season_links:
|
for link in season_links:
|
||||||
season_num = re.search(r'season=(\d+)', link['href'])
|
match = re.search(r'season=(\d+)', link['href'])
|
||||||
if season_num:
|
if match:
|
||||||
seasons.append(int(season_num.group(1)))
|
seasons.add(int(match.group(1)))
|
||||||
|
|
||||||
if 1 not in seasons:
|
# Always include season 1
|
||||||
seasons.append(1)
|
seasons.add(1)
|
||||||
|
|
||||||
self.debug_print(f"Found seasons: {seasons}")
|
sorted_seasons = sorted(list(seasons))
|
||||||
return sorted(seasons)
|
self.debug_print(f"Found seasons: {sorted_seasons}")
|
||||||
|
|
||||||
|
if len(sorted_seasons) < 2:
|
||||||
|
self.debug_print("Warning: Found unusually few seasons, might indicate parsing error")
|
||||||
|
|
||||||
|
return sorted_seasons
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
"""Test parsing on the first page"""
|
"""Test parsing on the first page"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user