parent
							
								
									d90349c85d
								
							
						
					
					
						commit
						c332b1552f
					
				@ -108,23 +108,46 @@ class RomhackRaceScraper:
 | 
				
			|||||||
        response = self.rate_limited_request(self.base_url)
 | 
					        response = self.rate_limited_request(self.base_url)
 | 
				
			||||||
        soup = BeautifulSoup(response.text, 'html.parser')
 | 
					        soup = BeautifulSoup(response.text, 'html.parser')
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        season_div = soup.find('div', class_='info leaders', style=lambda s: s and '300px' in s)
 | 
					        seasons = set()  # use a set to avoid duplicates
 | 
				
			||||||
        if not season_div:
 | 
					 | 
				
			||||||
            self.debug_print("Could not find season navigation div")
 | 
					 | 
				
			||||||
            return []
 | 
					 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        season_links = season_div.find_all('a')
 | 
					        # Find the season navigation section - looking for text that starts with "Season"
 | 
				
			||||||
        seasons = []
 | 
					        season_text = soup.find(text=lambda t: t and t.strip().startswith('Season'))
 | 
				
			||||||
        for link in season_links:
 | 
					        if season_text:
 | 
				
			||||||
            season_num = re.search(r'season=(\d+)', link['href'])
 | 
					            parent = season_text.parent
 | 
				
			||||||
            if season_num:
 | 
					 | 
				
			||||||
                seasons.append(int(season_num.group(1)))
 | 
					 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
        if 1 not in seasons:
 | 
					            # Get all links and the bold span that follow "Season"
 | 
				
			||||||
            seasons.append(1)
 | 
					            elements = parent.find_all(['a', 'span'])
 | 
				
			||||||
 | 
					            for element in elements:
 | 
				
			||||||
 | 
					                if element.name == 'a':
 | 
				
			||||||
 | 
					                    try:
 | 
				
			||||||
 | 
					                        seasons.add(int(element.text))
 | 
				
			||||||
 | 
					                    except ValueError:
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
 | 
					                elif element.name == 'span' and element.get('style') == 'font-weight:bold;':
 | 
				
			||||||
 | 
					                    try:
 | 
				
			||||||
 | 
					                        seasons.add(int(element.text))
 | 
				
			||||||
 | 
					                    except ValueError:
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        self.debug_print(f"Found seasons: {seasons}")
 | 
					        if not seasons:
 | 
				
			||||||
        return sorted(seasons)
 | 
					            self.debug_print("Warning: No seasons found in normal parsing")
 | 
				
			||||||
 | 
					            # Fallback: try to find any numbers in season links
 | 
				
			||||||
 | 
					            season_links = soup.find_all('a', href=lambda h: h and 'season=' in h)
 | 
				
			||||||
 | 
					            for link in season_links:
 | 
				
			||||||
 | 
					                match = re.search(r'season=(\d+)', link['href'])
 | 
				
			||||||
 | 
					                if match:
 | 
				
			||||||
 | 
					                    seasons.add(int(match.group(1)))
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Always include season 1
 | 
				
			||||||
 | 
					        seasons.add(1)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        sorted_seasons = sorted(list(seasons))
 | 
				
			||||||
 | 
					        self.debug_print(f"Found seasons: {sorted_seasons}")
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        if len(sorted_seasons) < 2:
 | 
				
			||||||
 | 
					            self.debug_print("Warning: Found unusually few seasons, might indicate parsing error")
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        return sorted_seasons
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_parse(self):
 | 
					    def test_parse(self):
 | 
				
			||||||
        """Test parsing on the first page"""
 | 
					        """Test parsing on the first page"""
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user