"""Scraper for weekly .bps patches from romhackraces.com.

Walks every season's levels page, extracts week numbers from the site's
number-image markup, downloads each week's patch into patches/Season<N>/,
and records progress in download_history.json so repeat runs only fetch
patches that are new since the last run.
"""

import requests
from bs4 import BeautifulSoup
import os
import re
import json
import time
from urllib.parse import urljoin
from datetime import datetime


class RomhackRaceScraper:
    def __init__(self):
        self.base_url = "https://www.romhackraces.com/levels.php"
        self.session = requests.Session()
        self.rate_limit = 1  # minimum seconds between requests
        self.last_request = 0
        self.download_history_file = "download_history.json"
        self.download_history = self.load_download_history()
        self.debug = True
        os.makedirs('patches', exist_ok=True)

    def debug_print(self, message):
        if self.debug:
            print(f"DEBUG: {message}")

    def load_download_history(self):
        """Load the download history from disk, or return a fresh record."""
        try:
            with open(self.download_history_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return {
                "last_update": "",
                "downloaded_patches": {},
                "last_season_checked": 0
            }

    def save_download_history(self):
        with open(self.download_history_file, 'w') as f:
            json.dump(self.download_history, f, indent=2)

    def rate_limited_request(self, url):
        """GET a URL, sleeping as needed to respect the rate limit."""
        self.debug_print(f"Making request to: {url}")
        current_time = time.time()
        time_since_last = current_time - self.last_request
        if time_since_last < self.rate_limit:
            time.sleep(self.rate_limit - time_since_last)
        response = self.session.get(url)
        self.last_request = time.time()
        self.debug_print(f"Response status code: {response.status_code}")
        return response

    def get_week_number(self, element):
        """Extract week number from span element containing number images"""
        # First find the span with font-size:18px that contains 'Week'
        week_span = element.find('span', style='font-size:18px;')
        if not week_span or 'Week' not in week_span.text:
            return None
        # Get all number images in this span
        number_images = week_span.find_all('img')
        if not number_images:
            self.debug_print("Found week span but no number images")
            return None
        try:
            # Extract numbers from image filenames
            numbers = [img['src'].split('/')[-1].split('.')[0]
                       for img in number_images]
            week_num = int(''.join(numbers))
            self.debug_print(f"Found week number: {week_num}")
            return week_num
        except (ValueError, KeyError, IndexError) as e:
            self.debug_print(f"Error parsing week number: {e}")
            return None

    def download_patch(self, url, week_number, season_number):
        """Download one patch, skipping it if history shows it was already fetched."""
        patch_id = f"s{season_number}_w{week_number}"
        if patch_id in self.download_history["downloaded_patches"]:
            self.debug_print(f"Patch {patch_id} already downloaded")
            return False
        response = self.rate_limited_request(url)
        if response.status_code == 200:
            season_dir = os.path.join('patches', f"Season{season_number}")
            os.makedirs(season_dir, exist_ok=True)
            filename = f"Week{week_number}.bps"
            filepath = os.path.join(season_dir, filename)
            with open(filepath, 'wb') as f:
                f.write(response.content)
            self.download_history["downloaded_patches"][patch_id] = {
                "filename": filepath,
                "downloaded_at": datetime.now().isoformat(),
                "url": url
            }
            self.save_download_history()
            print(f"Downloaded Season {season_number} Week {week_number}")
            return True
        else:
            print(f"Failed to download Season {season_number} "
                  f"Week {week_number} - Status code: {response.status_code}")
            return False

    def get_seasons(self):
        """Collect season numbers from the season navigation links."""
        response = self.rate_limited_request(self.base_url)
        soup = BeautifulSoup(response.text, 'html.parser')
        season_div = soup.find('div', class_='info leaders',
                               style=lambda s: s and '300px' in s)
        if not season_div:
            self.debug_print("Could not find season navigation div")
            return []
        season_links = season_div.find_all('a')
        seasons = []
        for link in season_links:
            season_num = re.search(r'season=(\d+)', link['href'])
            if season_num:
                seasons.append(int(season_num.group(1)))
        # Season 1 is the default page and may have no nav link of its own
        if 1 not in seasons:
            seasons.append(1)
        self.debug_print(f"Found seasons: {seasons}")
        return sorted(seasons)

    def test_parse(self):
        """Test parsing on the first page"""
        response = self.rate_limited_request(self.base_url)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Find first patch link
        first_patch = soup.find('a', href=lambda href: href and href.endswith('.bps'))
        if first_patch:
            self.debug_print(f"Test found patch link: {first_patch['href']}")
        else:
            self.debug_print("Test could not find any patch links")
        # Find first week span
        first_week = soup.find('span', style='font-size:18px;')
        if first_week:
            self.debug_print(f"Test found week span: {first_week.text}")
            number_images = first_week.find_all('img')
            self.debug_print(f"Number images found: {len(number_images)}")
            for img in number_images:
                self.debug_print(f"Image source: {img['src']}")
        else:
            self.debug_print("Test could not find any week spans")

    def scrape_season(self, season_number):
        """Download every week's patch found on one season's page."""
        url = f"{self.base_url}?season={season_number}"
        print(f"\nScraping Season {season_number}")
        response = self.rate_limited_request(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        downloads_this_season = 0
        # Find all info divs
        info_divs = soup.find_all('div', class_='info')
        for info_div in info_divs:
            # Check if this div contains a week number
            week_num = self.get_week_number(info_div)
            if week_num is None:
                continue
            self.debug_print(f"Processing Week {week_num}")
            # Look for the patch link in the next table cell
            table_cell = info_div.find_next('td', valign='top', align='right')
            if table_cell:
                patch_link = table_cell.find(
                    'a', href=lambda href: href and href.endswith('.bps'))
                if patch_link:
                    self.debug_print(f"Found patch link: {patch_link['href']}")
                    patch_url = urljoin("https://www.romhackraces.com/",
                                        patch_link['href'])
                    self.debug_print(f"Full patch URL: {patch_url}")
                    if self.download_patch(patch_url, week_num, season_number):
                        downloads_this_season += 1
                else:
                    self.debug_print(f"No patch link found for Week {week_num}")
            else:
                self.debug_print(f"No table cell found for Week {week_num}")
        self.debug_print(f"Downloads this season: {downloads_this_season}")
        return downloads_this_season

    def scrape_all_seasons(self):
        """Scrape every season, skipping those already checked on earlier runs."""
        self.test_parse()
        seasons = self.get_seasons()
        print(f"Found {len(seasons)} seasons to scrape")
        total_downloads = 0
        last_season_checked = self.download_history["last_season_checked"]
        for season in seasons:
            # Re-check the most recent season in case new weeks were posted
            if season < last_season_checked:
                self.debug_print(f"Skipping Season {season} - already checked")
                continue
            downloads = self.scrape_season(season)
            total_downloads += downloads
            self.download_history["last_season_checked"] = max(
                season, self.download_history["last_season_checked"]
            )
        self.download_history["last_update"] = datetime.now().isoformat()
        self.save_download_history()
        print(f"\nDownload session complete. "
              f"Downloaded {total_downloads} new patches.")


def main():
    scraper = RomhackRaceScraper()
    # Check if we have existing downloads
    if os.path.exists("download_history.json"):
        print("Found existing download history")
        print(f"Last update: {scraper.download_history['last_update']}")
        print(f"Previously downloaded patches: "
              f"{len(scraper.download_history['downloaded_patches'])}")
        print("Checking for new patches...\n")
    else:
        print("No download history found. Will download all patches.\n")
    scraper.scrape_all_seasons()


if __name__ == '__main__':
    main()