#!/usr/bin/env python3
"""
Tdarr File Monitor - Monitors the Tdarr cache directory for completed .mkv files and copies them to a backup location.
Detects file completion by monitoring size changes and always keeps the smallest version of duplicate files.
"""

import os
import shutil
import json
import time
import logging
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Dict, Optional
from datetime import datetime, timedelta


@dataclass
class FileState:
    """Bookkeeping record for one file being watched for completion.

    Instances are serialised to JSON with ``dataclasses.asdict`` and rebuilt
    with ``FileState(**data)``, so field names double as the on-disk keys.

    Attributes:
        path: String form of the file's path; also used as the tracking key.
        size: Size in bytes observed at the most recent check.
        last_modified: Modification time (``st_mtime``) at the most recent check.
        first_seen: ``time.time()`` value of the cycle that started tracking.
        last_size_change: ``time.time()`` value of the cycle in which the size
            last differed from the previously recorded size.
        check_count: Number of check cycles that have inspected the file.
    """

    # Identity of the tracked file.
    path: str

    # Most recent observation.
    size: int
    last_modified: float

    # Timeline used to decide when the file has stopped growing.
    first_seen: float
    last_size_change: float

    # Diagnostic counter; defaults to zero for a freshly created record.
    check_count: int = 0


class TdarrFileMonitor:
    """Monitors Tdarr cache directory for completed .mkv files.

    Each call to :meth:`run_check` performs one pass:

    1. Scan ``source_dir`` recursively for ``.mkv`` files.
    2. Start tracking a new file only if a file with the same name and size
       exists somewhere under ``media_dir``.
    3. Treat a tracked file as complete once its size has not changed for
       ``completion_wait_seconds``.
    4. Copy completed files into ``dest_dir``, replacing an existing copy only
       when the new file is strictly smaller.

    Tracking state is persisted to ``state_file`` as JSON so that size
    stability can be measured across separate invocations of the script.

    Attributes:
        source_dir: Directory tree scanned for ``.mkv`` files.
        media_dir: Directory tree searched for the matching name/size pair.
        dest_dir: Flat backup directory that receives completed files.
        state_file: JSON file holding the tracked-file state.
        completion_wait_seconds: Seconds of unchanged size required before a
            file is considered complete.
        monitored_files: Tracked files keyed by their path string.
        logger: Logger used for all progress and error messages.
    """

    # Number of times a copy is attempted before giving up.
    COPY_ATTEMPTS = 2
    # Seconds to wait between a failed copy attempt and the next one.
    COPY_RETRY_DELAY_SECONDS = 5

    def __init__(
        self,
        source_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/temp",
        media_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/media",
        dest_dir: str = "/mnt/NV2/tdarr-cache/manual-backup",
        state_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor_state.json",
        completion_wait_seconds: int = 60,
        log_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor.log"
    ):
        """Configure paths and logging, then load any previously saved state.

        Args:
            source_dir: Directory tree scanned for ``.mkv`` files.
            media_dir: Directory tree used to validate newly discovered files.
            dest_dir: Backup directory; created if it does not exist.
            state_file: JSON file used to persist tracking state.
            completion_wait_seconds: Seconds a file's size must stay unchanged
                before it is considered complete.
            log_file: Log file path; its parent directory is created if needed.
        """
        self.source_dir = Path(source_dir)
        self.media_dir = Path(media_dir)
        self.dest_dir = Path(dest_dir)
        self.state_file = Path(state_file)
        self.completion_wait_seconds = completion_wait_seconds
        self.monitored_files: Dict[str, "FileState"] = {}

        # FileHandler opens the file immediately and raises if the directory
        # is missing, so make sure it exists first.
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)

        # Setup logging
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(f'{__name__}.TdarrFileMonitor')

        # Ensure destination directory exists
        self.dest_dir.mkdir(parents=True, exist_ok=True)

        # Load previous state
        self._load_state()

    def _load_state(self) -> None:
        """Load monitored files state from disk.

        A missing state file is not an error (first run). Any other failure
        is logged and tracking starts from an empty state.
        """
        try:
            with open(self.state_file, 'r') as f:
                data = json.load(f)
            self.monitored_files = {
                path: FileState(**file_data)
                for path, file_data in data.items()
            }
        except FileNotFoundError:
            # Nothing persisted yet; keep the empty state set by __init__.
            return
        except Exception as e:
            # Best-effort: a corrupt or unreadable state file must not stop
            # the monitor, it only delays completion detection.
            self.logger.error(f"Failed to load state file: {e}")
            self.monitored_files = {}
        else:
            self.logger.info(f"Loaded state for {len(self.monitored_files)} monitored files")

    def _save_state(self) -> None:
        """Save monitored files state to disk.

        The state is written to a sibling ``.tmp`` file and then moved over
        the real state file, so an interrupted write cannot leave a truncated
        state file behind. Failures are logged, not raised.
        """
        tmp_path = self.state_file.with_name(self.state_file.name + '.tmp')
        try:
            data = {path: asdict(state) for path, state in self.monitored_files.items()}
            self.state_file.parent.mkdir(parents=True, exist_ok=True)
            with open(tmp_path, 'w') as f:
                json.dump(data, f, indent=2)
            # os.replace overwrites the target atomically on the same filesystem.
            os.replace(tmp_path, self.state_file)
        except Exception as e:
            self.logger.error(f"Failed to save state file: {e}")

    def _scan_for_mkv_files(self) -> Dict[str, Path]:
        """Scan source directory for .mkv files in all subdirectories.

        Returns:
            Mapping of path string to ``Path`` for every regular ``.mkv``
            file found. Scan errors are logged and whatever was collected up
            to that point is returned.
        """
        mkv_files = {}
        try:
            for mkv_file in self.source_dir.rglob("*.mkv"):
                if mkv_file.is_file():
                    mkv_files[str(mkv_file)] = mkv_file
        except Exception as e:
            self.logger.error(f"Error scanning source directory: {e}")

        return mkv_files

    def _get_file_info(self, file_path: Path) -> Optional[tuple]:
        """Return ``(size, mtime)`` for a file, or None if it cannot be accessed.

        Args:
            file_path: File to inspect.

        Returns:
            Tuple of ``st_size`` and ``st_mtime``, or None when ``stat`` fails.
        """
        try:
            stat = file_path.stat()
        except OSError as e:  # FileNotFoundError is a subclass of OSError
            self.logger.warning(f"Cannot access file {file_path}: {e}")
            return None
        return stat.st_size, stat.st_mtime

    def _validate_file_pair(self, temp_file_path: Path, temp_file_size: int) -> bool:
        """Validate that a matching file exists in media directory with exact same name and size.

        The media tree is walked and names are compared literally. Glob
        matching is deliberately avoided because media file names commonly
        contain ``[``, ``]``, ``*`` or ``?``, which a glob would interpret as
        pattern syntax instead of literal characters.

        Args:
            temp_file_path: File in the source directory being validated.
            temp_file_size: Size in bytes of that file.

        Returns:
            True if a regular file with the same name and size exists anywhere
            under ``media_dir``; False otherwise or on error.
        """
        target_name = temp_file_path.name
        try:
            # Search for matching file in media directory tree
            for dir_path, _dir_names, file_names in os.walk(self.media_dir):
                if target_name not in file_names:
                    continue

                media_file = Path(dir_path) / target_name
                if not media_file.is_file():
                    # e.g. a broken symlink listed by the directory scan
                    continue

                media_file_info = self._get_file_info(media_file)
                if media_file_info is None:
                    continue

                media_size, _ = media_file_info
                if media_size == temp_file_size:
                    self.logger.debug(f"Found matching file: {temp_file_path.name} ({temp_file_size:,} bytes) in temp and media directories")
                    return True
                self.logger.debug(f"Size mismatch for {temp_file_path.name}: temp={temp_file_size:,}, media={media_size:,}")

            # No matching file found
            self.logger.info(f"No matching file found in media directory for {temp_file_path.name} ({temp_file_size:,} bytes)")
            return False

        except Exception as e:
            self.logger.error(f"Error validating file pair for {temp_file_path.name}: {e}")
            return False

    def _is_file_complete(self, file_state: "FileState", current_time: float) -> bool:
        """Check if file is complete based on size stability.

        Args:
            file_state: Tracking record of the file.
            current_time: Timestamp of the current check cycle.

        Returns:
            True when the size has been unchanged for at least
            ``completion_wait_seconds``.
        """
        stale_time = current_time - file_state.last_size_change
        return stale_time >= self.completion_wait_seconds

    def _should_copy_file(self, source_path: Path, dest_path: Path) -> bool:
        """Determine if we should copy the file (always keep smaller version).

        Args:
            source_path: Candidate file in the source directory.
            dest_path: Where the file would be placed in the backup directory.

        Returns:
            True if the destination does not exist or the source is strictly
            smaller than it. False otherwise, including when either file
            cannot be inspected (logged as a warning).
        """
        try:
            dest_size = dest_path.stat().st_size
        except FileNotFoundError:
            # Nothing to compare against, so the copy is always wanted.
            return True
        except OSError as e:
            self.logger.warning(f"Cannot access file {dest_path}: {e}")
            return False

        try:
            source_size = source_path.stat().st_size
        except OSError as e:
            # The source may vanish between the scan and this check; skip it
            # instead of aborting the whole cycle.
            self.logger.warning(f"Cannot access file {source_path}: {e}")
            return False

        if source_size < dest_size:
            self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is smaller than existing destination ({dest_size:,} bytes), will replace")
            return True

        self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is not smaller than existing destination ({dest_size:,} bytes), skipping")
        return False

    def _copy_file_with_retry(self, source_path: Path, dest_path: Path) -> bool:
        """Copy file with retry logic and cleanup on failure.

        The file is copied to ``<dest name>.tmp`` next to the destination,
        verified by size, then moved over the destination in one step so an
        existing backup is never removed before its replacement is in place.

        Args:
            source_path: File to copy.
            dest_path: Final destination path.

        Returns:
            True on success, False once all ``COPY_ATTEMPTS`` have failed.
        """
        temp_dest = dest_path.with_name(dest_path.name + '.tmp')

        for attempt in range(1, self.COPY_ATTEMPTS + 1):
            try:
                start_time = time.time()
                self.logger.info(f"Attempt {attempt}: Copying {source_path.name} ({source_path.stat().st_size:,} bytes)")

                # Copy to temporary file first
                shutil.copy2(source_path, temp_dest)

                # Verify copy completed successfully
                if temp_dest.stat().st_size != source_path.stat().st_size:
                    raise OSError("Copy verification failed: size mismatch")

                # Atomically move the temp file over the final destination;
                # replace() overwrites an existing file in a single step.
                temp_dest.replace(dest_path)

                copy_time = time.time() - start_time
                final_size = dest_path.stat().st_size

                self.logger.info(f"Successfully copied {source_path.name} ({final_size:,} bytes) in {copy_time:.2f}s")
                return True

            except Exception as e:
                # Retry boundary: any failure of this attempt is logged and
                # followed by cleanup, never propagated to the caller.
                self.logger.error(f"Copy attempt {attempt} failed for {source_path.name}: {e}")

                # Cleanup temporary file if it exists
                try:
                    temp_dest.unlink()
                except FileNotFoundError:
                    pass  # nothing was written, nothing to clean up
                except OSError as cleanup_error:
                    self.logger.error(f"Failed to cleanup temp file {temp_dest}: {cleanup_error}")

                if attempt >= self.COPY_ATTEMPTS:  # Last attempt failed
                    self.logger.error(f"All copy attempts failed for {source_path.name}, giving up")
                    return False

                time.sleep(self.COPY_RETRY_DELAY_SECONDS)  # Wait before retry

        return False

    def _track_file(
        self,
        file_path: Path,
        current_size: int,
        current_mtime: float,
        current_time: float,
    ) -> Optional["FileState"]:
        """Update the tracking record for a file, creating it if it validates.

        Args:
            file_path: File found in the source directory.
            current_size: Size in bytes observed this cycle.
            current_mtime: Modification time observed this cycle.
            current_time: Timestamp of the current check cycle.

        Returns:
            The file's tracking record, or None if the file is new and has no
            matching name/size pair in the media directory.
        """
        file_path_str = str(file_path)
        file_state = self.monitored_files.get(file_path_str)

        if file_state is not None:
            file_state.check_count += 1

            # A size change restarts the stability timer.
            if current_size != file_state.size:
                file_state.size = current_size
                file_state.last_size_change = current_time
                self.logger.debug(f"Size changed for {file_path.name}: {current_size:,} bytes")

            file_state.last_modified = current_mtime
            return file_state

        # New file discovered - validate before tracking
        if not self._validate_file_pair(file_path, current_size):
            return None

        file_state = FileState(
            path=file_path_str,
            size=current_size,
            last_modified=current_mtime,
            first_seen=current_time,
            last_size_change=current_time,
            check_count=1
        )
        self.monitored_files[file_path_str] = file_state
        self.logger.info(f"Started monitoring validated file: {file_path.name} ({current_size:,} bytes)")
        return file_state

    def run_check(self) -> None:
        """Run a single monitoring check cycle.

        Scans the source directory, drops tracking for files that no longer
        exist, updates or starts tracking for the rest, copies every file
        judged complete, and finally persists the tracking state.
        """
        current_time = time.time()
        self.logger.info("Starting monitoring check cycle")

        # Scan for current .mkv files
        current_files = self._scan_for_mkv_files()
        self.logger.info(f"Found {len(current_files)} .mkv files in source directory")

        # Remove files from monitoring that no longer exist. The key
        # difference is a new set, so deleting while looping over it is safe.
        for missing_file in self.monitored_files.keys() - current_files.keys():
            self.logger.info(f"File no longer exists, removing from monitoring: {Path(missing_file).name}")
            del self.monitored_files[missing_file]

        # Process each current file
        files_to_copy = []
        for file_path in current_files.values():
            file_info = self._get_file_info(file_path)
            if file_info is None:
                continue

            current_size, current_mtime = file_info

            file_state = self._track_file(file_path, current_size, current_mtime, current_time)
            if file_state is None:
                # No matching pair in the media directory, skip tracking
                continue

            # Log current state
            stale_time = current_time - file_state.last_size_change
            self.logger.info(f"Checking {file_path.name}: {current_size:,} bytes, stale for {stale_time:.1f}s (checks: {file_state.check_count})")

            # Check if file is complete
            if self._is_file_complete(file_state, current_time):
                dest_path = self.dest_dir / file_path.name
                if self._should_copy_file(file_path, dest_path):
                    files_to_copy.append((file_path, dest_path, file_state))

        # Copy completed files
        for source_path, dest_path, file_state in files_to_copy:
            self.logger.info(f"File appears complete: {source_path.name} (stable for {current_time - file_state.last_size_change:.1f}s)")

            if self._copy_file_with_retry(source_path, dest_path):
                # Remove from monitoring after successful copy
                self.monitored_files.pop(str(source_path), None)
                self.logger.info(f"Successfully processed and removed from monitoring: {source_path.name}")
            else:
                self.logger.error(f"Failed to copy {source_path.name}, will continue monitoring")

        # Save state
        self._save_state()

        self.logger.info(f"Check cycle completed, monitoring {len(self.monitored_files)} files")


def main():
    """Run one monitoring check cycle with the default configuration."""
    # A monitor built with no arguments uses its default paths and timing.
    TdarrFileMonitor().run_check()


# Execute main() only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()