#!/usr/bin/env python3
"""
Tdarr File Monitor - Monitors Tdarr cache directory for completed .mkv files and copies them to backup location.
Detects file completion by monitoring size changes and always keeps the smallest version of duplicate files.
"""

import os
import shutil
import json
import time
import logging
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Dict, Optional, Tuple
from datetime import datetime, timedelta


@dataclass
class FileState:
    """Tracks the state of a monitored file."""

    path: str  # String form of the source path; also the key in the state dict
    size: int  # Size in bytes (st_size) seen on the most recent check
    last_modified: float  # st_mtime seen on the most recent check
    first_seen: float  # time.time() of the cycle that started tracking the file
    last_size_change: float  # time.time() of the last cycle in which the size differed
    check_count: int = 0  # Number of check cycles that have examined this file


class TdarrFileMonitor:
    """Monitors Tdarr cache directory for completed .mkv files.

    A file is considered complete once its size has not changed for
    ``completion_wait_seconds``. Completed files are copied into ``dest_dir``
    under their base name; an existing destination file is only replaced when
    the new source file is strictly smaller.
    """

    def __init__(
        self,
        source_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/temp",
        media_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/media",
        dest_dir: str = "/mnt/NV2/tdarr-cache/manual-backup",
        state_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor_state.json",
        completion_wait_seconds: int = 60,
        log_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor.log"
    ):
        """Configure paths and logging, then load any previously saved state.

        Args:
            source_dir: Directory tree scanned for ``.mkv`` files.
            media_dir: Directory tree that must contain a file with the same
                name and size before a source file is tracked.
            dest_dir: Directory that receives the copies (created if missing).
            state_file: JSON file used to persist monitoring state between runs.
            completion_wait_seconds: Seconds a file's size must stay unchanged
                before the file is treated as complete.
            log_file: Path of the log file.
        """
        self.source_dir = Path(source_dir)
        self.media_dir = Path(media_dir)
        self.dest_dir = Path(dest_dir)
        self.state_file = Path(state_file)
        self.completion_wait_seconds = completion_wait_seconds
        self.monitored_files: Dict[str, FileState] = {}

        # logging.FileHandler opens the file immediately and raises if the
        # directory does not exist, so make sure it is there first.
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)

        # Setup logging
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger(f'{__name__}.TdarrFileMonitor')

        # Ensure destination and state directories exist
        self.dest_dir.mkdir(parents=True, exist_ok=True)
        self.state_file.parent.mkdir(parents=True, exist_ok=True)

        # Load previous state
        self._load_state()

    def _load_state(self) -> None:
        """Load monitored files state from disk.

        A missing state file leaves the state empty. An unreadable or
        malformed file is logged and also results in an empty state.
        """
        if not self.state_file.exists():
            return

        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self.monitored_files = {
                path: FileState(**file_data)
                for path, file_data in data.items()
            }
            self.logger.info(f"Loaded state for {len(self.monitored_files)} monitored files")
        except Exception as e:
            # Best effort: a broken state file must not stop the monitor.
            self.logger.error(f"Failed to load state file: {e}")
            self.monitored_files = {}

    def _save_state(self) -> None:
        """Save monitored files state to disk.

        The JSON is written to a sibling ``.tmp`` file and then moved over the
        real state file, so an interrupted write cannot leave a truncated
        state file behind. Failures are logged, not raised.
        """
        tmp_path = self.state_file.with_name(self.state_file.name + '.tmp')
        try:
            data = {path: asdict(state) for path, state in self.monitored_files.items()}
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            os.replace(tmp_path, self.state_file)
        except Exception as e:
            self.logger.error(f"Failed to save state file: {e}")

    def _scan_for_mkv_files(self) -> Dict[str, Path]:
        """Scan source directory for .mkv files in all subdirectories.

        Returns:
            Mapping of ``str(path)`` to ``Path`` for every regular ``.mkv``
            file found. If scanning raises, the error is logged and whatever
            was collected so far is returned.
        """
        mkv_files: Dict[str, Path] = {}
        try:
            for mkv_file in self.source_dir.rglob("*.mkv"):
                if mkv_file.is_file():
                    mkv_files[str(mkv_file)] = mkv_file
        except Exception as e:
            self.logger.error(f"Error scanning source directory: {e}")
        return mkv_files

    def _get_file_info(self, file_path: Path) -> Optional[Tuple[int, float]]:
        """Get file size and modification time.

        Returns:
            ``(size_in_bytes, mtime)``, or ``None`` if the file doesn't exist
            or can't be accessed (a warning is logged in that case).
        """
        try:
            stat = file_path.stat()
        except OSError as e:  # FileNotFoundError is a subclass of OSError
            self.logger.warning(f"Cannot access file {file_path}: {e}")
            return None
        return stat.st_size, stat.st_mtime

    def _validate_file_pair(self, temp_file_path: Path, temp_file_size: int) -> bool:
        """Validate that a matching file exists in media directory with exact same name and size.

        Args:
            temp_file_path: Path of the candidate file; only its name is used.
            temp_file_size: Size in bytes the media file must have.

        Returns:
            True if any regular file below ``media_dir`` has the same name and
            size, otherwise False (also on any error, which is logged).
        """
        target_name = temp_file_path.name
        try:
            # Compare names literally instead of passing the name to rglob():
            # rglob() treats its argument as a glob pattern, so names with
            # '[', ']', '*' or '?' (e.g. "Movie [1080p].mkv") would not match
            # themselves and could match unrelated files.
            for media_file in self.media_dir.rglob("*"):
                if media_file.name != target_name or not media_file.is_file():
                    continue

                media_file_info = self._get_file_info(media_file)
                if media_file_info is None:
                    continue

                media_size, _ = media_file_info
                if media_size == temp_file_size:
                    self.logger.debug(f"Found matching file: {temp_file_path.name} ({temp_file_size:,} bytes) in temp and media directories")
                    return True
                self.logger.debug(f"Size mismatch for {temp_file_path.name}: temp={temp_file_size:,}, media={media_size:,}")

            # No matching file found
            self.logger.info(f"No matching file found in media directory for {temp_file_path.name} ({temp_file_size:,} bytes)")
            return False
        except Exception as e:
            self.logger.error(f"Error validating file pair for {temp_file_path.name}: {e}")
            return False

    def _is_file_complete(self, file_state: FileState, current_time: float) -> bool:
        """Check if file is complete based on size stability.

        Returns:
            True when at least ``completion_wait_seconds`` have passed since
            the last recorded size change.
        """
        stale_time = current_time - file_state.last_size_change
        return stale_time >= self.completion_wait_seconds

    def _should_copy_file(self, source_path: Path, dest_path: Path) -> bool:
        """Determine if we should copy the file (always keep smaller version).

        Returns:
            True if the destination does not exist or the source is strictly
            smaller than it; False otherwise, including when either file
            cannot be stat'ed (e.g. the source vanished after the scan).
        """
        if not dest_path.exists():
            return True

        try:
            source_size = source_path.stat().st_size
            dest_size = dest_path.stat().st_size
        except OSError as e:
            # Do not let a vanished file abort the whole check cycle.
            self.logger.warning(f"Cannot compare {source_path.name} with existing destination: {e}")
            return False

        if source_size < dest_size:
            self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is smaller than existing destination ({dest_size:,} bytes), will replace")
            return True

        self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is not smaller than existing destination ({dest_size:,} bytes), skipping")
        return False

    def _copy_file_with_retry(self, source_path: Path, dest_path: Path) -> bool:
        """Copy file with retry logic and cleanup on failure.

        The file is copied to ``<dest>.tmp``, its size is verified against the
        source, and it is then moved over ``dest_path``. Two attempts are made
        with a 5 second pause in between.

        Returns:
            True on success, False if every attempt failed.
        """
        max_attempts = 2
        temp_dest = dest_path.with_suffix(dest_path.suffix + '.tmp')

        for attempt in range(1, max_attempts + 1):
            try:
                start_time = time.time()
                self.logger.info(f"Attempt {attempt}: Copying {source_path.name} ({source_path.stat().st_size:,} bytes)")

                # Copy to temporary file first
                shutil.copy2(source_path, temp_dest)

                # Verify copy completed successfully
                if temp_dest.stat().st_size != source_path.stat().st_size:
                    raise OSError("Copy verification failed: size mismatch")

                # Path.replace overwrites an existing destination in a single
                # step, so the old backup is never deleted before the new one
                # is in place (unlink + rename could lose it on failure).
                temp_dest.replace(dest_path)

                copy_time = time.time() - start_time
                final_size = dest_path.stat().st_size
                self.logger.info(f"Successfully copied {source_path.name} ({final_size:,} bytes) in {copy_time:.2f}s")
                return True

            except Exception as e:
                self.logger.error(f"Copy attempt {attempt} failed for {source_path.name}: {e}")

                # Cleanup temporary file if it exists
                try:
                    temp_dest.unlink()
                except FileNotFoundError:
                    pass
                except OSError as cleanup_error:
                    self.logger.error(f"Failed to cleanup temp file {temp_dest}: {cleanup_error}")

                if attempt < max_attempts:
                    time.sleep(5)  # Wait before retry

        self.logger.error(f"All copy attempts failed for {source_path.name}, giving up")
        return False

    def _update_file_state(
        self,
        file_path: Path,
        current_size: int,
        current_mtime: float,
        current_time: float,
    ) -> Optional[FileState]:
        """Update the tracked state of one file, or start tracking it.

        Returns:
            The file's state, or ``None`` when the file is new and has no
            matching file in the media directory (it is then not tracked).
        """
        file_path_str = str(file_path)
        file_state = self.monitored_files.get(file_path_str)

        if file_state is not None:
            file_state.check_count += 1

            # A size change restarts the stability timer
            if current_size != file_state.size:
                file_state.size = current_size
                file_state.last_size_change = current_time
                self.logger.debug(f"Size changed for {file_path.name}: {current_size:,} bytes")

            file_state.last_modified = current_mtime
            return file_state

        # New file discovered - validate before tracking
        if not self._validate_file_pair(file_path, current_size):
            return None

        file_state = FileState(
            path=file_path_str,
            size=current_size,
            last_modified=current_mtime,
            first_seen=current_time,
            last_size_change=current_time,
            check_count=1
        )
        self.monitored_files[file_path_str] = file_state
        self.logger.info(f"Started monitoring validated file: {file_path.name} ({current_size:,} bytes)")
        return file_state

    def run_check(self) -> None:
        """Run a single monitoring check cycle.

        Scans the source tree, drops state for files that disappeared, updates
        the state of every current file, copies the files whose size has been
        stable long enough, and finally persists the state.
        """
        current_time = time.time()
        self.logger.info("Starting monitoring check cycle")

        # Scan for current .mkv files
        current_files = self._scan_for_mkv_files()
        self.logger.info(f"Found {len(current_files)} .mkv files in source directory")

        # Remove files from monitoring that no longer exist
        for missing_file in set(self.monitored_files) - set(current_files):
            self.logger.info(f"File no longer exists, removing from monitoring: {Path(missing_file).name}")
            del self.monitored_files[missing_file]

        # Process each current file
        files_to_copy = []
        for file_path in current_files.values():
            file_info = self._get_file_info(file_path)
            if file_info is None:
                continue

            current_size, current_mtime = file_info
            file_state = self._update_file_state(file_path, current_size, current_mtime, current_time)
            if file_state is None:
                # File doesn't have a matching pair in media directory, skip tracking
                continue

            # Log current state
            stale_time = current_time - file_state.last_size_change
            self.logger.info(f"Checking {file_path.name}: {current_size:,} bytes, stale for {stale_time:.1f}s (checks: {file_state.check_count})")

            # Check if file is complete
            if self._is_file_complete(file_state, current_time):
                dest_path = self.dest_dir / file_path.name
                if self._should_copy_file(file_path, dest_path):
                    files_to_copy.append((file_path, dest_path, file_state))

        # Copy completed files
        for source_path, dest_path, file_state in files_to_copy:
            self.logger.info(f"File appears complete: {source_path.name} (stable for {current_time - file_state.last_size_change:.1f}s)")

            if self._copy_file_with_retry(source_path, dest_path):
                # Remove from monitoring after successful copy
                del self.monitored_files[str(source_path)]
                self.logger.info(f"Successfully processed and removed from monitoring: {source_path.name}")
            else:
                self.logger.error(f"Failed to copy {source_path.name}, will continue monitoring")

        # Save state
        self._save_state()

        self.logger.info(f"Check cycle completed, monitoring {len(self.monitored_files)} files")


def main():
    """Main entry point for the script."""
    monitor = TdarrFileMonitor()
    monitor.run_check()


if __name__ == "__main__":
    main()