diff --git a/CLAUDE.md b/CLAUDE.md
index 587f098..1285f44 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -7,7 +7,7 @@
 - Prefer editing existing files over creating new ones
 - After complex tasks, prompt to save learnings to cognitive memory
 - At session end, ask: "Should I update our documentation?"
-- At 25% context remaining, ask: "Should I update docs before we lose context?"
+- At 25% context remaining, automatically run `/save-memories` before compaction loses detail
 
 ## Context Loading
 When a topic comes up, load `{tech}/CONTEXT.md` + `{tech}/troubleshooting.md`. For scripts, also load `{tech}/scripts/CONTEXT.md`.
diff --git a/monitoring/scripts/tdarr-file-monitor-cron.sh b/monitoring/scripts/tdarr-file-monitor-cron.sh
deleted file mode 100755
index 2faacbe..0000000
--- a/monitoring/scripts/tdarr-file-monitor-cron.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-# Cron job wrapper for Tdarr file monitor
-# Add this to crontab with: * * * * * /mnt/NV2/Development/claude-home/monitoring/scripts/tdarr-file-monitor-cron.sh
-
-cd /mnt/NV2/Development/claude-home/monitoring/scripts
-/usr/bin/python3 /mnt/NV2/Development/claude-home/monitoring/scripts/tdarr_file_monitor.py
\ No newline at end of file
diff --git a/monitoring/scripts/tdarr_file_monitor.py b/monitoring/scripts/tdarr_file_monitor.py
deleted file mode 100755
index e789408..0000000
--- a/monitoring/scripts/tdarr_file_monitor.py
+++ /dev/null
@@ -1,286 +0,0 @@
-#!/usr/bin/env python3
-"""
-Tdarr File Monitor - Monitors Tdarr cache directory for completed .mkv files and copies them to backup location.
-Detects file completion by monitoring size changes and always keeps the smallest version of duplicate files.
-"""
-
-import os
-import shutil
-import json
-import time
-import logging
-from pathlib import Path
-from dataclasses import dataclass, asdict
-from typing import Dict, Optional
-from datetime import datetime, timedelta
-
-
-@dataclass
-class FileState:
-    """Tracks the state of a monitored file."""
-    path: str
-    size: int
-    last_modified: float
-    first_seen: float
-    last_size_change: float
-    check_count: int = 0
-
-
-class TdarrFileMonitor:
-    """Monitors Tdarr cache directory for completed .mkv files."""
-    
-    def __init__(
-        self,
-        source_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/temp",
-        media_dir: str = "/mnt/NV2/tdarr-cache/nobara-pc-gpu-unmapped/media",
-        dest_dir: str = "/mnt/NV2/tdarr-cache/manual-backup",
-        state_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor_state.json",
-        completion_wait_seconds: int = 60,
-        log_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_file_monitor.log"
-    ):
-        self.source_dir = Path(source_dir)
-        self.media_dir = Path(media_dir)
-        self.dest_dir = Path(dest_dir)
-        self.state_file = Path(state_file)
-        self.completion_wait_seconds = completion_wait_seconds
-        self.monitored_files: Dict[str, FileState] = {}
-        
-        # Setup logging
-        logging.basicConfig(
-            level=logging.INFO,
-            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-            handlers=[
-                logging.FileHandler(log_file),
-                logging.StreamHandler()
-            ]
-        )
-        self.logger = logging.getLogger(f'{__name__}.TdarrFileMonitor')
-        
-        # Ensure destination directory exists
-        self.dest_dir.mkdir(parents=True, exist_ok=True)
-        
-        # Load previous state
-        self._load_state()
-    
-    def _load_state(self) -> None:
-        """Load monitored files state from disk."""
-        if self.state_file.exists():
-            try:
-                with open(self.state_file, 'r') as f:
-                    data = json.load(f)
-                    self.monitored_files = {
-                        path: FileState(**file_data) 
-                        for path, file_data in data.items()
-                    }
-                self.logger.info(f"Loaded state for {len(self.monitored_files)} monitored files")
-            except Exception as e:
-                self.logger.error(f"Failed to load state file: {e}")
-                self.monitored_files = {}
-    
-    def _save_state(self) -> None:
-        """Save monitored files state to disk."""
-        try:
-            with open(self.state_file, 'w') as f:
-                data = {path: asdict(state) for path, state in self.monitored_files.items()}
-                json.dump(data, f, indent=2)
-        except Exception as e:
-            self.logger.error(f"Failed to save state file: {e}")
-    
-    def _scan_for_mkv_files(self) -> Dict[str, Path]:
-        """Scan source directory for .mkv files in all subdirectories."""
-        mkv_files = {}
-        try:
-            for mkv_file in self.source_dir.rglob("*.mkv"):
-                if mkv_file.is_file():
-                    mkv_files[str(mkv_file)] = mkv_file
-        except Exception as e:
-            self.logger.error(f"Error scanning source directory: {e}")
-        
-        return mkv_files
-    
-    def _get_file_info(self, file_path: Path) -> Optional[tuple]:
-        """Get file size and modification time, return None if file doesn't exist or can't be accessed."""
-        try:
-            stat = file_path.stat()
-            return stat.st_size, stat.st_mtime
-        except (OSError, FileNotFoundError) as e:
-            self.logger.warning(f"Cannot access file {file_path}: {e}")
-            return None
-    
-    def _validate_file_pair(self, temp_file_path: Path, temp_file_size: int) -> bool:
-        """Validate that a matching file exists in media directory with exact same name and size."""
-        try:
-            # Search for matching file in media directory tree
-            for media_file in self.media_dir.rglob(temp_file_path.name):
-                if media_file.is_file():
-                    media_file_info = self._get_file_info(media_file)
-                    if media_file_info:
-                        media_size, _ = media_file_info
-                        if media_size == temp_file_size:
-                            self.logger.debug(f"Found matching file: {temp_file_path.name} ({temp_file_size:,} bytes) in temp and media directories")
-                            return True
-                        else:
-                            self.logger.debug(f"Size mismatch for {temp_file_path.name}: temp={temp_file_size:,}, media={media_size:,}")
-            
-            # No matching file found
-            self.logger.info(f"No matching file found in media directory for {temp_file_path.name} ({temp_file_size:,} bytes)")
-            return False
-            
-        except Exception as e:
-            self.logger.error(f"Error validating file pair for {temp_file_path.name}: {e}")
-            return False
-    
-    def _is_file_complete(self, file_state: FileState, current_time: float) -> bool:
-        """Check if file is complete based on size stability."""
-        stale_time = current_time - file_state.last_size_change
-        return stale_time >= self.completion_wait_seconds
-    
-    def _should_copy_file(self, source_path: Path, dest_path: Path) -> bool:
-        """Determine if we should copy the file (always keep smaller version)."""
-        if not dest_path.exists():
-            return True
-        
-        source_size = source_path.stat().st_size
-        dest_size = dest_path.stat().st_size
-        
-        if source_size < dest_size:
-            self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is smaller than existing destination ({dest_size:,} bytes), will replace")
-            return True
-        else:
-            self.logger.info(f"Source file {source_path.name} ({source_size:,} bytes) is not smaller than existing destination ({dest_size:,} bytes), skipping")
-            return False
-    
-    def _copy_file_with_retry(self, source_path: Path, dest_path: Path) -> bool:
-        """Copy file with retry logic and cleanup on failure."""
-        temp_dest = dest_path.with_suffix(dest_path.suffix + '.tmp')
-        
-        for attempt in range(2):  # Try twice
-            try:
-                start_time = time.time()
-                self.logger.info(f"Attempt {attempt + 1}: Copying {source_path.name} ({source_path.stat().st_size:,} bytes)")
-                
-                # Copy to temporary file first
-                shutil.copy2(source_path, temp_dest)
-                
-                # Verify copy completed successfully
-                if temp_dest.stat().st_size != source_path.stat().st_size:
-                    raise Exception(f"Copy verification failed: size mismatch")
-                
-                # Move temp file to final destination
-                if dest_path.exists():
-                    dest_path.unlink()  # Remove existing file
-                temp_dest.rename(dest_path)
-                
-                copy_time = time.time() - start_time
-                final_size = dest_path.stat().st_size
-                
-                self.logger.info(f"Successfully copied {source_path.name} ({final_size:,} bytes) in {copy_time:.2f}s")
-                return True
-                
-            except Exception as e:
-                self.logger.error(f"Copy attempt {attempt + 1} failed for {source_path.name}: {e}")
-                
-                # Cleanup temporary file if it exists
-                if temp_dest.exists():
-                    try:
-                        temp_dest.unlink()
-                    except Exception as cleanup_error:
-                        self.logger.error(f"Failed to cleanup temp file {temp_dest}: {cleanup_error}")
-                
-                if attempt == 1:  # Last attempt failed
-                    self.logger.error(f"All copy attempts failed for {source_path.name}, giving up")
-                    return False
-                else:
-                    time.sleep(5)  # Wait before retry
-        
-        return False
-    
-    def run_check(self) -> None:
-        """Run a single monitoring check cycle."""
-        current_time = time.time()
-        self.logger.info("Starting monitoring check cycle")
-        
-        # Scan for current .mkv files
-        current_files = self._scan_for_mkv_files()
-        self.logger.info(f"Found {len(current_files)} .mkv files in source directory")
-        
-        # Remove files from monitoring that no longer exist
-        missing_files = set(self.monitored_files.keys()) - set(current_files.keys())
-        for missing_file in missing_files:
-            self.logger.info(f"File no longer exists, removing from monitoring: {Path(missing_file).name}")
-            del self.monitored_files[missing_file]
-        
-        # Process each current file
-        files_to_copy = []
-        for file_path_str, file_path in current_files.items():
-            file_info = self._get_file_info(file_path)
-            if not file_info:
-                continue
-                
-            current_size, current_mtime = file_info
-            
-            # Update or create file state
-            if file_path_str in self.monitored_files:
-                file_state = self.monitored_files[file_path_str]
-                file_state.check_count += 1
-                
-                # Check if size changed
-                if current_size != file_state.size:
-                    file_state.size = current_size
-                    file_state.last_size_change = current_time
-                    self.logger.debug(f"Size changed for {file_path.name}: {current_size:,} bytes")
-                
-                file_state.last_modified = current_mtime
-                
-            else:
-                # New file discovered - validate before tracking
-                if not self._validate_file_pair(file_path, current_size):
-                    # File doesn't have a matching pair in media directory, skip tracking
-                    continue
-                
-                file_state = FileState(
-                    path=file_path_str,
-                    size=current_size,
-                    last_modified=current_mtime,
-                    first_seen=current_time,
-                    last_size_change=current_time,
-                    check_count=1
-                )
-                self.monitored_files[file_path_str] = file_state
-                self.logger.info(f"Started monitoring validated file: {file_path.name} ({current_size:,} bytes)")
-            
-            # Log current state
-            stale_time = current_time - file_state.last_size_change
-            self.logger.info(f"Checking {file_path.name}: {current_size:,} bytes, stale for {stale_time:.1f}s (checks: {file_state.check_count})")
-            
-            # Check if file is complete
-            if self._is_file_complete(file_state, current_time):
-                dest_path = self.dest_dir / file_path.name
-                if self._should_copy_file(file_path, dest_path):
-                    files_to_copy.append((file_path, dest_path, file_state))
-        
-        # Copy completed files
-        for source_path, dest_path, file_state in files_to_copy:
-            self.logger.info(f"File appears complete: {source_path.name} (stable for {current_time - file_state.last_size_change:.1f}s)")
-            
-            if self._copy_file_with_retry(source_path, dest_path):
-                # Remove from monitoring after successful copy
-                del self.monitored_files[str(source_path)]
-                self.logger.info(f"Successfully processed and removed from monitoring: {source_path.name}")
-            else:
-                self.logger.error(f"Failed to copy {source_path.name}, will continue monitoring")
-        
-        # Save state
-        self._save_state()
-        
-        self.logger.info(f"Check cycle completed, monitoring {len(self.monitored_files)} files")
-
-
-def main():
-    """Main entry point for the script."""
-    monitor = TdarrFileMonitor()
-    monitor.run_check()
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/monitoring/scripts/tdarr_monitor.py b/monitoring/scripts/tdarr_monitor.py
deleted file mode 100755
index ad546d1..0000000
--- a/monitoring/scripts/tdarr_monitor.py
+++ /dev/null
@@ -1,1227 +0,0 @@
-#!/usr/bin/env python3
-"""
-Tdarr API Monitoring Script with Stuck Job Detection and Discord Alerts
-
-Monitors Tdarr server via its web API endpoints:
-- Server status and health
-- Queue status and statistics
-- Node status and performance
-- Library scan progress
-- Worker activity
-- Stuck job detection with configurable timeouts
-- Discord notifications for alerts and status updates
-
-Usage:
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check all
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check queue
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes
-    
-    # Enable stuck job detection (30 minute threshold)
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --detect-stuck
-    
-    # Custom stuck threshold (15 minutes)
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check all --detect-stuck --stuck-threshold 15
-    
-    # Enable Discord alerts for stuck jobs (uses default webhook)
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --detect-stuck --discord-alerts
-    
-    # Automatically clear hung workers when detected
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --detect-stuck --clear-hung-workers
-    
-    # Full monitoring with automatic clearing and Discord alerts
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --detect-stuck --clear-hung-workers --discord-alerts
-    
-    # Test Discord integration (uses default webhook)
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --discord-test
-    
-    # Enable file logging with custom path and debug level
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --log-file /tmp/tdarr_debug.log --log-level DEBUG
-    
-    # Disable file logging (console only)
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes --no-log-file
-    
-    # Clear memory state
-    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --clear-memory
-"""
-
-import argparse
-import json
-import logging
-import logging.handlers
-import sys
-import os
-import pickle
-from dataclasses import dataclass, asdict
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Any, Union
-import requests
-from urllib.parse import urljoin
-
-
-
-@dataclass
-class WorkerSnapshot:
-    worker_id: str
-    node_id: str
-    worker_type: str
-    file: str
-    percentage: float
-    status: str
-    fps: int
-    eta: str
-    timestamp: datetime
-
-
-@dataclass
-class StuckJob:
-    worker_snapshot: WorkerSnapshot
-    first_seen: datetime
-    stuck_duration_minutes: float
-    is_stuck: bool = True
-
-
-@dataclass
-class MemoryState:
-    worker_snapshots: Dict[str, WorkerSnapshot]
-    stuck_jobs: Dict[str, StuckJob]
-    last_updated: datetime
-
-
-@dataclass
-class ServerStatus:
-    timestamp: str
-    server_url: str
-    status: str
-    error: Optional[str] = None
-    version: Optional[str] = None
-    server_id: Optional[str] = None
-    uptime: Optional[str] = None
-    system_info: Optional[Dict[str, Any]] = None
-
-
-@dataclass
-class QueueStats:
-    total_files: int
-    queued: int
-    processing: int
-    completed: int
-    queue_items: List[Dict[str, Any]]
-
-
-@dataclass
-class QueueStatus:
-    timestamp: str
-    queue_stats: Optional[QueueStats] = None
-    error: Optional[str] = None
-
-
-@dataclass
-class NodeInfo:
-    id: Optional[str]
-    nodeName: Optional[str]
-    status: str
-    lastSeen: Optional[int]
-    version: Optional[str]
-    platform: Optional[str]
-    workers: Dict[str, int]
-    processing: List[Dict[str, Any]]
-
-
-@dataclass
-class NodeSummary:
-    total_nodes: int
-    online_nodes: int
-    offline_nodes: int
-    online_details: List[NodeInfo]
-    offline_details: List[NodeInfo]
-
-
-@dataclass
-class NodeStatus:
-    timestamp: str
-    nodes: List[Dict[str, Any]]
-    node_summary: Optional[NodeSummary] = None
-    stuck_jobs: List[StuckJob] = None
-    error: Optional[str] = None
-
-
-@dataclass
-class LibraryInfo:
-    name: Optional[str]
-    path: Optional[str]
-    file_count: int
-    scan_progress: int
-    last_scan: Optional[str]
-    is_scanning: bool
-
-
-@dataclass
-class ScanStatus:
-    total_libraries: int
-    total_files: int
-    scanning_libraries: int
-
-
-@dataclass
-class LibraryStatus:
-    timestamp: str
-    libraries: List[LibraryInfo]
-    scan_status: Optional[ScanStatus] = None
-    error: Optional[str] = None
-
-
-@dataclass
-class Statistics:
-    total_transcodes: int
-    space_saved: int
-    total_files_processed: int
-    failed_transcodes: int
-    processing_speed: int
-    eta: Optional[str]
-
-
-@dataclass
-class StatisticsStatus:
-    timestamp: str
-    statistics: Optional[Statistics] = None
-    error: Optional[str] = None
-
-
-@dataclass
-class HealthCheck:
-    status: str
-    healthy: bool
-    online_count: Optional[int] = None
-    total_count: Optional[int] = None
-    accessible: Optional[bool] = None
-    total_items: Optional[int] = None
-
-
-@dataclass
-class HealthStatus:
-    timestamp: str
-    overall_status: str
-    checks: Dict[str, HealthCheck]
-
-
-@dataclass
-class DiscordEmbedField:
-    name: str
-    value: str
-    inline: bool = False
-
-
-@dataclass
-class DiscordEmbed:
-    title: str
-    description: str
-    color: int
-    fields: List[DiscordEmbedField] = None
-    timestamp: str = None
-    
-    def __post_init__(self):
-        if self.fields is None:
-            self.fields = []
-        if self.timestamp is None:
-            self.timestamp = datetime.now().isoformat()
-
-
-class DiscordNotifier:
-    def __init__(self, webhook_url: str, timeout: int = 10):
-        """Initialize Discord notifier with webhook URL."""
-        self.webhook_url = webhook_url
-        self.timeout = timeout
-        self.session = requests.Session()
-        self.logger = logging.getLogger(f"{__name__}.DiscordNotifier")
-    
-    def send_content_message(self, content: str, username: str = "Tdarr Monitor") -> bool:
-        """Send a simple content message to Discord.
-        
-        Args:
-            content: The message content to send
-            username: Bot username to display
-            
-        Returns:
-            True if successful, False otherwise
-        """
-        payload = {
-            "content": content,
-            "username": username
-        }
-        
-        return self._send_webhook(payload)
-    
-    def send_embed_message(self, 
-                          title: str,
-                          description: str,
-                          color: int = 0xff6b6b,  # Red by default
-                          fields: List[DiscordEmbedField] = None,
-                          username: str = "Tdarr Monitor") -> bool:
-        """Send an embed message to Discord.
-        
-        Args:
-            title: Embed title
-            description: Embed description
-            color: Embed color (hex integer, default red)
-            fields: List of embed fields
-            username: Bot username to display
-            
-        Returns:
-            True if successful, False otherwise
-        """
-        embed = DiscordEmbed(
-            title=title,
-            description=description,
-            color=color,
-            fields=fields or []
-        )
-        
-        payload = {
-            "username": username,
-            "embeds": [asdict(embed)]
-        }
-        
-        return self._send_webhook(payload)
-    
-    def send_stuck_job_alert(self, stuck_jobs: List[StuckJob]) -> bool:
-        """Send alert for stuck jobs using embed format.
-        
-        Args:
-            stuck_jobs: List of stuck jobs to report
-            
-        Returns:
-            True if successful, False otherwise
-        """
-        if not stuck_jobs:
-            return True
-            
-        # Create embed fields for each stuck job
-        fields = []
-        for i, stuck_job in enumerate(stuck_jobs[:10]):  # Limit to 10 jobs (Discord embed field limit is 25)
-            ws = stuck_job.worker_snapshot
-            field_value = (
-                f"**File:** {os.path.basename(ws.file)}\n"
-                f"**Status:** {ws.status}\n"
-                f"**Progress:** {ws.percentage}%\n"
-                f"**Duration:** {stuck_job.stuck_duration_minutes:.1f} minutes\n"
-                f"**Node:** {ws.node_id}"
-            )
-            
-            fields.append(DiscordEmbedField(
-                name=f"Stuck Job {i+1}: {ws.worker_id}",
-                value=field_value,
-                inline=True
-            ))
-        
-        # Add summary field if there are more jobs
-        if len(stuck_jobs) > 10:
-            fields.append(DiscordEmbedField(
-                name="Additional Jobs",
-                value=f"... and {len(stuck_jobs) - 10} more stuck jobs",
-                inline=False
-            ))
-        
-        title = f"🚨 Tdarr Stuck Jobs Detected ({len(stuck_jobs)})"
-        description = (
-            f"Detected {len(stuck_jobs)} stuck job{'s' if len(stuck_jobs) != 1 else ''} "
-            f"in your Tdarr system. These jobs will follow the requested clear behavior."
-        )
-        
-        return self.send_embed_message(
-            title=title,
-            description=description,
-            color=0xff6b6b,  # Red color for alerts
-            fields=fields
-        )
-    
-    def send_system_status(self, 
-                          server_status: ServerStatus, 
-                          node_status: NodeStatus,
-                          stuck_jobs: List[StuckJob] = None) -> bool:
-        """Send system status summary using embed format.
-        
-        Args:
-            server_status: Server status information
-            node_status: Node status information  
-            stuck_jobs: Optional stuck jobs list
-            
-        Returns:
-            True if successful, False otherwise
-        """
-        # Determine overall health color
-        is_healthy = (
-            server_status.status == "good" and 
-            not server_status.error and
-            not node_status.error and
-            (not stuck_jobs or len(stuck_jobs) == 0)
-        )
-        
-        color = 0x28a745 if is_healthy else 0xff6b6b  # Green if healthy, red if not
-        
-        # Build description
-        description_parts = [
-            f"**Server Status:** {server_status.status.title()}",
-            f"**Version:** {getattr(server_status, 'version', 'Unknown')}"
-        ]
-        
-        if node_status.node_summary:
-            description_parts.extend([
-                f"**Total Nodes:** {node_status.node_summary.total_nodes}",
-                f"**Online Nodes:** {node_status.node_summary.online_nodes}",
-                f"**Offline Nodes:** {node_status.node_summary.offline_nodes}"
-            ])
-        
-        if stuck_jobs:
-            description_parts.append(f"**Stuck Jobs:** {len(stuck_jobs)}")
-        
-        # Add node details as fields
-        fields = []
-        if node_status.node_summary and node_status.node_summary.online_details:
-            for node in node_status.node_summary.online_details:
-                active_workers = len(node.processing) if node.processing else 0
-                field_value = (
-                    f"**Status:** Online\n"
-                    f"**Platform:** {node.platform or 'Unknown'}\n"
-                    f"**Active Workers:** {active_workers}\n"
-                    f"**CPU Workers:** {node.workers.get('cpu', 0)}\n"
-                    f"**GPU Workers:** {node.workers.get('gpu', 0)}"
-                )
-                
-                fields.append(DiscordEmbedField(
-                    name=f"📡 {node.nodeName or node.id}",
-                    value=field_value,
-                    inline=True
-                ))
-        
-        title = "📊 Tdarr System Status"
-        if not is_healthy:
-            title = "⚠️ Tdarr System Alert"
-        
-        return self.send_embed_message(
-            title=title,
-            description="\n".join(description_parts),
-            color=color,
-            fields=fields
-        )
-    
-    def _send_webhook(self, payload: Dict[str, Any]) -> bool:
-        """Send payload to Discord webhook.
-        
-        Args:
-            payload: JSON payload to send
-            
-        Returns:
-            True if successful, False otherwise
-        """
-        try:
-            response = self.session.post(
-                self.webhook_url,
-                json=payload,
-                timeout=self.timeout
-            )
-            response.raise_for_status()
-            self.logger.info("Discord notification sent successfully")
-            return True
-            
-        except requests.exceptions.RequestException as e:
-            self.logger.error(f"Failed to send Discord notification: {e}")
-            return False
-        except Exception as e:
-            self.logger.error(f"Unexpected error sending Discord notification: {e}")
-            return False
-
-
-class StuckJobDetector:
-    def __init__(self, memory_file: str = "/mnt/NV2/Development/claude-home/logs/tdarr_memory.pkl", stuck_threshold_minutes: int = 30):
-        """Initialize stuck job detector with memory persistence."""
-        self.memory_file = os.path.abspath(memory_file)  # Use absolute path
-        self.stuck_threshold_minutes = stuck_threshold_minutes
-        self.logger = logging.getLogger(f"{__name__}.StuckJobDetector")
-        self.logger.debug(f"Using memory file: {self.memory_file}")
-        self.memory_state = self._load_memory_state()
-        
-        # Ensure memory directory exists
-        os.makedirs(os.path.dirname(memory_file), exist_ok=True)
-    
-    def _load_memory_state(self) -> MemoryState:
-        """Load memory state from disk or create new one."""
-        if os.path.exists(self.memory_file):
-            try:
-                with open(self.memory_file, 'rb') as f:
-                    memory_state = pickle.load(f)
-                    self.logger.debug(f"Loaded memory state: {len(memory_state.worker_snapshots)} workers, {len(memory_state.stuck_jobs)} stuck jobs")
-                    return memory_state
-            except Exception as e:
-                self.logger.warning(f"Failed to load memory state: {e}, creating new state")
-        else:
-            self.logger.debug(f"Memory file {self.memory_file} does not exist, creating new state")
-        
-        return MemoryState(
-            worker_snapshots={},
-            stuck_jobs={},
-            last_updated=datetime.now()
-        )
-    
-    def _save_memory_state(self):
-        """Save memory state to disk."""
-        try:
-            with open(self.memory_file, 'wb') as f:
-                pickle.dump(self.memory_state, f)
-        except Exception as e:
-            self.logger.error(f"Failed to save memory state: {e}")
-    
-    def _create_worker_key(self, node_id: str, worker_id: str) -> str:
-        """Create unique key for worker identification."""
-        return f"{node_id}:{worker_id}"
-    
-    def _is_worker_stuck(self, current: WorkerSnapshot, previous: WorkerSnapshot) -> bool:
-        """Check if worker is stuck based on comparison with previous snapshot."""
-        worker_key = f"{current.node_id}:{current.worker_id}"
-        
-        # Check each condition individually for detailed logging
-        file_same = current.file == previous.file
-        percentage_same = current.percentage == previous.percentage
-        status_same = current.status == previous.status
-        fps_same = current.fps == previous.fps
-        eta_same = current.eta == previous.eta
-        
-        is_stuck = file_same and percentage_same and status_same and fps_same and eta_same
-        
-        # Log detailed comparison info
-        self.logger.debug(f"Worker {worker_key} stuck check:")
-        self.logger.debug(f"  File: '{current.file}' == '{previous.file}' = {file_same}")
-        self.logger.debug(f"  Percentage: {current.percentage}% == {previous.percentage}% = {percentage_same}")
-        self.logger.debug(f"  Status: '{current.status}' == '{previous.status}' = {status_same}")
-        self.logger.debug(f"  FPS: {current.fps} == {previous.fps} = {fps_same}")
-        self.logger.debug(f"  ETA: '{current.eta}' == '{previous.eta}' = {eta_same}")
-        self.logger.debug(f"  → Result: {'STUCK' if is_stuck else 'NOT STUCK'}")
-        
-        # Log INFO level when we detect changes (worker making progress)
-        if not is_stuck:
-            if not percentage_same:
-                self.logger.info(f"Worker {worker_key} making progress: {previous.percentage}% → {current.percentage}%")
-            elif not status_same:
-                self.logger.info(f"Worker {worker_key} status changed: '{previous.status}' → '{current.status}'")
-            elif not file_same:
-                self.logger.info(f"Worker {worker_key} file changed: '{previous.file}' → '{current.file}'")
-        
-        return is_stuck
-    
-    def update_workers(self, nodes_data: Dict[str, Any]) -> List[StuckJob]:
-        """Update worker snapshots and detect stuck jobs."""
-        current_time = datetime.now()
-        current_workers = {}
-        detected_stuck_jobs = []
-        
-        # Extract current worker states from nodes data
-        for node_id, node_data in nodes_data.items():
-            workers = node_data.get('workers', {})
-            for worker_id, worker_data in workers.items():
-                worker_key = self._create_worker_key(node_id, worker_id)
-                
-                # Create current snapshot
-                current_snapshot = WorkerSnapshot(
-                    worker_id=worker_id,
-                    node_id=node_id,
-                    worker_type=worker_data.get('workerType', 'unknown'),
-                    file=worker_data.get('file', ''),
-                    percentage=worker_data.get('percentage', -1),
-                    status=worker_data.get('status', ''),
-                    fps=worker_data.get('fps', 0),
-                    eta=worker_data.get('ETA', ''),
-                    timestamp=current_time
-                )
-                
-                current_workers[worker_key] = current_snapshot
-                
-                # Log all workers being tracked
-                self.logger.debug(f"Tracking worker {worker_key}: {current_snapshot.status} at {current_snapshot.percentage}% on '{current_snapshot.file}'")
-                
-                # Check if worker was previously tracked
-                if worker_key in self.memory_state.worker_snapshots:
-                    previous_snapshot = self.memory_state.worker_snapshots[worker_key]
-                    
-                    # Check if worker is stuck
-                    if self._is_worker_stuck(current_snapshot, previous_snapshot):
-                        # Calculate how long it's been stuck
-                        time_since_previous = (current_time - previous_snapshot.timestamp).total_seconds() / 60
-                        self.logger.debug(f"Worker {worker_key} has been stuck for {time_since_previous:.1f} minutes since last check")
-                        self.logger.debug(f"Worker {worker_key} checking stuck_jobs dict: {list(self.memory_state.stuck_jobs.keys())}")
-                        
-                        if worker_key in self.memory_state.stuck_jobs:
-                            # Already known stuck job, update duration
-                            stuck_job = self.memory_state.stuck_jobs[worker_key]
-                            stuck_duration = current_time - stuck_job.first_seen
-                            stuck_job.stuck_duration_minutes = stuck_duration.total_seconds() / 60
-                            stuck_job.worker_snapshot = current_snapshot
-                            
-                            self.logger.debug(f"Worker {worker_key} known stuck job - duration: {stuck_job.stuck_duration_minutes:.1f} min, threshold: {self.stuck_threshold_minutes} min")
-                            if stuck_job.stuck_duration_minutes >= self.stuck_threshold_minutes:
-                                self.logger.debug(f"Worker {worker_key} EXCEEDS threshold - adding to detected stuck jobs")
-                                detected_stuck_jobs.append(stuck_job)
-                            else:
-                                self.logger.debug(f"Worker {worker_key} below threshold - not flagging yet")
-                        else:
-                            # New stuck job detected - add to memory immediately to start tracking
-                            first_seen = previous_snapshot.timestamp
-                            stuck_duration = current_time - first_seen
-                            stuck_duration_minutes = stuck_duration.total_seconds() / 60
-                            
-                            self.logger.debug(f"Worker {worker_key} NEW stuck job - first_seen: {first_seen}, current: {current_time}")
-                            self.logger.debug(f"Worker {worker_key} NEW stuck job - duration: {stuck_duration_minutes:.1f} min, threshold: {self.stuck_threshold_minutes} min")
-                            
-                            # Create stuck job entry immediately to track duration across runs
-                            stuck_job = StuckJob(
-                                worker_snapshot=current_snapshot,
-                                first_seen=first_seen,
-                                stuck_duration_minutes=stuck_duration_minutes,
-                                is_stuck=True
-                            )
-                            self.memory_state.stuck_jobs[worker_key] = stuck_job
-                            
-                            if stuck_duration_minutes >= self.stuck_threshold_minutes:
-                                self.logger.debug(f"Worker {worker_key} NEW stuck job EXCEEDS threshold - flagging for clearing")
-                                detected_stuck_jobs.append(stuck_job)
-                            else:
-                                self.logger.debug(f"Worker {worker_key} NEW stuck job below threshold - tracking in memory")
-                    else:
-                        # Worker is not stuck, remove from stuck jobs if present
-                        if worker_key in self.memory_state.stuck_jobs:
-                            del self.memory_state.stuck_jobs[worker_key]
-                            self.logger.info(f"Worker {worker_key} is no longer stuck")
-                else:
-                    # New worker, start tracking it
-                    self.logger.info(f"New worker detected: {worker_key} - {current_snapshot.status} at {current_snapshot.percentage}% on '{current_snapshot.file}'")
-        
-        # Clean up stuck jobs for workers that no longer exist
-        stuck_jobs_to_remove = []
-        for worker_key in self.memory_state.stuck_jobs:
-            if worker_key not in current_workers:
-                stuck_jobs_to_remove.append(worker_key)
-        
-        for worker_key in stuck_jobs_to_remove:
-            del self.memory_state.stuck_jobs[worker_key]
-            self.logger.info(f"Removed stuck job tracking for missing worker: {worker_key}")
-        
-        # Update memory state
-        self.memory_state.worker_snapshots = current_workers
-        self.memory_state.last_updated = current_time
-        
-        # Save to disk
-        self._save_memory_state()
-        
-        return detected_stuck_jobs
-    
-    def get_stuck_jobs(self) -> List[StuckJob]:
-        """Get current list of stuck jobs."""
-        return list(self.memory_state.stuck_jobs.values())
-    
-    def clear_memory(self):
-        """Clear all memory state."""
-        self.memory_state = MemoryState(
-            worker_snapshots={},
-            stuck_jobs={},
-            last_updated=datetime.now()
-        )
-        self._save_memory_state()
-        self.logger.info("Memory state cleared")
-
-
-class TdarrMonitor:
-    def __init__(self, server_url: str, timeout: int = 30, enable_stuck_detection: bool = False, 
-                 stuck_threshold_minutes: int = 30, memory_file: str = ".claude/tmp/tdarr_memory.pkl",
-                 discord_webhook_url: str = None, enable_discord_alerts: bool = False,
-                 log_file: Optional[str] = None, log_level: str = "INFO", clear_hung_workers: bool = False):
-        """Initialize Tdarr monitor with server URL."""
-        self.server_url = server_url.rstrip('/')
-        self.timeout = timeout
-        self.session = requests.Session()
-        self.enable_stuck_detection = enable_stuck_detection
-        self.enable_discord_alerts = enable_discord_alerts
-        self.clear_hung_workers_enabled = clear_hung_workers
-        
-        # Configure logging first
-        self._setup_logging(log_file, log_level)
-        self.logger = logging.getLogger(__name__)
-        
-        # Initialize stuck job detector if enabled
-        self.stuck_detector = None
-        if enable_stuck_detection:
-            self.stuck_detector = StuckJobDetector(memory_file, stuck_threshold_minutes)
-        
-        # Initialize Discord notifier if enabled
-        self.discord_notifier = None
-        if enable_discord_alerts:
-            if discord_webhook_url:
-                self.discord_notifier = DiscordNotifier(discord_webhook_url)
-            else:
-                self.logger.warning("Discord alerts enabled but no webhook URL provided")
-    
-    def _setup_logging(self, log_file: Optional[str] = None, log_level: str = "INFO"):
-        """Configure logging with optional file rotation."""
-        # Clear any existing handlers
-        root_logger = logging.getLogger()
-        root_logger.handlers.clear()
-        
-        # Set log level
-        level = getattr(logging, log_level.upper(), logging.INFO)
-        root_logger.setLevel(level)
-        
-        # Create formatters
-        console_formatter = logging.Formatter(
-            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        )
-        
-        # Enhanced file formatter for better readability
-        file_formatter = logging.Formatter(
-            '%(asctime)s [%(levelname)-7s] %(module)-12s | %(message)s',
-            datefmt='%H:%M:%S'
-        )
-        
-        # Console handler (for interactive use)
-        console_handler = logging.StreamHandler(sys.stdout)
-        console_handler.setFormatter(console_formatter)
-        root_logger.addHandler(console_handler)
-        
-        # File handler with rotation (if log_file specified)
-        if log_file:
-            # Ensure log directory exists
-            log_dir = os.path.dirname(log_file)
-            if log_dir:
-                os.makedirs(log_dir, exist_ok=True)
-            
-            # Rotating file handler: 10MB max, keep 5 backup files
-            file_handler = logging.handlers.RotatingFileHandler(
-                log_file,
-                maxBytes=10 * 1024 * 1024,  # 10MB
-                backupCount=5,
-                encoding='utf-8'
-            )
-            file_handler.setFormatter(file_formatter)
-            root_logger.addHandler(file_handler)
-
-    def _make_request(self, endpoint: str) -> Optional[Dict[str, Any]]:
-        """Make HTTP request to Tdarr API endpoint."""
-        url = urljoin(self.server_url, endpoint)
-        
-        try:
-            response = self.session.get(url, timeout=self.timeout)
-            response.raise_for_status()
-            return response.json()
-            
-        except requests.exceptions.RequestException as e:
-            self.logger.error(f"Request failed for {url}: {e}")
-            return None
-        except json.JSONDecodeError as e:
-            self.logger.error(f"JSON decode failed for {url}: {e}")
-            return None
-
-    def clear_hung_workers(self, stuck_jobs: Optional[List[StuckJob]] = None) -> bool:
-        """Clear hung workers via Tdarr API using kill-worker endpoint.
-        
-        Args:
-            stuck_jobs: List of StuckJob objects to clear. Each contains worker and node information.
-            
-        Returns:
-            True if all workers cleared successfully, False otherwise
-        """
-        if not stuck_jobs:
-            self.logger.info("No stuck jobs provided for clearing hung workers")
-            return True
-            
-        success_count = 0
-        total_count = len(stuck_jobs)
-        
-        for stuck_job in stuck_jobs:
-            worker_snapshot = stuck_job.worker_snapshot
-            try:
-                # Use the kill-worker endpoint with correct payload format
-                endpoint = '/api/v2/kill-worker'
-                payload = {
-                    "data": {
-                        "nodeID": worker_snapshot.node_id,
-                        "workerID": worker_snapshot.worker_id
-                    }
-                }
-                
-                url = urljoin(self.server_url, endpoint)
-                response = self.session.post(url, json=payload, timeout=self.timeout)
-                response.raise_for_status()
-                
-                self.logger.info(f"Successfully killed hung worker: {worker_snapshot.node_id}:{worker_snapshot.worker_id}")
-                success_count += 1
-                
-            except requests.exceptions.RequestException as e:
-                self.logger.error(f"Failed to kill worker {worker_snapshot.node_id}:{worker_snapshot.worker_id}: {e}")
-            except Exception as e:
-                self.logger.error(f"Unexpected error killing worker {worker_snapshot.node_id}:{worker_snapshot.worker_id}: {e}")
-        
-        self.logger.info(f"Cleared {success_count}/{total_count} hung workers")
-        return success_count == total_count
-
-    def get_server_status(self) -> ServerStatus:
-        """Get overall server status and configuration."""
-        timestamp = datetime.now().isoformat()
-        
-        # Try to get server info from API
-        data = self._make_request('/api/v2/status')
-        if data:
-            server_status = data.get('status', 'unknown')
-            self.logger.info(f"Server check completed: status={server_status}, version={data.get('version', 'unknown')}")
-            return ServerStatus(
-                timestamp=timestamp,
-                server_url=self.server_url,
-                status=server_status,
-                version=data.get('version'),
-                uptime=data.get('uptime')
-            )
-        else:
-            self.logger.error("Server check failed: Unable to connect to Tdarr server")
-            return ServerStatus(
-                timestamp=timestamp,
-                server_url=self.server_url,
-                status='offline',
-                error='Unable to connect to Tdarr server'
-            )
-
-    def get_queue_status(self) -> QueueStatus:
-        """Get transcoding queue status and statistics."""
-        timestamp = datetime.now().isoformat()
-        
-        # Get queue information
-        data = self._make_request('/api/v2/get-queue')
-        if data:
-            queue_data = data.get('queue', [])
-            
-            # Calculate queue statistics
-            total_files = len(queue_data)
-            queued_files = len([f for f in queue_data if f.get('status') == 'Queued'])
-            processing_files = len([f for f in queue_data if f.get('status') == 'Processing'])
-            completed_files = len([f for f in queue_data if f.get('status') == 'Completed'])
-            
-            queue_stats = QueueStats(
-                total_files=total_files,
-                queued=queued_files,
-                processing=processing_files,
-                completed=completed_files,
-                queue_items=queue_data[:10]  # First 10 items for details
-            )
-            
-            return QueueStatus(
-                timestamp=timestamp,
-                queue_stats=queue_stats
-            )
-        else:
-            return QueueStatus(
-                timestamp=timestamp,
-                error='Unable to fetch queue data'
-            )
-
-    def get_node_status(self) -> NodeStatus:
-        """Get status of all connected nodes."""
-        timestamp = datetime.now().isoformat()
-        
-        # Get nodes information (using correct endpoint)
-        data = self._make_request('/api/v2/get-nodes')
-        if data:
-            # Handle the actual data structure returned by Tdarr API
-            nodes_dict = data if isinstance(data, dict) else {}
-            nodes = []
-            
-            # Process node information
-            online_nodes = []
-            offline_nodes = []
-            
-            for node_id, node_data in nodes_dict.items():
-                node_info = NodeInfo(
-                    id=node_id,
-                    nodeName=node_data.get('nodeName'),
-                    status='online',  # Assume online if in response
-                    lastSeen=None,
-                    version=node_data.get('config', {}).get('version'),
-                    platform=node_data.get('config', {}).get('platform_arch_isdocker'),
-                    workers={
-                        'cpu': len([w for w in node_data.get('workers', {}).values() if 'cpu' in w.get('workerType', '').lower()]),
-                        'gpu': len([w for w in node_data.get('workers', {}).values() if 'gpu' in w.get('workerType', '').lower()])
-                    },
-                    processing=list(node_data.get('workers', {}).values())
-                )
-                
-                online_nodes.append(node_info)
-                nodes.append(node_data)
-            
-            # Check for stuck jobs if detection is enabled
-            stuck_jobs = []
-            if self.stuck_detector:
-                try:
-                    stuck_jobs = self.stuck_detector.update_workers(nodes_dict)
-                    if stuck_jobs:
-                        self.logger.warning(f"Detected {len(stuck_jobs)} stuck jobs")
-                        for stuck_job in stuck_jobs:
-                            self.logger.warning(
-                                f"Stuck job: {stuck_job.worker_snapshot.node_id}:{stuck_job.worker_snapshot.worker_id} "
-                                f"on file '{stuck_job.worker_snapshot.file}' "
-                                f"at {stuck_job.worker_snapshot.percentage}% for {stuck_job.stuck_duration_minutes:.1f} minutes"
-                            )
-                        
-                        # Clear hung workers if enabled
-                        if self.clear_hung_workers_enabled:
-                            try:
-                                clear_success = self.clear_hung_workers(stuck_jobs)
-                                if clear_success:
-                                    self.logger.info(f"Successfully cleared {len(stuck_jobs)} hung workers")
-                                else:
-                                    self.logger.warning("Some hung workers could not be cleared")
-                            except Exception as e:
-                                self.logger.error(f"Error clearing hung workers: {e}")
-                        
-                        # Send Discord notification for stuck jobs
-                        if self.discord_notifier:
-                            try:
-                                self.discord_notifier.send_stuck_job_alert(stuck_jobs)
-                            except Exception as e:
-                                self.logger.error(f"Failed to send Discord stuck job alert: {e}")
-                                
-                except Exception as e:
-                    self.logger.error(f"Error in stuck job detection: {e}")
-            
-            node_summary = NodeSummary(
-                total_nodes=len(nodes),
-                online_nodes=len(online_nodes),
-                offline_nodes=len(offline_nodes),
-                online_details=online_nodes,
-                offline_details=offline_nodes
-            )
-            
-            # Log successful node check with summary
-            if stuck_jobs:
-                self.logger.info(f"Node check completed: {len(nodes)} nodes online, {len(stuck_jobs)} stuck jobs detected")
-            else:
-                self.logger.info(f"Node check completed: {len(nodes)} nodes online, no stuck jobs detected")
-            
-            return NodeStatus(
-                timestamp=timestamp,
-                nodes=nodes,
-                node_summary=node_summary,
-                stuck_jobs=stuck_jobs
-            )
-        else:
-            self.logger.error("Node check failed: Unable to fetch node data")
-            return NodeStatus(
-                timestamp=timestamp,
-                nodes=[],
-                error='Unable to fetch node data'
-            )
-
-    def get_library_status(self) -> LibraryStatus:
-        """Get library scan status and file statistics."""
-        timestamp = datetime.now().isoformat()
-        
-        # Get library information
-        data = self._make_request('/api/v2/get-libraries')
-        if data:
-            libraries = data.get('libraries', [])
-            
-            library_stats = []
-            total_files = 0
-            
-            for lib in libraries:
-                lib_info = LibraryInfo(
-                    name=lib.get('name'),
-                    path=lib.get('path'),
-                    file_count=lib.get('totalFiles', 0),
-                    scan_progress=lib.get('scanProgress', 0),
-                    last_scan=lib.get('lastScan'),
-                    is_scanning=lib.get('isScanning', False)
-                )
-                library_stats.append(lib_info)
-                total_files += lib_info.file_count
-            
-            scan_status = ScanStatus(
-                total_libraries=len(libraries),
-                total_files=total_files,
-                scanning_libraries=len([l for l in library_stats if l.is_scanning])
-            )
-            
-            return LibraryStatus(
-                timestamp=timestamp,
-                libraries=library_stats,
-                scan_status=scan_status
-            )
-        else:
-            return LibraryStatus(
-                timestamp=timestamp,
-                libraries=[],
-                error='Unable to fetch library data'
-            )
-
-    def get_statistics(self) -> StatisticsStatus:
-        """Get overall Tdarr statistics and health metrics."""
-        timestamp = datetime.now().isoformat()
-        
-        # Get statistics
-        data = self._make_request('/api/v2/get-stats')
-        if data:
-            stats = data.get('stats', {})
-            statistics = Statistics(
-                total_transcodes=stats.get('totalTranscodes', 0),
-                space_saved=stats.get('spaceSaved', 0),
-                total_files_processed=stats.get('totalFilesProcessed', 0),
-                failed_transcodes=stats.get('failedTranscodes', 0),
-                processing_speed=stats.get('processingSpeed', 0),
-                eta=stats.get('eta')
-            )
-            
-            return StatisticsStatus(
-                timestamp=timestamp,
-                statistics=statistics
-            )
-        else:
-            return StatisticsStatus(
-                timestamp=timestamp,
-                error='Unable to fetch statistics'
-            )
-
-    def health_check(self) -> HealthStatus:
-        """Perform comprehensive health check."""
-        timestamp = datetime.now().isoformat()
-        
-        # Server connectivity
-        server_status = self.get_server_status()
-        server_check = HealthCheck(
-            status=server_status.status,
-            healthy=server_status.status == 'good'
-        )
-        
-        # Node connectivity  
-        node_status = self.get_node_status()
-        nodes_healthy = (
-            node_status.node_summary.online_nodes > 0 if node_status.node_summary else False
-        ) and not node_status.error
-        
-        nodes_check = HealthCheck(
-            status='online' if nodes_healthy else 'offline',
-            healthy=nodes_healthy,
-            online_count=node_status.node_summary.online_nodes if node_status.node_summary else 0,
-            total_count=node_status.node_summary.total_nodes if node_status.node_summary else 0
-        )
-        
-        checks = {
-            'server': server_check,
-            'nodes': nodes_check
-        }
-        
-        # Determine overall health
-        all_checks_healthy = all(check.healthy for check in checks.values())
-        overall_status = 'healthy' if all_checks_healthy else 'unhealthy'
-        
-        return HealthStatus(
-            timestamp=timestamp,
-            overall_status=overall_status,
-            checks=checks
-        )
-
-
-def main():
-    parser = argparse.ArgumentParser(description='Monitor Tdarr server via API')
-    parser.add_argument('--server', required=True, help='Tdarr server URL (e.g., http://10.10.0.43:8265)')
-    parser.add_argument('--check', choices=['all', 'status', 'queue', 'nodes', 'libraries', 'stats', 'health'],
-                       default='health', help='Type of check to perform')
-    parser.add_argument('--timeout', type=int, default=30, help='Request timeout in seconds')
-    parser.add_argument('--output', choices=['json', 'pretty'], default='pretty', help='Output format')
-    parser.add_argument('--verbose', action='store_true', help='Enable verbose logging')
-    parser.add_argument('--detect-stuck', action='store_true', help='Enable stuck job detection')
-    parser.add_argument('--stuck-threshold', type=int, default=30, help='Minutes before job is considered stuck (default: 30)')
-    parser.add_argument('--memory-file', default='/mnt/NV2/Development/claude-home/logs/tdarr_memory.pkl', help='Path to memory state file')
-    parser.add_argument('--clear-memory', action='store_true', help='Clear memory state and exit')
-    parser.add_argument('--discord-webhook', 
-                       default='https://discord.com/api/webhooks/1404105821549498398/y2Ud1RK9rzFjv58xbypUfQNe3jrL7ZUq1FkQHa4_dfOHm2ylp93z0f4tY0O8Z-vQgKhD',
-                       help='Discord webhook URL for notifications (default: configured webhook)')
-    parser.add_argument('--discord-alerts', action='store_true', help='Enable Discord alerts for stuck jobs and system status')
-    parser.add_argument('--discord-test', action='store_true', help='Send test Discord message and exit')
-    parser.add_argument('--log-file', default='/mnt/NV2/Development/claude-home/logs/tdarr_monitor.log',
-                       help='Path to log file with rotation (default: /mnt/NV2/Development/claude-home/logs/tdarr_monitor.log)')
-    parser.add_argument('--log-level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default='INFO', 
-                       help='Logging level (default: INFO)')
-    parser.add_argument('--no-log-file', action='store_true', help='Disable file logging, console only')
-    parser.add_argument('--clear-hung-workers', action='store_true', help='Clear hung workers via API call when stuck jobs are detected')
-    
-    args = parser.parse_args()
-    
-    if args.verbose:
-        logging.getLogger().setLevel(logging.DEBUG)
-    
-    # Handle clear memory command
-    if args.clear_memory:
-        if os.path.exists(args.memory_file):
-            os.remove(args.memory_file)
-            print(f"Memory state cleared: {args.memory_file}")
-        else:
-            print(f"Memory file does not exist: {args.memory_file}")
-        sys.exit(0)
-    
-    # Handle Discord test command
-    if args.discord_test:
-        print("Sending Discord test messages...")
-        notifier = DiscordNotifier(args.discord_webhook)
-        
-        # Test content message
-        content_success = notifier.send_content_message(
-            "🧪 **Tdarr Monitor Test** - Content message working correctly!"
-        )
-        
-        # Test embed message
-        test_fields = [
-            DiscordEmbedField("Test Field 1", "This is a test value", True),
-            DiscordEmbedField("Test Field 2", "Another test value", True),
-        ]
-        
-        embed_success = notifier.send_embed_message(
-            title="🧪 Tdarr Monitor Test",
-            description="This is a test embed message to verify Discord integration is working correctly.",
-            color=0x00ff00,  # Green
-            fields=test_fields
-        )
-        
-        if content_success and embed_success:
-            print("✅ Discord test successful! Both content and embed messages sent.")
-            sys.exit(0)
-        else:
-            print("❌ Discord test failed. Check webhook URL and permissions.")
-            sys.exit(1)
-    
-    # Initialize monitor
-    log_file = None if args.no_log_file else args.log_file
-    monitor = TdarrMonitor(
-        args.server, 
-        args.timeout, 
-        enable_stuck_detection=args.detect_stuck,
-        stuck_threshold_minutes=args.stuck_threshold,
-        memory_file=args.memory_file,
-        discord_webhook_url=args.discord_webhook,
-        enable_discord_alerts=args.discord_alerts,
-        log_file=log_file,
-        log_level=args.log_level,
-        clear_hung_workers=args.clear_hung_workers
-    )
-    
-    # Perform requested check
-    monitor.logger.info(f"Starting Tdarr monitoring check: {args.check}, stuck_detection={'enabled' if args.detect_stuck else 'disabled'}, clear_workers={'enabled' if args.clear_hung_workers else 'disabled'}")
-    
-    result = None
-    if args.check == 'all':
-        result = {
-            'server_status': monitor.get_server_status(),
-            'node_status': monitor.get_node_status()
-        }
-    elif args.check == 'status':
-        result = monitor.get_server_status()
-    elif args.check == 'queue':
-        result = monitor.get_queue_status()
-    elif args.check == 'nodes':
-        result = monitor.get_node_status()
-    elif args.check == 'libraries':
-        result = monitor.get_library_status()
-    elif args.check == 'stats':
-        result = monitor.get_statistics()
-    elif args.check == 'health':
-        result = monitor.health_check()
-    
-    # Output results
-    if args.output == 'json':
-        # Convert dataclasses to dictionaries for JSON serialization
-        if args.check == 'all':
-            json_result = {}
-            for key, value in result.items():
-                json_result[key] = asdict(value)
-            print(json.dumps(json_result, indent=2))
-        else:
-            print(json.dumps(asdict(result), indent=2))
-    else:
-        # Pretty print format
-        print(f"=== Tdarr Monitor Results - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")
-        
-        if args.check == 'health' or (hasattr(result, 'overall_status') and result.overall_status):
-            health = result if hasattr(result, 'overall_status') else None
-            if health:
-                status = health.overall_status
-                print(f"Overall Status: {status.upper()}")
-                
-                if health.checks:
-                    print("\nHealth Checks:")
-                    for check_name, check_data in health.checks.items():
-                        status_icon = "✓" if check_data.healthy else "✗"
-                        print(f"  {status_icon} {check_name.title()}: {asdict(check_data)}")
-        
-        # Display stuck jobs if present
-        if args.detect_stuck:
-            if hasattr(result, 'stuck_jobs') and result.stuck_jobs:
-                print(f"\n=== STUCK JOBS DETECTED ({len(result.stuck_jobs)}) ===")
-                for stuck_job in result.stuck_jobs:
-                    print(f"🚨 {stuck_job.worker_snapshot.node_id}:{stuck_job.worker_snapshot.worker_id}")
-                    print(f"   File: {stuck_job.worker_snapshot.file}")
-                    print(f"   Progress: {stuck_job.worker_snapshot.percentage}%")
-                    print(f"   Status: {stuck_job.worker_snapshot.status}")
-                    print(f"   Stuck for: {stuck_job.stuck_duration_minutes:.1f} minutes")
-                    print()
-            elif args.check in ['nodes', 'all']:
-                # Check all results for stuck jobs if 'all' is selected
-                stuck_found = False
-                if args.check == 'all' and isinstance(result, dict):
-                    for section, data in result.items():
-                        if hasattr(data, 'stuck_jobs') and data.stuck_jobs:
-                            if not stuck_found:
-                                print(f"\n=== STUCK JOBS DETECTED ===")
-                                stuck_found = True
-                            for stuck_job in data.stuck_jobs:
-                                print(f"🚨 {stuck_job.worker_snapshot.node_id}:{stuck_job.worker_snapshot.worker_id}")
-                                print(f"   File: {stuck_job.worker_snapshot.file}")
-                                print(f"   Progress: {stuck_job.worker_snapshot.percentage}%")
-                                print(f"   Status: {stuck_job.worker_snapshot.status}")
-                                print(f"   Stuck for: {stuck_job.stuck_duration_minutes:.1f} minutes")
-                                print()
-                
-                if not stuck_found:
-                    print(f"\n✅ No stuck jobs detected (threshold: {args.stuck_threshold} minutes)")
-        
-        if args.check == 'all':
-            for section, data in result.items():
-                print(f"\n=== {section.replace('_', ' ').title()} ===")
-                # Don't print stuck_jobs in JSON format as we already displayed them above
-                if hasattr(data, 'stuck_jobs'):
-                    data_dict = asdict(data)
-                    data_dict.pop('stuck_jobs', None)
-                    print(json.dumps(data_dict, indent=2))
-                else:
-                    print(json.dumps(asdict(data), indent=2))
-        elif args.check != 'health':
-            # Don't print stuck_jobs in JSON format as we already displayed them above
-            if hasattr(result, 'stuck_jobs'):
-                result_dict = asdict(result)
-                result_dict.pop('stuck_jobs', None)
-                print(json.dumps(result_dict, indent=2))
-            else:
-                print(json.dumps(asdict(result), indent=2))
-    
-    # Exit with appropriate code
-    if result:
-        # Check for unhealthy status in health check
-        if isinstance(result, HealthStatus) and result.overall_status == 'unhealthy':
-            sys.exit(1)
-        # Check for errors in individual status objects (all status classes except HealthStatus have error attribute)
-        elif (isinstance(result, (ServerStatus, QueueStatus, NodeStatus, LibraryStatus, StatisticsStatus)) 
-              and result.error):
-            sys.exit(1)
-        # Check for errors in 'all' results
-        elif isinstance(result, dict):
-            for status_obj in result.values():
-                if (isinstance(status_obj, (ServerStatus, QueueStatus, NodeStatus, LibraryStatus, StatisticsStatus)) 
-                    and status_obj.error):
-                    sys.exit(1)
-    
-    sys.exit(0)
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/vm-management/CONTEXT.md b/vm-management/CONTEXT.md
index 5be2f1e..85667d9 100644
--- a/vm-management/CONTEXT.md
+++ b/vm-management/CONTEXT.md
@@ -3,6 +3,15 @@
 ## Overview
 Virtual machine management for home lab environments with focus on automated provisioning, infrastructure as code, and security-first configuration. This context covers VM lifecycle management, Proxmox integration, and standardized deployment patterns.
 
+## Proxmox Host
+- **Version**: PVE 8.4.16 (upgraded from 7.4-20 on 2026-02-19)
+- **Kernel**: 6.8.12-18-pve
+- **IP**: 10.10.0.11
+- **SSH**: `ssh -i ~/.ssh/homelab_rsa root@10.10.0.11`
+- **Storage**: local (100GB dir), local-lvm (2.3TB thin), home-truenas (17TB CIFS at 10.10.0.35)
+- **Networking**: vmbr0 (10.10.0.x/24 via eno1), vmbr1 (10.0.0.x/24 via eno2, Matter/IoT)
+- **Upgrade plan**: Phase 2 (PVE 8→9) pending — see `proxmox-upgrades/proxmox-7-to-9-upgrade-plan.md`
+
 ## Architecture Patterns
 
 ### Infrastructure as Code (IaC) Approach
diff --git a/vm-management/proxmox-upgrades/proxmox-7-to-9-upgrade-plan.md b/vm-management/proxmox-upgrades/proxmox-7-to-9-upgrade-plan.md
index cc8d477..d07ae3e 100644
--- a/vm-management/proxmox-upgrades/proxmox-7-to-9-upgrade-plan.md
+++ b/vm-management/proxmox-upgrades/proxmox-7-to-9-upgrade-plan.md
@@ -2,18 +2,26 @@
 
 ## Executive Summary
 
-**Current State**: Proxmox VE 7.1-7 (kernel 5.13.19-2-pve)
+**Current State**: Proxmox VE 8.4.16 (kernel 6.8.12-18-pve) — Phase 1 complete
 **Target State**: Proxmox VE 9.1 (latest)
 **Upgrade Path**: Two-phase upgrade (7→8→9) - direct upgrade not supported
 **Total Timeline**: 3-4 weeks (including stabilization periods)
 **Total Downtime**: ~4 hours (2 hours per phase)
 
+### Phase 1 Status: COMPLETED (2026-02-19)
+- Upgraded from PVE 7.4-20 → PVE 8.4.16
+- Kernel: 5.13.19-2-pve → 6.8.12-18-pve
+- Total downtime: ~45 minutes (upgrade + reboot + service startup)
+- All services validated and running
+- Stabilization period: monitoring through early March 2026
+
 ## Infrastructure Overview
 
-**Production Services** (8 LXC + 17 VMs):
-- **Critical**: Paper Dynasty/Major Domo (VMs 115, 110), Gitea (LXC 225), n8n (LXC 210), Home Assistant (VM 109)
-- **Important**: Media services (Plex 107, Tdarr 113, arr-stack 221), OpenClaw (224), Databases (112)
-- **Lower Priority**: Game servers, development containers
+**Production Services** (7 LXC + 7 VMs) — cleaned up 2026-02-19:
+- **Critical**: Paper Dynasty/Major Domo (VM 115), Discord bots (VM 110), Gitea (LXC 225), n8n (LXC 210), Home Assistant (VM 109), Databases (VM 112), docker-home/Pi-hole 1 (VM 106)
+- **Important**: Claude Discord Coordinator (LXC 301), arr-stack (LXC 221), Uptime Kuma (LXC 227), Foundry VTT (LXC 223), Memos (LXC 222)
+- **Stopped/Investigate**: docker-vpn (VM 105, decommissioning), docker-home-servers (VM 116, needs investigation)
+- **Removed (2026-02-19)**: 108 (ansible), 224 (openclaw), 300 (openclaw-migrated), 101/102/104/111/211 (game servers), 107 (plex), 113 (tdarr - moved to .226), 114 (duplicate arr-stack), 117 (unused), 100/103 (old templates)
 
 **Key Constraints**:
 - Home Assistant VM 109 requires dual network (vmbr1 for Matter support)
@@ -29,26 +37,33 @@
 
 #### 1. Comprehensive Backups
 
-**Priority 1 - Production Services**:
+**All production guests** (14 total after cleanup):
 ```bash
-# Backup critical services to TrueNAS
-vzdump 210 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # n8n
-vzdump 115 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # docker-sba
-vzdump 112 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # databases
-vzdump 110 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # discord-bots
-vzdump 225 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # gitea
-vzdump 109 --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd  # homeassistant
-```
-
-**Priority 2 - All Remaining VMs/LXCs**:
-```bash
-vzdump --all --mode snapshot --dumpdir /mnt/truenas/proxmox --compress zstd
+# Backup all to TrueNAS (PVE storage: home-truenas, mount: /mnt/pve/home-truenas)
+# VMs
+vzdump 106 --mode snapshot --storage home-truenas --compress zstd  # docker-home (pihole1, NPM)
+vzdump 109 --mode snapshot --storage home-truenas --compress zstd  # homeassistant
+vzdump 110 --mode snapshot --storage home-truenas --compress zstd  # discord-bots
+vzdump 112 --mode snapshot --storage home-truenas --compress zstd  # databases
+vzdump 115 --mode snapshot --storage home-truenas --compress zstd  # docker-sba (Paper Dynasty)
+# LXCs
+vzdump 210 --mode snapshot --storage home-truenas --compress zstd  # n8n
+vzdump 221 --mode snapshot --storage home-truenas --compress zstd  # arr-stack
+vzdump 222 --mode snapshot --storage home-truenas --compress zstd  # memos
+vzdump 223 --mode snapshot --storage home-truenas --compress zstd  # foundry
+vzdump 225 --mode snapshot --storage home-truenas --compress zstd  # gitea
+vzdump 227 --mode snapshot --storage home-truenas --compress zstd  # uptime-kuma
+vzdump 301 --mode snapshot --storage home-truenas --compress zstd  # claude-discord-coordinator
+# Optional (stopped/investigate)
+# vzdump 105 --mode snapshot --storage home-truenas --compress zstd  # docker-vpn (decommissioning)
+# vzdump 116 --mode snapshot --storage home-truenas --compress zstd  # docker-home-servers (investigate)
 ```
 
 **Backup Proxmox Configuration**:
 ```bash
-tar -czf /mnt/truenas/proxmox/pve-config-$(date +%Y%m%d).tar.gz /etc/pve/
-cp /etc/network/interfaces /mnt/truenas/proxmox/interfaces.backup
+# Already completed 2026-02-19 — refresh before upgrade
+tar -czf /mnt/pve/home-truenas/dump/pve-config/pve-config-$(date +%Y%m%d).tar.gz /etc/pve/
+cp /etc/network/interfaces /mnt/pve/home-truenas/dump/pve-config/interfaces.backup.$(date +%Y%m%d)
 ```
 
 **Expected**: 2-4 hours, ~500GB-1TB storage required
@@ -120,24 +135,34 @@ pvesm status
 
 ### Post-Upgrade Validation
 
-**Start Services in Dependency Order**:
+**Start Services in Dependency Order** (stagger with 30s delays per Phase 1 lessons):
 ```bash
 # Databases first
-pvesh create /nodes/proxmox/qemu/112/status/start
+pvesh create /nodes/proxmox/qemu/112/status/start  # databases-bots
+sleep 30
 
-# Infrastructure
+# Infrastructure + DNS
+pvesh create /nodes/proxmox/qemu/106/status/start  # docker-home (pihole1, NPM)
 pvesh create /nodes/proxmox/lxc/225/status/start  # gitea
 pvesh create /nodes/proxmox/lxc/210/status/start  # n8n
+pvesh create /nodes/proxmox/lxc/227/status/start  # uptime-kuma
+sleep 30
 
 # Applications
 pvesh create /nodes/proxmox/qemu/115/status/start  # docker-sba (Paper Dynasty)
 pvesh create /nodes/proxmox/qemu/110/status/start  # discord-bots
-pvesh create /nodes/proxmox/lxc/224/status/start  # openclaw
+pvesh create /nodes/proxmox/lxc/301/status/start  # claude-discord-coordinator
+sleep 30
+
+# Restart Pi-hole container proactively (UDP DNS fix from Phase 1)
+qm guest exec 106 -- docker restart pihole
+sleep 10
 
 # Media & Others
 pvesh create /nodes/proxmox/qemu/109/status/start  # homeassistant
-pvesh create /nodes/proxmox/qemu/107/status/start  # plex
 pvesh create /nodes/proxmox/lxc/221/status/start  # arr-stack
+pvesh create /nodes/proxmox/lxc/222/status/start  # memos
+pvesh create /nodes/proxmox/lxc/223/status/start  # foundry-lxc
 ```
 
 **Service Validation Checklist**:
@@ -159,6 +184,30 @@ Monitor for:
 - Service uptime
 - Error logs
 
+### Phase 1 Lessons Learned (2026-02-19)
+
+**Issues encountered:**
+1. **I/O storm on boot**: All 15 guests starting simultaneously caused massive I/O delay (~50% for several minutes). Consider staggering guest startup with delays between groups.
+2. **Pi-hole 1 UDP DNS failed after boot**: Docker iptables NAT rules weren't fully set up. Required container restart. TCP DNS worked immediately — only UDP was affected.
+3. **Home Assistant IP changed**: HA on VM 109 got a new DHCP address (10.10.0.215 instead of previous). Need DHCP reservation to prevent this.
+4. **Local machine DNS failover**: Desktop was configured with only one Pi-hole DNS server (10.10.0.226). When Proxmox guests were shut down, Pi-hole on physical server at .226 should have kept working but didn't resolve initially. Added both Pi-holes as DNS servers.
+5. **Some VMs ignored ACPI shutdown**: VMs 105 and 112 required `--forceStop` flag.
+6. **Several guests had onboot=1**: Many guests auto-started before we could bring them up in dependency order. Not harmful but unexpected.
+
+**What went well:**
+- `pve7to8 --full` checker caught everything — zero surprises during upgrade
+- `DEBIAN_FRONTEND=noninteractive apt dist-upgrade -y -o Dpkg::Options::='--force-confnew'` worked cleanly
+- Reboot took ~4 minutes (longer than expected but completed without issues)
+- All backups on TrueNAS were intact and accessible post-upgrade
+- Local disk space dropped from 57% to 14% after upgrade (old kernel/packages cleaned up)
+
+**Recommendations for Phase 2:**
+- Stagger guest startup: add `sleep 30` between dependency groups
+- Restart Pi-hole Docker container proactively after boot
+- Set DHCP reservation for HA VM before Phase 2
+- Switch local DNS to public resolvers (1.1.1.1) before shutting down guests
+- Disable onboot for all guests before upgrade, re-enable after validation
+
 ---
 
 ## Phase 2: Proxmox 8.4 → 9.1 Upgrade
@@ -169,25 +218,29 @@ Monitor for:
 
 ```bash
 # Verify systemd version in each LXC (must be > 230)
-for ct in 108 210 211 221 222 223 224 225; do
+for ct in 108 210 211 221 222 223 224 225 227 300 301; do
     echo "=== LXC $ct ==="
     pct exec $ct -- systemd --version | head -1
 done
 ```
 
-**Action Required**: If any LXC shows systemd < 230:
-```bash
-pct enter <CTID>
-apt update && apt dist-upgrade -y
-do-release-upgrade  # Upgrade Ubuntu to compatible version
-```
+**Pre-verified 2026-02-19** (all pass, updated after cleanup):
+| LXC | Name | systemd | Status |
+|-----|------|---------|--------|
+| 210 | n8n | 245 | Pass |
+| 221 | arr-stack | 245 | Pass |
+| 222 | memos | 245 | Pass |
+| 223 | foundry | 245 | Pass |
+| 225 | gitea | 245 | Pass |
+| 227 | uptime-kuma | 249 | Pass |
+| 301 | claude-discord-coord | 249 | Pass |
 
-**Expected**: All Ubuntu 20.04+ LXCs should be compatible (systemd 245+)
+**Expected**: All compatible. Re-verify before Phase 2 in case any LXC OS was changed.
 
 #### 2. Fresh Backup Set
 ```bash
-vzdump --all --mode snapshot --dumpdir /mnt/truenas/proxmox/pve9-upgrade --compress zstd
-tar -czf /mnt/truenas/proxmox/pve8-config-$(date +%Y%m%d).tar.gz /etc/pve/
+vzdump --all --mode snapshot --storage home-truenas --compress zstd
+tar -czf /mnt/pve/home-truenas/dump/pve-config/pve8-config-$(date +%Y%m%d).tar.gz /etc/pve/
 ```
 
 #### 3. Run PVE 8-to-9 Checker
@@ -350,7 +403,27 @@ pvesh get /cluster/resources
 
 ## Verification Checklist
 
-After each upgrade phase:
+### Phase 1 (PVE 7→8) — Completed 2026-02-19
+
+- [x] Proxmox version correct: pve-manager/8.4.16
+- [x] Kernel version updated: 6.8.12-18-pve
+- [x] All PVE services running (pve-cluster, pvedaemon, pveproxy, pvestatd)
+- [x] Storage accessible: local, local-lvm, home-truenas all active
+- [x] Network functional
+- [x] All VMs/LXCs visible in UI
+- [x] Critical VMs/LXCs started successfully
+- [x] Discord bots responding (confirmed on .88)
+- [x] Databases accessible (VM 112 running)
+- [x] n8n workflows — HTTP 200
+- [x] Gitea accessible — HTTP 200
+- [x] Home Assistant functional — HTTP 200 (new IP: 10.10.0.215)
+- [x] Jellyfin streaming — HTTP 302
+- [x] Uptime Kuma — HTTP 302
+- [x] Pi-hole 1 DNS resolving (after container restart)
+- [x] Pi-hole 2 DNS resolving
+- [x] Web UI functional — HTTP 200
+
+### Phase 2 (PVE 8→9) — Pending
 
 - [ ] Proxmox version correct (`pveversion`)
 - [ ] Kernel version updated (`uname -r`)