- Add tdarr_monitor.py: Python-based API monitoring client with type-safe dataclasses
  - ServerStatus, QueueStatus, NodeStatus, LibraryStatus, StatisticsStatus, HealthStatus
  - Support for health checks, queue monitoring, node status, library scans
  - JSON and pretty-print output formats with proper exit codes
  - Integration with existing Discord monitoring system
- Create scripts/monitoring/README.md: Complete monitoring documentation
  - Comprehensive usage examples and command-line options
  - Integration patterns with gaming-aware scheduling
  - Best practices for automated health monitoring
- Update CLAUDE.md: Enhanced Tdarr keyword triggers and documentation structure
  - Add "monitoring" and "api" keywords to automatically load monitoring docs
  - Reference new tdarr_monitor.py with dataclass-based status tracking
  - Update documentation structure to show monitoring script location

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
498 lines
16 KiB
Python
Executable File
498 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Tdarr API Monitoring Script

Monitors Tdarr server via its web API endpoints:
- Server status and health
- Queue status and statistics
- Node status and performance
- Library scan progress
- Worker activity

Usage:
    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check all
    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check queue
    python3 tdarr_monitor.py --server http://10.10.0.43:8265 --check nodes
|
|
"""
|
|
|
|
import argparse
import json
import logging
import sys
from collections import Counter
from dataclasses import dataclass, asdict
from datetime import datetime
from typing import Dict, List, Optional, Any
from urllib.parse import urljoin

import requests
|
|
|
|
|
|
@dataclass
class ServerStatus:
    """Outcome of a server-info query against a Tdarr server.

    Built by ``TdarrMonitor.get_server_status``; the optional fields are only
    populated when the server answered.
    """

    timestamp: str  # ISO-8601 time at which the check was made
    server_url: str  # base URL that was queried
    status: str  # 'online' or 'offline'
    error: Optional[str] = None  # message describing the failure, if any
    version: Optional[str] = None  # 'version' field of the API response
    server_id: Optional[str] = None  # 'serverId' field of the API response
    uptime: Optional[str] = None  # 'uptime' field of the API response
    system_info: Optional[Dict[str, Any]] = None  # 'systemInfo' field of the API response
|
|
|
|
|
|
@dataclass
class QueueStats:
    """Counts derived from the transcode queue plus a sample of its items."""

    total_files: int  # number of entries in the queue
    queued: int  # entries whose status is 'Queued'
    processing: int  # entries whose status is 'Processing'
    completed: int  # entries whose status is 'Completed'
    queue_items: List[Dict[str, Any]]  # raw queue entries kept for detail output
|
|
|
|
|
|
@dataclass
class QueueStatus:
    """Outcome of a queue query: either ``queue_stats`` or ``error`` is set."""

    timestamp: str  # ISO-8601 time at which the check was made
    queue_stats: Optional[QueueStats] = None  # present when the queue was fetched
    error: Optional[str] = None  # message describing the failure, if any
|
|
|
|
|
|
@dataclass
class NodeInfo:
    """Condensed view of one Tdarr node as reported by the nodes endpoint.

    Field names mirror the API's camelCase keys where the value is copied
    through unchanged.
    """

    id: Optional[str]  # '_id' field of the node record
    nodeName: Optional[str]  # 'nodeName' field of the node record
    status: str  # 'online' or 'offline'
    lastSeen: Optional[int]  # 'lastSeen' field of the node record
    version: Optional[str]  # 'version' field of the node record
    platform: Optional[str]  # 'platform' field of the node record
    workers: Dict[str, int]  # worker counts keyed by 'cpu' and 'gpu'
    processing: List[Dict[str, Any]]  # 'currentJobs' field of the node record
|
|
|
|
|
|
@dataclass
class NodeSummary:
    """Node totals together with the per-node details split by status."""

    total_nodes: int  # number of node records returned by the API
    online_nodes: int  # number of nodes classified as online
    offline_nodes: int  # number of nodes classified as offline
    online_details: List[NodeInfo]  # the online nodes themselves
    offline_details: List[NodeInfo]  # the offline nodes themselves
|
|
|
|
|
|
@dataclass
class NodeStatus:
    """Outcome of a node query: raw node records plus an optional summary."""

    timestamp: str  # ISO-8601 time at which the check was made
    nodes: List[Dict[str, Any]]  # raw node records; empty when the query failed
    node_summary: Optional[NodeSummary] = None  # present when nodes were fetched
    error: Optional[str] = None  # message describing the failure, if any
|
|
|
|
|
|
@dataclass
class LibraryInfo:
    """Condensed view of one Tdarr library as reported by the libraries endpoint."""

    name: Optional[str]  # 'name' field of the library record
    path: Optional[str]  # 'path' field of the library record
    file_count: int  # 'totalFiles' field of the library record
    scan_progress: int  # 'scanProgress' field of the library record
    last_scan: Optional[str]  # 'lastScan' field of the library record
    is_scanning: bool  # 'isScanning' field of the library record
|
|
|
|
|
|
@dataclass
class ScanStatus:
    """Totals aggregated across all libraries."""

    total_libraries: int  # number of library records returned by the API
    total_files: int  # sum of the per-library file counts
    scanning_libraries: int  # number of libraries currently flagged as scanning
|
|
|
|
|
|
@dataclass
class LibraryStatus:
    """Outcome of a library query: per-library details plus aggregate totals."""

    timestamp: str  # ISO-8601 time at which the check was made
    libraries: List[LibraryInfo]  # one entry per library; empty when the query failed
    scan_status: Optional[ScanStatus] = None  # present when libraries were fetched
    error: Optional[str] = None  # message describing the failure, if any
|
|
|
|
|
|
@dataclass
class Statistics:
    """Server-wide transcode statistics copied from the stats endpoint."""

    total_transcodes: int  # 'totalTranscodes' field of the stats record
    space_saved: int  # 'spaceSaved' field of the stats record
    total_files_processed: int  # 'totalFilesProcessed' field of the stats record
    failed_transcodes: int  # 'failedTranscodes' field of the stats record
    processing_speed: int  # 'processingSpeed' field of the stats record
    eta: Optional[str]  # 'eta' field of the stats record, if provided
|
|
|
|
|
|
@dataclass
class StatisticsStatus:
    """Outcome of a statistics query: either ``statistics`` or ``error`` is set."""

    timestamp: str  # ISO-8601 time at which the check was made
    statistics: Optional[Statistics] = None  # present when statistics were fetched
    error: Optional[str] = None  # message describing the failure, if any
|
|
|
|
|
|
@dataclass
class HealthCheck:
    """Result of a single health probe (server, nodes or queue).

    Only the optional fields relevant to the probe are filled in; the rest
    stay ``None``.
    """

    status: str  # short label such as 'online', 'offline', 'accessible' or 'error'
    healthy: bool  # whether this probe passed
    online_count: Optional[int] = None  # nodes probe: number of online nodes
    total_count: Optional[int] = None  # nodes probe: number of known nodes
    accessible: Optional[bool] = None  # queue probe: whether the queue could be read
    total_items: Optional[int] = None  # queue probe: number of queue entries
|
|
|
|
|
|
@dataclass
class HealthStatus:
    """Aggregate health report made of named individual probes."""

    timestamp: str  # ISO-8601 time at which the health check started
    overall_status: str  # 'healthy' when every probe passed, otherwise 'unhealthy'
    checks: Dict[str, HealthCheck]  # probe results keyed by probe name
|
|
|
|
|
|
class TdarrMonitor:
    """Query a Tdarr server's HTTP API and summarise the responses.

    Each ``get_*`` method performs one GET request through a shared
    ``requests.Session`` and returns a status dataclass. Request and decoding
    failures are logged and reported through the dataclass's ``error`` field
    rather than raised.
    """

    def __init__(self, server_url: str, timeout: int = 30):
        """Initialize Tdarr monitor with server URL.

        Args:
            server_url: Base URL of the Tdarr server; trailing slashes are
                stripped.
            timeout: Per-request timeout in seconds.
        """
        self.server_url = server_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()

        # Configure logging. basicConfig only takes effect when the root
        # logger has no handlers yet, so an application that configured
        # logging beforehand keeps its own setup.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)

    def _make_request(self, endpoint: str) -> Optional[Dict[str, Any]]:
        """Make HTTP request to Tdarr API endpoint.

        Args:
            endpoint: API path such as ``/api/v2/get-queue``.

        Returns:
            The decoded JSON object, or ``None`` when the request failed, the
            body was not valid JSON, or the JSON was not an object. Every
            failure is logged.
        """
        # Plain concatenation instead of urljoin: urljoin with an absolute
        # path would drop any path prefix in server_url (e.g. a reverse-proxy
        # sub-path such as http://host/tdarr).
        url = f"{self.server_url}/{endpoint.lstrip('/')}"

        try:
            response = self.session.get(url, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            self.logger.error(f"Request failed for {url}: {e}")
            return None

        # Decoded separately so a malformed body is reported as a JSON
        # problem; ValueError is the common base of the decode errors raised
        # by json and by requests.
        try:
            payload = response.json()
        except ValueError as e:
            self.logger.error(f"JSON decode failed for {url}: {e}")
            return None

        # Callers use dict access on the result, so reject anything else here.
        if not isinstance(payload, dict):
            self.logger.error(
                f"Unexpected JSON payload for {url}: {type(payload).__name__}"
            )
            return None
        return payload

    def get_server_status(self) -> ServerStatus:
        """Get overall server status and configuration.

        Returns:
            A ``ServerStatus`` with status 'online' and the fields copied
            from the response, or status 'offline' with ``error`` set when
            the request failed.
        """
        timestamp = datetime.now().isoformat()

        # Try to get server info from API
        data = self._make_request('/api/v2/get-server-info')
        # `is None` rather than truthiness: an empty but valid JSON object
        # still proves the server answered.
        if data is None:
            return ServerStatus(
                timestamp=timestamp,
                server_url=self.server_url,
                status='offline',
                error='Unable to connect to Tdarr server'
            )

        return ServerStatus(
            timestamp=timestamp,
            server_url=self.server_url,
            status='online',
            version=data.get('version'),
            server_id=data.get('serverId'),
            uptime=data.get('uptime'),
            system_info=data.get('systemInfo', {})
        )

    def get_queue_status(self) -> QueueStatus:
        """Get transcoding queue status and statistics.

        Returns:
            A ``QueueStatus`` holding per-status counts and the first ten
            queue entries, or one with ``error`` set when the request failed.
        """
        timestamp = datetime.now().isoformat()

        # Get queue information
        data = self._make_request('/api/v2/get-queue')
        if data is None:
            return QueueStatus(
                timestamp=timestamp,
                error='Unable to fetch queue data'
            )

        # `or []` also covers an explicit JSON null for the key.
        queue_data = data.get('queue') or []

        # Tally statuses in a single pass; non-string statuses are skipped
        # because they can never match one of the labels below.
        statuses = (item.get('status') for item in queue_data)
        status_counts = Counter(s for s in statuses if isinstance(s, str))

        queue_stats = QueueStats(
            total_files=len(queue_data),
            queued=status_counts['Queued'],
            processing=status_counts['Processing'],
            completed=status_counts['Completed'],
            queue_items=queue_data[:10]  # First 10 items for details
        )

        return QueueStatus(
            timestamp=timestamp,
            queue_stats=queue_stats
        )

    def get_node_status(self) -> NodeStatus:
        """Get status of all connected nodes.

        A node is classified as online whenever its ``lastSeen`` value is
        positive; no staleness threshold is applied.

        Returns:
            A ``NodeStatus`` with the raw node records and a ``NodeSummary``,
            or one with an empty node list and ``error`` set when the request
            failed.
        """
        timestamp = datetime.now().isoformat()

        # Get nodes information
        data = self._make_request('/api/v2/get-nodes')
        if data is None:
            return NodeStatus(
                timestamp=timestamp,
                nodes=[],
                error='Unable to fetch node data'
            )

        nodes = data.get('nodes') or []

        # Process node information
        online_nodes = []
        offline_nodes = []

        for node in nodes:
            last_seen = node.get('lastSeen')
            # `or` fallbacks guard against explicit JSON nulls, which would
            # otherwise break the comparison and the nested lookups.
            is_online = (last_seen or 0) > 0
            workers = node.get('workers') or {}

            node_info = NodeInfo(
                id=node.get('_id'),
                nodeName=node.get('nodeName'),
                status='online' if is_online else 'offline',
                lastSeen=last_seen,
                version=node.get('version'),
                platform=node.get('platform'),
                workers={
                    'cpu': workers.get('CPU', 0),
                    'gpu': workers.get('GPU', 0)
                },
                processing=node.get('currentJobs', [])
            )

            if is_online:
                online_nodes.append(node_info)
            else:
                offline_nodes.append(node_info)

        node_summary = NodeSummary(
            total_nodes=len(nodes),
            online_nodes=len(online_nodes),
            offline_nodes=len(offline_nodes),
            online_details=online_nodes,
            offline_details=offline_nodes
        )

        return NodeStatus(
            timestamp=timestamp,
            nodes=nodes,
            node_summary=node_summary
        )

    def get_library_status(self) -> LibraryStatus:
        """Get library scan status and file statistics.

        Returns:
            A ``LibraryStatus`` with one ``LibraryInfo`` per library and an
            aggregate ``ScanStatus``, or one with an empty library list and
            ``error`` set when the request failed.
        """
        timestamp = datetime.now().isoformat()

        # Get library information
        data = self._make_request('/api/v2/get-libraries')
        if data is None:
            return LibraryStatus(
                timestamp=timestamp,
                libraries=[],
                error='Unable to fetch library data'
            )

        libraries = data.get('libraries') or []

        library_stats = [
            LibraryInfo(
                name=lib.get('name'),
                path=lib.get('path'),
                # `or 0` keeps the total below summable when the API sends null.
                file_count=lib.get('totalFiles') or 0,
                scan_progress=lib.get('scanProgress', 0),
                last_scan=lib.get('lastScan'),
                is_scanning=lib.get('isScanning', False)
            )
            for lib in libraries
        ]

        scan_status = ScanStatus(
            total_libraries=len(libraries),
            total_files=sum(info.file_count for info in library_stats),
            scanning_libraries=sum(1 for info in library_stats if info.is_scanning)
        )

        return LibraryStatus(
            timestamp=timestamp,
            libraries=library_stats,
            scan_status=scan_status
        )

    def get_statistics(self) -> StatisticsStatus:
        """Get overall Tdarr statistics and health metrics.

        Returns:
            A ``StatisticsStatus`` with the fields of the response's 'stats'
            object (missing counters default to 0), or one with ``error`` set
            when the request failed.
        """
        timestamp = datetime.now().isoformat()

        # Get statistics
        data = self._make_request('/api/v2/get-stats')
        if data is None:
            return StatisticsStatus(
                timestamp=timestamp,
                error='Unable to fetch statistics'
            )

        stats = data.get('stats') or {}
        statistics = Statistics(
            total_transcodes=stats.get('totalTranscodes', 0),
            space_saved=stats.get('spaceSaved', 0),
            total_files_processed=stats.get('totalFilesProcessed', 0),
            failed_transcodes=stats.get('failedTranscodes', 0),
            processing_speed=stats.get('processingSpeed', 0),
            eta=stats.get('eta')
        )

        return StatisticsStatus(
            timestamp=timestamp,
            statistics=statistics
        )

    def health_check(self) -> HealthStatus:
        """Perform comprehensive health check.

        Runs the server, node and queue queries in turn. The overall status
        is 'healthy' only when the server is online, at least one node is
        online, and the queue could be read.

        Returns:
            A ``HealthStatus`` with one ``HealthCheck`` per probe under the
            keys 'server', 'nodes' and 'queue'.
        """
        timestamp = datetime.now().isoformat()

        # Server connectivity
        server_status = self.get_server_status()
        server_check = HealthCheck(
            status=server_status.status,
            healthy=server_status.status == 'online'
        )

        # Node connectivity
        node_status = self.get_node_status()
        summary = node_status.node_summary
        online_count = summary.online_nodes if summary else 0
        total_count = summary.total_nodes if summary else 0
        nodes_healthy = online_count > 0 and not node_status.error

        nodes_check = HealthCheck(
            status='online' if nodes_healthy else 'offline',
            healthy=nodes_healthy,
            online_count=online_count,
            total_count=total_count
        )

        # Queue status
        queue_status = self.get_queue_status()
        queue_healthy = not queue_status.error
        queue_check = HealthCheck(
            status='accessible' if queue_healthy else 'error',
            healthy=queue_healthy,
            accessible=queue_healthy,
            total_items=queue_status.queue_stats.total_files if queue_status.queue_stats else 0
        )

        checks = {
            'server': server_check,
            'nodes': nodes_check,
            'queue': queue_check
        }

        # Determine overall health
        all_checks_healthy = all(check.healthy for check in checks.values())
        overall_status = 'healthy' if all_checks_healthy else 'unhealthy'

        return HealthStatus(
            timestamp=timestamp,
            overall_status=overall_status,
            checks=checks
        )
|
|
|
|
|
|
def main():
    """Parse the command line, run the requested check, print it and exit.

    The process exits with status 1 when a health check is 'unhealthy' or
    when any returned status object carries an ``error``; otherwise it exits
    with status 0.
    """
    parser = argparse.ArgumentParser(description='Monitor Tdarr server via API')
    parser.add_argument('--server', required=True, help='Tdarr server URL (e.g., http://10.10.0.43:8265)')
    parser.add_argument('--check', choices=['all', 'status', 'queue', 'nodes', 'libraries', 'stats', 'health'],
                        default='health', help='Type of check to perform')
    parser.add_argument('--timeout', type=int, default=30, help='Request timeout in seconds')
    parser.add_argument('--output', choices=['json', 'pretty'], default='pretty', help='Output format')
    parser.add_argument('--verbose', action='store_true', help='Enable verbose logging')

    args = parser.parse_args()

    # Initialize monitor
    monitor = TdarrMonitor(args.server, args.timeout)

    # Apply --verbose only after the monitor exists: its constructor calls
    # logging.basicConfig(level=INFO), which would otherwise reset the root
    # level and silently undo the DEBUG setting.
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Perform requested check
    if args.check == 'all':
        result = {
            'server_status': monitor.get_server_status(),
            'queue_status': monitor.get_queue_status(),
            'node_status': monitor.get_node_status(),
            'library_status': monitor.get_library_status(),
            'statistics': monitor.get_statistics()
        }
    else:
        # argparse `choices` guarantees the key is present.
        single_checks = {
            'status': monitor.get_server_status,
            'queue': monitor.get_queue_status,
            'nodes': monitor.get_node_status,
            'libraries': monitor.get_library_status,
            'stats': monitor.get_statistics,
            'health': monitor.health_check,
        }
        result = single_checks[args.check]()

    # Output results
    if args.output == 'json':
        # Convert dataclasses to dictionaries for JSON serialization
        if isinstance(result, dict):
            json_result = {key: asdict(value) for key, value in result.items()}
            print(json.dumps(json_result, indent=2))
        else:
            print(json.dumps(asdict(result), indent=2))
    else:
        # Pretty print format
        print(f"=== Tdarr Monitor Results - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ===")

        if isinstance(result, HealthStatus):
            print(f"Overall Status: {result.overall_status.upper()}")

            if result.checks:
                print("\nHealth Checks:")
                for check_name, check_data in result.checks.items():
                    status_icon = "✓" if check_data.healthy else "✗"
                    print(f" {status_icon} {check_name.title()}: {asdict(check_data)}")
        elif isinstance(result, dict):
            for section, data in result.items():
                print(f"\n=== {section.replace('_', ' ').title()} ===")
                print(json.dumps(asdict(data), indent=2))
        else:
            print(json.dumps(asdict(result), indent=2))

    # Exit with appropriate code
    if isinstance(result, HealthStatus):
        failed = result.overall_status == 'unhealthy'
    elif isinstance(result, dict):
        # 'all' mode: any failing section fails the whole run.
        failed = any(status_obj.error for status_obj in result.values())
    else:
        # Every status class other than HealthStatus has an `error` attribute.
        failed = bool(result.error)

    sys.exit(1 if failed else 0)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()