#!/usr/bin/env python3
"""
NVIDIA Driver Update Checker

Monitors for available updates to held NVIDIA packages and sends Discord
notifications when new versions are available.

This allows manual, planned updates during maintenance windows rather than
surprise auto-updates causing downtime.

Usage:
    # Check for updates (with Discord alert)
    python3 nvidia_update_checker.py --check --discord-alerts

    # Check silently (cron job logging)
    python3 nvidia_update_checker.py --check

    # Test Discord integration
    python3 nvidia_update_checker.py --discord-test

The webhook URL is taken from --discord-webhook, falling back to the
NVIDIA_CHECKER_DISCORD_WEBHOOK environment variable, then to the built-in
default.
"""

import argparse
import json
import logging
import os
import shlex
import subprocess
import sys
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from typing import List, Optional, Tuple

try:
    import requests
except ImportError:  # Only needed for Discord; plain --check still works.
    requests = None

# Discord rejects the whole webhook call when an embed field value is longer
# than this many characters.
EMBED_FIELD_VALUE_LIMIT = 1024

# Environment variable that overrides the built-in webhook default.
WEBHOOK_ENV_VAR = "NVIDIA_CHECKER_DISCORD_WEBHOOK"

# NOTE(review): this webhook URL is a credential committed to source control.
# It is kept only so existing cron jobs that rely on the default keep working;
# rotate it and supply the new one via the environment variable or the CLI.
_FALLBACK_WEBHOOK = 'https://discord.com/api/webhooks/1404105821549498398/y2Ud1RK9rzFjv58xbypUfQNe3jrL7ZUq1FkQHa4_dfOHm2ylp93z0f4tY0O8Z-vQgKhD'


@dataclass
class PackageUpdate:
    """One upgradable package as reported by ``apt list --upgradable``."""

    name: str
    current_version: str
    available_version: str
    held: bool  # True when the package appears in ``apt-mark showhold``


@dataclass
class UpdateCheckResult:
    """Outcome of a single update check."""

    timestamp: str
    updates_available: bool  # True only when a *held* package has an update
    held_packages: List[PackageUpdate]
    other_packages: List[PackageUpdate]
    total_updates: int


class DiscordNotifier:
    """Sends embed messages to a Discord webhook."""

    def __init__(self, webhook_url: str, timeout: int = 10):
        self.webhook_url = webhook_url
        self.timeout = timeout
        self.logger = logging.getLogger(f"{__name__}.DiscordNotifier")

    def send_alert(self, title: str, description: str,
                   color: int = 0xffa500,
                   fields: Optional[list] = None) -> bool:
        """Send embed alert to Discord.

        Returns True when the webhook accepted the message, False on any
        failure (the failure is logged, never raised).
        """
        if requests is None:
            self.logger.error(
                "Failed to send Discord notification: "
                "the 'requests' package is not installed"
            )
            return False

        embed = {
            "title": title,
            "description": description,
            "color": color,
            # Timezone-aware so Discord does not misread local time as UTC.
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "fields": fields or [],
        }
        payload = {
            "username": "NVIDIA Update Monitor",
            "embeds": [embed],
        }

        try:
            response = requests.post(
                self.webhook_url,
                json=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except Exception as e:
            # Best effort: a failed notification must not abort the check.
            self.logger.error(f"Failed to send Discord notification: {e}")
            return False

        self.logger.info("Discord notification sent successfully")
        return True

    @staticmethod
    def _format_update_list(updates: List[PackageUpdate],
                            limit: int = EMBED_FIELD_VALUE_LIMIT) -> str:
        """Render one bullet per update, truncated at whole lines to *limit*."""
        lines = [
            f"• **{pkg.name}**: {pkg.current_version} → {pkg.available_version}"
            for pkg in updates
        ]
        text = "\n".join(lines)
        if len(text) <= limit:
            return text

        # Keep as many whole lines as fit together with the "more" note.
        kept: List[str] = []
        for index, line in enumerate(lines):
            omitted_after = len(lines) - index - 1
            tail = [f"… and {omitted_after} more"] if omitted_after else []
            if len("\n".join(kept + [line] + tail)) > limit:
                break
            kept.append(line)
        omitted = len(lines) - len(kept)
        return "\n".join(kept + [f"… and {omitted} more"])

    def send_update_available_alert(self, updates: List[PackageUpdate]) -> bool:
        """Send alert when NVIDIA driver updates are available.

        Returns False without contacting Discord when *updates* is empty,
        because Discord rejects embeds with an empty field value.
        """
        if not updates:
            self.logger.warning("No updates to report; Discord alert skipped")
            return False

        version_list = self._format_update_list(updates)

        # NOTE(review): the package name in the commands below and the host
        # name in the description are hard-coded; confirm they still match
        # the monitored machine.
        fields = [
            {
                "name": "Available Updates",
                "value": version_list,
                "inline": False
            },
            {
                "name": "⚠️ Action Required",
                "value": (
                    "These packages are held and will NOT auto-update.\n"
                    "Plan a maintenance window to update manually:\n"
                    "```bash\n"
                    "sudo apt-mark unhold nvidia-driver-570\n"
                    "sudo apt update && sudo apt upgrade\n"
                    "sudo reboot\n"
                    "```"
                ),
                "inline": False
            }
        ]

        return self.send_alert(
            title="🔔 NVIDIA Driver Update Available",
            description=f"New NVIDIA driver version(s) available for ubuntu-manticore ({len(updates)} package(s))",
            color=0xffa500,  # Orange
            fields=fields
        )


class NvidiaUpdateChecker:
    """Checks for NVIDIA package updates locally or on a remote host via SSH."""

    def __init__(self, ssh_host: Optional[str] = None,
                 discord_webhook: Optional[str] = None,
                 enable_discord: bool = False):
        self.ssh_host = ssh_host
        self.logger = logging.getLogger(__name__)

        self.discord = None
        if enable_discord and discord_webhook:
            self.discord = DiscordNotifier(discord_webhook)

    def _build_argv(self, cmd: list) -> list:
        """Return the argv to execute: *cmd* itself, or an ssh wrapper for it."""
        if not self.ssh_host:
            return list(cmd)
        # ssh hands the command to the remote shell as one string, so each
        # argument is quoted to survive that shell unchanged.
        remote_command = " ".join(shlex.quote(part) for part in cmd)
        return ["ssh", self.ssh_host, remote_command]

    def _run_command(self, cmd: list, timeout: int = 30) -> Tuple[int, str, str]:
        """Run command locally or via SSH.

        Returns ``(returncode, stdout, stderr)`` with both streams stripped.
        A timeout or a failure to start the process is reported as return
        code -1 with the reason in the stderr slot; nothing is raised.
        """
        argv = self._build_argv(cmd)

        try:
            # shell=False: with a list and shell=True, POSIX runs only the
            # first element and passes the rest to the shell itself, so the
            # ssh command was started without host or remote command.
            result = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                timeout=timeout,
                shell=False,
            )
        except subprocess.TimeoutExpired:
            return -1, "", "Command timed out"
        except (OSError, ValueError) as e:
            return -1, "", str(e)

        return result.returncode, result.stdout.strip(), result.stderr.strip()

    def get_held_packages(self) -> List[str]:
        """Get list of held packages (empty when the query fails)."""
        code, stdout, stderr = self._run_command(["apt-mark", "showhold"])
        if code != 0:
            self.logger.error(f"Failed to get held packages: {stderr}")
            return []

        return [line.strip() for line in stdout.split("\n") if line.strip()]

    @staticmethod
    def _parse_upgradable_line(line: str) -> Optional[Tuple[str, str, str]]:
        """Parse one ``apt list --upgradable`` line.

        Expected shape:
            package/release version arch [upgradable from: old_version]

        Returns ``(name, new_version, old_version)`` or None when the line
        is not an upgradable-package entry.
        """
        if "/" not in line or "[upgradable" not in line:
            return None

        parts = line.split()
        if len(parts) < 6:
            return None

        name = parts[0].split("/")[0]
        return name, parts[1], parts[5].rstrip("]")

    def check_package_updates(self) -> List[PackageUpdate]:
        """Check for available updates to packages whose name contains "nvidia".

        Returns an empty list when the upgradable-package query fails.
        """
        # Refresh the package cache. A failure is not fatal (the listing
        # below still runs against the existing cache) but is worth knowing.
        code, _, stderr = self._run_command(["apt-get", "update", "-qq"])
        if code != 0:
            self.logger.warning(f"Package cache refresh failed: {stderr}")

        # Get list of upgradable packages
        code, stdout, stderr = self._run_command(["apt", "list", "--upgradable"])
        if code != 0:
            self.logger.error(f"Failed to check updates: {stderr}")
            return []

        held_packages = set(self.get_held_packages())
        updates = []

        for line in stdout.split("\n"):
            parsed = self._parse_upgradable_line(line)
            if parsed is None:
                continue

            package_name, new_version, old_version = parsed

            # Filter for NVIDIA packages
            if "nvidia" in package_name.lower():
                updates.append(PackageUpdate(
                    name=package_name,
                    current_version=old_version,
                    available_version=new_version,
                    held=package_name in held_packages
                ))

        return updates

    def check_updates(self) -> UpdateCheckResult:
        """Perform full update check and alert on held packages with updates."""
        timestamp = datetime.now().isoformat()
        updates = self.check_package_updates()

        held_updates = [u for u in updates if u.held]
        other_updates = [u for u in updates if not u.held]

        result = UpdateCheckResult(
            timestamp=timestamp,
            updates_available=bool(held_updates),
            held_packages=held_updates,
            other_packages=other_updates,
            total_updates=len(updates)
        )

        # Send Discord alert for held packages with updates
        if result.updates_available and self.discord:
            self.discord.send_update_available_alert(held_updates)

        return result


def main():
    """Command-line entry point.

    Exit status: for --discord-test, 0 when the test message was sent and 1
    otherwise; for --check, 1 when a held package has an update and 0
    otherwise. With neither flag the help text is printed.
    """
    parser = argparse.ArgumentParser(
        description='Monitor NVIDIA driver updates on held packages'
    )
    parser.add_argument('--check', action='store_true', help='Check for updates')
    parser.add_argument('--discord-webhook',
                        default=os.environ.get(WEBHOOK_ENV_VAR, _FALLBACK_WEBHOOK),
                        help='Discord webhook URL')
    parser.add_argument('--discord-alerts', action='store_true',
                        help='Enable Discord alerts')
    parser.add_argument('--discord-test', action='store_true',
                        help='Test Discord integration')
    parser.add_argument('--ssh-host', default='cal@10.10.0.226',
                        help='SSH host for remote monitoring')
    parser.add_argument('--output', choices=['json', 'pretty'], default='pretty')
    parser.add_argument('--verbose', action='store_true', help='Verbose logging')

    args = parser.parse_args()

    # Configure logging
    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Discord test
    if args.discord_test:
        notifier = DiscordNotifier(args.discord_webhook)
        success = notifier.send_alert(
            title="NVIDIA Update Monitor Test",
            description="Discord integration is working correctly.",
            color=0x00ff00,
            fields=[
                {"name": "Host", "value": args.ssh_host, "inline": True},
                {"name": "Status", "value": "Test successful", "inline": True}
            ]
        )
        sys.exit(0 if success else 1)

    # Update check
    if args.check:
        checker = NvidiaUpdateChecker(
            ssh_host=args.ssh_host,
            discord_webhook=args.discord_webhook,
            enable_discord=args.discord_alerts
        )

        result = checker.check_updates()

        if args.output == 'json':
            print(json.dumps(asdict(result), indent=2))
        else:
            print(f"=== NVIDIA Update Check - {result.timestamp} ===")

            if result.updates_available:
                print(f"\n⚠️ {len(result.held_packages)} held package(s) have updates:")
                for pkg in result.held_packages:
                    print(f" • {pkg.name}: {pkg.current_version} → {pkg.available_version}")
                print("\nThese packages will NOT auto-update (held)")
                print("Plan a maintenance window to update manually")
            else:
                print("\n✅ All held NVIDIA packages are up to date")

            if result.other_packages:
                print(f"\nℹ️ {len(result.other_packages)} other NVIDIA package(s) have updates:")
                for pkg in result.other_packages:
                    print(f" • {pkg.name}: {pkg.current_version} → {pkg.available_version}")

        # Non-zero exit lets cron wrappers detect that action is needed.
        sys.exit(0 if not result.updates_available else 1)

    parser.print_help()


if __name__ == '__main__':
    main()