diff --git a/CLAUDE.md b/CLAUDE.md index f370cbc..1a8826b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,19 +37,27 @@ When working with specific technologies, automatically load their dedicated cont - If working in `/vm-management/scripts/`: Load `vm-management/scripts/CONTEXT.md` (script-specific documentation) **Networking Keywords** -- "network", "nginx", "proxy", "load balancer", "dns", "port", "firewall", "ssh", "ssl", "tls", "npm", "nginx proxy manager", "reverse proxy" +- "network", "nginx", "proxy", "load balancer", "dns", "port", "firewall", "ssh", "ssl", "tls", "npm", "nginx proxy manager", "reverse proxy", "pihole", "pi-hole", "orbital sync", "dns ha", "high availability dns", "dns failover" - Load: `networking/CONTEXT.md` (technology overview and patterns) - Load: `networking/troubleshooting.md` (error handling and debugging) - If working in `/networking/scripts/`: Load `networking/scripts/CONTEXT.md` (script-specific documentation) - - Note: Comprehensive NPM configuration documented in `networking/npm-configuration.md` + - If "pihole" or "pi-hole" mentioned: Load `networking/pihole-ha-setup.md` (dual Pi-hole HA architecture) + - Note: Comprehensive NPM configuration documented in `server-configs/networking/nginx-proxy-manager-pihole.md` + - Note: Dual Pi-hole deployment: Primary (10.10.0.16), Secondary (10.10.0.226) + - Note: Orbital Sync handles Pi-hole → Pi-hole synchronization (5-minute interval) + - Note: NPM DNS sync script updates both Pi-holes hourly with proxy host entries **Monitoring Keywords** -- "monitoring", "alert", "notification", "discord", "health check", "status", "uptime", "windows reboot", "system monitor" +- "monitoring", "alert", "notification", "discord", "health check", "status", "uptime", "uptime kuma", "uptime-kuma", "status page", "windows reboot", "system monitor", "gpu monitor", "nvidia monitor", "driver monitor" - Load: `monitoring/CONTEXT.md` (technology overview and patterns) - Load: `monitoring/troubleshooting.md` (error handling and debugging) - If working in `/monitoring/scripts/`: Load `monitoring/scripts/CONTEXT.md` (script-specific documentation) + - Note: Uptime Kuma centralized service monitoring on LXC 227 (10.10.0.227) + - Note: Status page at https://status.manticorum.com (internal: http://10.10.0.227:3001) - Note: Windows desktop monitoring with Discord notifications available - Note: Comprehensive Tdarr API monitoring with dataclass-based status tracking + - Note: Jellyfin GPU monitoring with auto-restart (`jellyfin_gpu_monitor.py`) + - Note: NVIDIA driver update monitoring with weekly checks (`nvidia_update_checker.py`) **Productivity Keywords** - "task", "todo", "productivity", "task manager", "brain dump", "focus", "adhd", "task tracking", "context switch", "task dashboard", "n8n", "workflow", "automation", "webhook", "integration", "ko-fi", "payment", "shop order" @@ -71,11 +79,52 @@ When working with specific technologies, automatically load their dedicated cont - Location: LXC 224 (10.10.0.224), gateway at http://10.10.0.224:18789 **Media Server Keywords** -- "jellyfin", "plex", "emby", "media server", "streaming", "watchstate", "watch history" +- "jellyfin", "plex", "emby", "media server", "streaming", "watchstate", "watch history", "nvidia driver", "gpu transcoding", "nvenc" - Load: `media-servers/CONTEXT.md` (technology overview and patterns) - If "jellyfin" mentioned: Load `media-servers/jellyfin-ubuntu-manticore.md` (Jellyfin-specific setup) - Note: Jellyfin on ubuntu-manticore (10.10.0.226) with GPU transcoding - Note: Shares GPU resources with Tdarr - coordinate concurrent usage + - Note: NVIDIA driver packages held to prevent auto-updates; weekly update monitoring via Discord + - Note: GPU health monitoring every 5 minutes with auto-restart capability + +**Media Tools Keywords** +- "media download", "video download", "yt-dlp", "playwright", "pokeflix", "streaming scraper", "web scraping", "media archival" + - Load: `media-tools/CONTEXT.md` (technology overview and patterns) + - Load: `media-tools/troubleshooting.md` (error handling and debugging) + - If working in `/media-tools/scripts/`: Load `media-tools/scripts/CONTEXT.md` (script-specific documentation) + - Note: Browser automation with Playwright for JavaScript-heavy streaming sites + - Note: yt-dlp integration for video downloading + - Note: Resumable downloads with JSON state persistence + - Note: Anti-bot handling with realistic browser behaviors + +**Gaming Keywords** +- "gaming", "steam", "proton", "steam tinker launch", "stl", "ready or not", "gamescope", "gamemode", "dxvk", "wine", "windows games", "linux gaming" + - Load: `gaming/CONTEXT.md` (technology overview and patterns) + - Load: `gaming/troubleshooting.md` (error handling and debugging) + - If working in `/gaming/scripts/`: Load `gaming/scripts/CONTEXT.md` (script-specific documentation) + - Note: Steam Tinker Launch for advanced Proton configuration + - Note: Ready or Not (Game ID: 1144200) specific configs available + - Note: NVIDIA GPU optimizations with NVAPI, DXVK, GameScope + - Note: Config location: ~/.config/steamtinkerlaunch/ + - Note: 12 gaming scripts for RON setup, STL logs, Proton testing + +**Server Configs Keywords** +- "server config", "lxc config", "docker compose deploy", "infrastructure config", "gitea", "n8n config", "foundry config", "home assistant config", "config sync" + - Load: `server-configs/README.md` (technology overview) + - Note: Version-controlled infrastructure configurations for all hosts + - Note: Centralized Docker Compose and VM/LXC configuration management + - Note: sync-configs.sh for automated config deployment and synchronization + - Note: Covers Proxmox LXCs, VMs, and physical servers (ubuntu-manticore) + - Note: Services include: Gitea, n8n, Home Assistant, Foundry VTT, OpenClaw, Uptime Kuma, Discord bots, databases + +**Databases Keywords** +- "database", "sql", "postgres", "mysql", "redis", "mongodb", "db optimization", "query performance", "database backup", "connection pooling", "schema migration" + - Load: `databases/CONTEXT.md` (technology overview and patterns) + - Load: `databases/troubleshooting.md` (error handling and debugging) + - Note: Database design patterns (normalization, indexing, schema versioning) + - Note: Performance optimization (connection pooling, query optimization, caching) + - Note: Backup and recovery strategies (point-in-time recovery, replication) + - Note: Security and access control (privilege management, encryption, audit logging) ### Directory Context Triggers When working in specific directories: @@ -183,6 +232,8 @@ For troubleshooting scenarios, always load both context and troubleshooting file ├── examples/ # Working configurations and templates └── scripts/ # Active automation scripts ├── CONTEXT.md # Script-specific documentation + ├── jellyfin_gpu_monitor.py # Jellyfin GPU health monitoring (5-min checks) + ├── nvidia_update_checker.py # NVIDIA driver update monitoring (weekly) └── windows-desktop/ # Windows reboot monitoring with Discord notifications /productivity/ # Task management and productivity tools diff --git a/monitoring/CONTEXT.md b/monitoring/CONTEXT.md index 64d0d53..10d14df 100644 --- a/monitoring/CONTEXT.md +++ b/monitoring/CONTEXT.md @@ -7,6 +7,7 @@ Comprehensive monitoring and alerting system for home lab infrastructure with fo ### Distributed Monitoring Strategy **Pattern**: Service-specific monitoring with centralized alerting +- **Uptime Kuma**: Centralized service uptime and health monitoring (status page) - **Tdarr Monitoring**: API-based transcoding health checks - **Windows Desktop Monitoring**: Reboot detection and system events - **Network Monitoring**: Connectivity and service availability @@ -45,6 +46,30 @@ curl -X POST "$DISCORD_WEBHOOK" \ - Discord notification integration - System event correlation +### Uptime Kuma (Centralized Uptime Monitoring) +**Purpose**: Centralized service uptime, health checks, and status page for all homelab services +**Location**: LXC 227 (10.10.0.227), Docker container +**URL**: https://status.manticorum.com (internal: http://10.10.0.227:3001) + +**Key Features**: +- HTTP/HTTPS, TCP, DNS, Docker, and ping monitoring +- Built-in Discord notification support +- Public/private status pages +- Multi-protocol health checks with configurable intervals +- Certificate expiration monitoring + +**Infrastructure**: +- Proxmox LXC 227, Ubuntu 22.04, 2 cores, 2GB RAM, 8GB disk +- Docker with AppArmor unconfined (required for Docker-in-LXC) +- Data persisted via Docker named volume (`uptime-kuma-data`) +- Compose config: `server-configs/uptime-kuma/docker-compose/uptime-kuma/` + +**Recommended Monitors**: +- All Docker hosts: Jellyfin, Tdarr, n8n, Gitea, Foundry, Pi-holes, NPM, Discord bots +- Databases: strat-database PostgreSQL instances +- External: Akamai services, SBA website +- Infrastructure: Proxmox API, Home Assistant + ### Network and Service Monitoring **Purpose**: Monitor critical infrastructure availability **Implementation**: diff --git a/server-configs/README.md b/server-configs/README.md index fd50855..6e95d80 100644 --- a/server-configs/README.md +++ b/server-configs/README.md @@ -63,6 +63,10 @@ server-configs/ │ └── docker-compose/ │ └── n8n/ │ +├── uptime-kuma/ # Service uptime monitoring LXC +│ └── docker-compose/ +│ └── uptime-kuma/ +│ ├── akamai/ # Cloud server (Linode) │ └── docker-compose/ │ ├── nginx-proxy-manager/ @@ -85,6 +89,7 @@ server-configs/ | arr-stack | Docker | 10.10.0.221 | Sonarr/Radarr/etc. | | n8n | Docker | 10.10.0.210 | Workflow automation | | gitea | LXC | 10.10.0.225 | Self-hosted Git server + CI/CD | +| uptime-kuma | Docker | 10.10.0.227 | Service uptime monitoring | | akamai | Docker | 172.237.147.99 | Public-facing services | | nobara-desktop | Local | - | Development workstation | diff --git a/server-configs/hosts.yml b/server-configs/hosts.yml index 19a5a67..e1bbf9e 100644 --- a/server-configs/hosts.yml +++ b/server-configs/hosts.yml @@ -24,13 +24,19 @@ hosts: ssh_alias: ubuntu-manticore ip: 10.10.0.226 user: cal - description: "Physical Ubuntu server - media services" + description: "Physical Ubuntu server - media services and secondary DNS" config_paths: docker-compose: /home/cal/docker services: - jellyfin - tdarr - watchstate + - pihole + - orbital-sync + documentation: media-servers/jellyfin-ubuntu-manticore.md + dns_config: + pihole_webui: http://10.10.0.226:8053/admin + role: secondary # Part of dual Pi-hole HA setup # Discord Bots VM (Proxmox) discord-bots: @@ -205,6 +211,21 @@ hosts: dns_sync: /home/cal/scripts/npm-pihole-sync.sh cron: "0 * * * *" # Hourly + # Uptime Kuma LXC (Proxmox) + uptime-kuma: + type: docker + ssh_alias: uptime-kuma + ip: 10.10.0.227 + user: root + vmid: 227 + description: "Uptime Kuma - service uptime monitoring" + config_paths: + docker-compose: /opt/uptime-kuma + services: + - uptime-kuma + web_ui: http://10.10.0.227:3001 + url: https://status.manticorum.com + # Akamai Cloud Server akamai: type: docker diff --git a/server-configs/proxmox/lxc/227.conf b/server-configs/proxmox/lxc/227.conf new file mode 100644 index 0000000..0159797 --- /dev/null +++ b/server-configs/proxmox/lxc/227.conf @@ -0,0 +1,14 @@ +arch: amd64 +cores: 2 +features: nesting=1,keyctl=1 +hostname: uptime-kuma +memory: 2048 +nameserver: 8.8.8.8 +net0: name=eth0,bridge=vmbr0,gw=10.10.0.1,hwaddr=F6:FD:7A:D2:69:60,ip=10.10.0.227/24,type=veth +onboot: 1 +ostype: ubuntu +rootfs: local-lvm:vm-227-disk-0,size=8G +swap: 512 +lxc.apparmor.profile: unconfined +lxc.cgroup2.devices.allow: a +lxc.cap.drop: diff --git a/server-configs/uptime-kuma/docker-compose/uptime-kuma/docker-compose.yml b/server-configs/uptime-kuma/docker-compose/uptime-kuma/docker-compose.yml new file mode 100644 index 0000000..17222cd --- /dev/null +++ b/server-configs/uptime-kuma/docker-compose/uptime-kuma/docker-compose.yml @@ -0,0 +1,13 @@ +services: + uptime-kuma: + image: louislam/uptime-kuma:1 + container_name: uptime-kuma + restart: unless-stopped + ports: + - "3001:3001" + volumes: + - uptime-kuma-data:/app/data + - /var/run/docker.sock:/var/run/docker.sock:ro + +volumes: + uptime-kuma-data: