Original planning folder (no git repo) for the server diagnostics system that runs on CT 300. Live deployment is on claude-runner; this preserves the Agent SDK reference, PRD with Phase 2/3 roadmap, and N8N workflow designs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
382 lines
12 KiB
JSON
{
  "project": {
    "name": "N8N-to-Claude Code Automated Server Troubleshooting",
    "version": "1.0.0",
    "created": "2025-12-19",
    "updated": "2025-12-20"
  },
  "phases": [
    {
      "id": "phase-1",
      "name": "Foundation",
      "description": "Core infrastructure and basic Docker diagnostics",
      "status": "completed",
      "completed_date": "2025-12-20"
    },
    {
      "id": "phase-2",
      "name": "Enhancement",
      "description": "Complete diagnostic library and system-level monitoring",
      "status": "pending"
    },
    {
      "id": "phase-3",
      "name": "Expansion",
      "description": "Full homelab coverage and Proxmox integration",
      "status": "pending"
    }
  ],
  "infrastructure": {
    "claude_code_lxc": {
      "ct_id": 300,
      "hostname": "claude-code",
      "ip": "10.10.0.148",
      "os": "Ubuntu 20.04",
      "resources": "2 vCPU, 2GB RAM, 16GB disk",
      "claude_version": "2.0.74",
      "auth_method": "Max subscription (OAuth)"
    },
    "n8n": {
      "ct_id": 210,
      "hostname": "docker-n8n-lxc",
      "ip": "10.10.0.210",
      "deployment": "Docker container in LXC"
    },
    "target_servers": [
      {
        "name": "paper-dynasty",
        "ip": "10.10.0.88",
        "ssh_user": "cal",
        "description": "Paper Dynasty Discord bots and services",
        "containers": [
          "paper-dynasty_discord-app_1",
          "paper-dynasty_db_1",
          "paper-dynasty_adminer_1",
          "sba-website_sba-web_1",
          "sba-ghost_sba-ghost_1"
        ]
      }
    ],
    "notifications": {
      "discord_webhook": "configured",
      "channel": "server-alerts"
    }
  },
  "tasks": [
    {
      "id": "lxc-provision",
      "name": "Provision Claude Code LXC",
      "description": "Create dedicated LXC container on Proxmox for Claude Code (2 vCPU, 2GB RAM, 16GB disk)",
      "phase": "phase-1",
      "dependencies": [],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "CT 300, Ubuntu 20.04 (22.04 template failed)"
    },
    {
      "id": "lxc-packages",
      "name": "Install LXC Dependencies",
      "description": "Install Node.js, Python 3.8, and other required packages on the LXC",
      "phase": "phase-1",
      "dependencies": ["lxc-provision"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Python 3.8 (Ubuntu 20.04 default), PyYAML installed"
    },
    {
      "id": "claude-code-install",
      "name": "Install Claude Code CLI",
      "description": "Install Claude Code CLI using native installer and authenticate with Max subscription",
      "phase": "phase-1",
      "dependencies": ["lxc-packages"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "v2.0.74 installed at /root/.local/bin/claude, OAuth device code auth"
    },
    {
      "id": "ssh-keys-generate",
      "name": "Generate SSH Key Pair",
      "description": "Generate dedicated SSH key pair for Claude Code diagnostics",
      "phase": "phase-1",
      "dependencies": ["lxc-provision"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "~/.ssh/claude_diagnostics_key (ed25519)"
    },
    {
      "id": "ssh-keys-install",
      "name": "Install SSH Keys on Target Server",
      "description": "Add public key to Paper Dynasty server (10.10.0.88)",
      "phase": "phase-1",
      "dependencies": ["ssh-keys-generate"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Key installed for user 'cal' on paper-dynasty server"
    },
    {
      "id": "skill-structure",
      "name": "Create Skill Directory Structure",
      "description": "Create ~/.claude/skills/server-diagnostics/ with SKILL.md, client.py, config.yaml",
      "phase": "phase-1",
      "dependencies": ["claude-code-install"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20"
    },
    {
      "id": "skill-skillmd",
      "name": "Write SKILL.md Context File",
      "description": "Create comprehensive SKILL.md with troubleshooting workflows, command references, and usage instructions",
      "phase": "phase-1",
      "dependencies": ["skill-structure"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20"
    },
    {
      "id": "skill-config",
      "name": "Write config.yaml",
      "description": "Create server inventory, command whitelist, and container configuration",
      "phase": "phase-1",
      "dependencies": ["skill-structure"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Paper Dynasty server with 5 containers configured"
    },
    {
      "id": "skill-client",
      "name": "Implement Python Diagnostic Client",
      "description": "Implement ServerDiagnostics class with SSH, Docker operations, and CLI interface",
      "phase": "phase-1",
      "dependencies": ["skill-config", "ssh-keys-install"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Python 3.8 compatible, Go template format for Docker 20.10"
    },
    {
      "id": "settings-json",
      "name": "Configure settings.json Allow/Deny Lists",
      "description": "Add permission rules to Claude Code settings.json for command control",
      "phase": "phase-1",
      "dependencies": ["claude-code-install"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20"
    },
    {
      "id": "n8n-ssh-access",
      "name": "Configure N8N SSH Access to Claude Code LXC",
      "description": "Set up SSH from N8N LXC to Claude Code LXC",
      "phase": "phase-1",
      "dependencies": ["lxc-provision"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "SSH key n8n_to_claude generated and installed"
    },
    {
      "id": "n8n-workflow",
      "name": "Create N8N Health Check Workflow",
      "description": "Two-stage workflow: free health check script, Claude remediation only on issues",
      "phase": "phase-1",
      "dependencies": ["n8n-ssh-access"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Cost-optimized: $0 for healthy checks, ~$0.10-0.15 for remediation"
    },
    {
      "id": "wrapper-scripts",
      "name": "Create Wrapper Scripts",
      "description": "health-check.sh (free) and remediate.sh (Claude) for N8N invocation",
      "phase": "phase-1",
      "dependencies": ["skill-client"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Scripts handle quoting issues and provide clean interface for N8N"
    },
    {
      "id": "discord-webhook",
      "name": "Set Up Discord Webhook",
      "description": "Configure Discord webhook for server alerts",
      "phase": "phase-1",
      "dependencies": [],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20"
    },
    {
      "id": "n8n-discord",
      "name": "Add Discord Notification to Workflow",
      "description": "Add Discord webhook node with formatted alerts",
      "phase": "phase-1",
      "dependencies": ["n8n-workflow", "discord-webhook"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Bullet point format (Discord doesn't support markdown tables)"
    },
    {
      "id": "auto-remediation",
      "name": "Implement Auto-Remediation",
      "description": "Claude automatically restarts containers with restart_allowed: true",
      "phase": "phase-1",
      "dependencies": ["n8n-workflow"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Successfully tested with paper-dynasty_adminer_1"
    },
    {
      "id": "e2e-test-phase1",
      "name": "End-to-End Phase 1 Test",
      "description": "Stop a Docker container, verify full pipeline: detection -> diagnosis -> remediation -> notification",
      "phase": "phase-1",
      "dependencies": ["n8n-discord", "auto-remediation"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Full pipeline working: stopped adminer -> detected -> restarted -> Discord alert"
    },
    {
      "id": "production-activation",
      "name": "Activate Workflow for Production",
      "description": "Enable N8N workflow to run on 5-minute schedule",
      "phase": "phase-1",
      "dependencies": ["e2e-test-phase1"],
      "completed": true,
      "tested": true,
      "completed_date": "2025-12-20",
      "notes": "Running live every 5 minutes"
    },
    {
      "id": "skill-commands-expand",
      "name": "Expand Diagnostic Command Library",
      "description": "Add network diagnostics, port checks, service status, and more system commands",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "containers-expand",
      "name": "Add More Containers to Monitoring",
      "description": "Expand docker_containers list in config.yaml to include all critical containers",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "multi-server",
      "name": "Add Multi-Server Support",
      "description": "Expand to monitor additional servers beyond Paper Dynasty",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "alert-dedup",
      "name": "Implement Alert Deduplication",
      "description": "Add cooldown/deduplication logic to prevent alert fatigue",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "memorygraph-integration",
      "name": "Integrate MemoryGraph with Skill",
      "description": "Add MemoryGraph recall/store calls to SKILL.md instructions for pattern learning",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "nas-reports",
      "name": "Configure NAS Report Storage",
      "description": "Set up NAS mount and add report saving to N8N workflow",
      "phase": "phase-2",
      "dependencies": ["e2e-test-phase1"],
      "completed": false,
      "tested": false
    },
    {
      "id": "e2e-test-phase2",
      "name": "End-to-End Phase 2 Test",
      "description": "Test 10+ different error scenarios for comprehensive coverage",
      "phase": "phase-2",
      "dependencies": ["skill-commands-expand", "containers-expand", "alert-dedup", "memorygraph-integration"],
      "completed": false,
      "tested": false
    },
    {
      "id": "proxmox-skill-extend",
      "name": "Extend Existing Proxmox Skill",
      "description": "Integrate server-diagnostics with existing Proxmox skill for VM/LXC monitoring",
      "phase": "phase-3",
      "dependencies": ["e2e-test-phase2"],
      "completed": false,
      "tested": false
    },
    {
      "id": "nas-archival",
      "name": "Implement Report Archival and Cleanup",
      "description": "Add scheduled cleanup of old reports on NAS",
      "phase": "phase-3",
      "dependencies": ["nas-reports"],
      "completed": false,
      "tested": false
    },
    {
      "id": "documentation",
      "name": "Write Documentation",
      "description": "Create setup guide, runbooks, and troubleshooting documentation",
      "phase": "phase-3",
      "dependencies": ["e2e-test-phase2"],
      "completed": false,
      "tested": false
    },
    {
      "id": "e2e-test-phase3",
      "name": "End-to-End Phase 3 Test",
      "description": "Full homelab coverage test with all servers and services",
      "phase": "phase-3",
      "dependencies": ["proxmox-skill-extend", "nas-archival", "documentation"],
      "completed": false,
      "tested": false
    }
  ],
  "bonuses_completed": [
    {
      "description": "Disabled avahi-daemon (was consuming 67+ hours CPU time)",
      "date": "2025-12-20",
      "server": "paper-dynasty"
    },
    {
      "description": "Disabled gdm/GNOME desktop services (~12% CPU + 180MB RAM recovered)",
      "date": "2025-12-20",
      "server": "paper-dynasty"
    }
  ],
  "metadata": {
    "total_tasks": 29,
    "phase_1_tasks": 18,
    "phase_1_completed": 18,
    "phase_2_tasks": 7,
    "phase_2_completed": 0,
    "phase_3_tasks": 4,
    "phase_3_completed": 0
  }
}