2026-04-03 20:00:14 +00:00
3 changed files with 246 additions and 4 deletions
--- a/legacy/headless-claude/n8n-workflow-import.json
+++ b/legacy/headless-claude/n8n-workflow-import.json
@ -21,7 +21,7 @@
    {
      "parameters": {
        "operation": "executeCommand",
-        "command": "/root/.local/bin/claude -p \"Run python3 ~/.claude/skills/server-diagnostics/client.py health paper-dynasty and analyze the results. If any containers are not running or there are critical issues, summarize them. Otherwise just say 'All systems healthy'.\" --output-format json --json-schema '{\"type\":\"object\",\"properties\":{\"status\":{\"type\":\"string\",\"enum\":[\"healthy\",\"issues_found\"]},\"summary\":{\"type\":\"string\"},\"root_cause\":{\"type\":\"string\"},\"severity\":{\"type\":\"string\",\"enum\":[\"low\",\"medium\",\"high\",\"critical\"]},\"affected_services\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"actions_taken\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"status\",\"severity\",\"summary\"]}' --allowedTools \"Read,Grep,Glob,Bash(python3 ~/.claude/skills/server-diagnostics/client.py *)\"",
+        "command": "/root/.local/bin/claude -p \"Run python3 ~/.claude/skills/server-diagnostics/client.py health paper-dynasty and analyze the results. If any containers are not running or there are critical issues, summarize them. Otherwise just say 'All systems healthy'.\" --output-format json --json-schema '{\"type\":\"object\",\"properties\":{\"status\":{\"type\":\"string\",\"enum\":[\"healthy\",\"issues_found\"]},\"summary\":{\"type\":\"string\"},\"root_cause\":{\"type\":\"string\"},\"severity\":{\"type\":\"string\",\"enum\":[\"low\",\"medium\",\"high\",\"critical\"]},\"affected_services\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"actions_taken\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}},\"required\":[\"status\",\"severity\",\"summary\"]}' --allowedTools \"Read,Grep,Glob,Bash(python3 ~/.claude/skills/server-diagnostics/client.py *)\" --append-system-prompt \"You are a server diagnostics agent. Use the server-diagnostics skill client.py for all operations. Never run destructive commands.\"",
        "options": {}
      },
      "id": "ssh-claude-code",
@ -75,20 +75,48 @@
      "typeVersion": 2,
      "position": [660, 0]
    },
+    {
+      "parameters": {
+        "operation": "executeCommand",
+        "command": "=/root/.local/bin/claude -p \"The previous health check found issues. Investigate deeper: check container logs, resource usage, and recent events. Provide a detailed root cause analysis and recommended remediation steps.\" --resume \"{{ $json.session_id }}\" --output-format json --json-schema '{\"type\":\"object\",\"properties\":{\"root_cause_detail\":{\"type\":\"string\"},\"container_logs\":{\"type\":\"string\"},\"resource_status\":{\"type\":\"string\"},\"remediation_steps\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"requires_human\":{\"type\":\"boolean\"}},\"required\":[\"root_cause_detail\",\"remediation_steps\",\"requires_human\"]}' --allowedTools \"Read,Grep,Glob,Bash(python3 ~/.claude/skills/server-diagnostics/client.py *)\" --max-turns 15 --append-system-prompt \"You are a server diagnostics agent performing a follow-up investigation. The initial health check found issues. Dig deeper into logs and metrics. Never run destructive commands.\"",
+        "options": {}
+      },
+      "id": "ssh-followup",
+      "name": "Follow Up Diagnostics",
+      "type": "n8n-nodes-base.ssh",
+      "typeVersion": 1,
+      "position": [880, -200],
+      "credentials": {
+        "sshPassword": {
+          "id": "REPLACE_WITH_CREDENTIAL_ID",
+          "name": "Claude Code LXC"
+        }
+      }
+    },
+    {
+      "parameters": {
+        "jsCode": "// Parse follow-up diagnostics response\nconst stdout = $input.first().json.stdout || '';\nconst initial = $('Parse Claude Response').first().json;\n\ntry {\n  const response = JSON.parse(stdout);\n  const data = response.structured_output || JSON.parse(response.result || '{}');\n  \n  return [{\n    json: {\n      ...initial,\n      followup: {\n        root_cause_detail: data.root_cause_detail || 'No detail available',\n        container_logs: data.container_logs || '',\n        resource_status: data.resource_status || '',\n        remediation_steps: data.remediation_steps || [],\n        requires_human: data.requires_human || false,\n        cost_usd: response.total_cost_usd,\n        session_id: response.session_id\n      },\n      total_cost_usd: (initial.cost_usd || 0) + (response.total_cost_usd || 0)\n    }\n  }];\n} catch (e) {\n  return [{\n    json: {\n      ...initial,\n      followup: {\n        error: e.message,\n        root_cause_detail: 'Follow-up parse failed',\n        remediation_steps: [],\n        requires_human: true\n      },\n      total_cost_usd: initial.cost_usd || 0\n    }\n  }];\n}"
+      },
+      "id": "parse-followup",
+      "name": "Parse Follow-up Response",
+      "type": "n8n-nodes-base.code",
+      "typeVersion": 2,
+      "position": [1100, -200]
+    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://discord.com/api/webhooks/1451783909409816763/O9PMDiNt6ZIWRf8HKocIZ_E4vMGV_lEwq50aAiZ9HVFR2UGwO6J1N9_wOm82p0MetIqT",
        "sendBody": true,
        "specifyBody": "json",
-        "jsonBody": "={\n  \"embeds\": [{\n    \"title\": \"{{ $json.severity === 'critical' ? '🔴' : $json.severity === 'high' ? '🟠' : '🟡' }} Server Alert\",\n    \"description\": {{ JSON.stringify($json.summary) }},\n    \"color\": {{ $json.severity === 'critical' ? 15158332 : $json.severity === 'high' ? 15105570 : 16776960 }},\n    \"fields\": [\n      {\n        \"name\": \"Severity\",\n        \"value\": \"{{ $json.severity.toUpperCase() }}\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Server\",\n        \"value\": \"paper-dynasty (10.10.0.88)\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Cost\",\n        \"value\": \"${{ $json.cost_usd ? $json.cost_usd.toFixed(4) : '0.0000' }}\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Root Cause\",\n        \"value\": \"{{ $json.root_cause || 'N/A' }}\",\n        \"inline\": false\n      },\n      {\n        \"name\": \"Affected Services\",\n        \"value\": \"{{ $json.affected_services.length ? $json.affected_services.join(', ') : 'None' }}\",\n        \"inline\": false\n      },\n      {\n        \"name\": \"Actions Taken\",\n        \"value\": \"{{ $json.actions_taken.length ? $json.actions_taken.join('\\n') : 'None' }}\",\n        \"inline\": false\n      }\n    ],\n    \"timestamp\": \"{{ new Date().toISOString() }}\"\n  }]\n}",
+        "jsonBody": "={\n  \"embeds\": [{\n    \"title\": \"{{ $json.severity === 'critical' ? '🔴' : $json.severity === 'high' ? '🟠' : '🟡' }} Server Alert\",\n    \"description\": {{ JSON.stringify($json.summary) }},\n    \"color\": {{ $json.severity === 'critical' ? 15158332 : $json.severity === 'high' ? 15105570 : 16776960 }},\n    \"fields\": [\n      {\n        \"name\": \"Severity\",\n        \"value\": \"{{ $json.severity.toUpperCase() }}\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Server\",\n        \"value\": \"paper-dynasty (10.10.0.88)\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Cost\",\n        \"value\": \"${{ $json.total_cost_usd ? $json.total_cost_usd.toFixed(4) : '0.0000' }}\",\n        \"inline\": true\n      },\n      {\n        \"name\": \"Root Cause\",\n        \"value\": {{ JSON.stringify(($json.followup && $json.followup.root_cause_detail) || $json.root_cause || 'N/A') }},\n        \"inline\": false\n      },\n      {\n        \"name\": \"Affected Services\",\n        \"value\": \"{{ $json.affected_services.length ? $json.affected_services.join(', ') : 'None' }}\",\n        \"inline\": false\n      },\n      {\n        \"name\": \"Remediation Steps\",\n        \"value\": {{ JSON.stringify(($json.followup && $json.followup.remediation_steps.length) ? $json.followup.remediation_steps.map((s, i) => (i+1) + '. ' + s).join('\\n') : ($json.actions_taken.length ? $json.actions_taken.join('\\n') : 'None')) }},\n        \"inline\": false\n      },\n      {\n        \"name\": \"Requires Human?\",\n        \"value\": \"{{ ($json.followup && $json.followup.requires_human) ? '⚠️ Yes' : '✅ No' }}\",\n        \"inline\": true\n      }\n    ],\n    \"timestamp\": \"{{ new Date().toISOString() }}\"\n  }]\n}",
        "options": {}
      },
      "id": "discord-alert",
      "name": "Discord Alert",
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.2,
-      "position": [880, -100]
+      "position": [1320, -200]
    },
    {
      "parameters": {
@ -145,7 +173,7 @@
      "main": [
        [
          {
-            "node": "Discord Alert",
+            "node": "Follow Up Diagnostics",
            "type": "main",
            "index": 0
          }
@ -158,6 +186,28 @@
          }
        ]
      ]
+    },
+    "Follow Up Diagnostics": {
+      "main": [
+        [
+          {
+            "node": "Parse Follow-up Response",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
+    },
+    "Parse Follow-up Response": {
+      "main": [
+        [
+          {
+            "node": "Discord Alert",
+            "type": "main",
+            "index": 0
+          }
+        ]
+      ]
    }
  },
  "settings": {
--- a/scheduled-tasks/CONTEXT.md
+++ b/scheduled-tasks/CONTEXT.md
@ -158,6 +158,23 @@ ls -t ~/.local/share/claude-scheduled/logs/backlog-triage/ | head -1
 ~/.config/claude-scheduled/runner.sh backlog-triage
 ```

+## Session Resumption
+
+Tasks can opt into session persistence for multi-step workflows:
+
+```json
+{
+  "session_resumable": true,
+  "resume_last_session": true
+}
+```
+
+When `session_resumable` is `true`, runner.sh saves the `session_id` to `$LOG_DIR/last_session_id` after each run. When `resume_last_session` is also `true`, the next run resumes that session with `--resume`.
+
+Issue-poller and PR-reviewer capture `session_id` in logs and result JSON for manual follow-up.
+
+See also: [Agent SDK Evaluation](agent-sdk-evaluation.md) for CLI vs SDK comparison.
+
 ## Cost Safety

 - Per-task `max_budget_usd` cap — runner.sh detects `error_max_budget_usd` and warns
--- a/scheduled-tasks/agent-sdk-evaluation.md
+++ b/scheduled-tasks/agent-sdk-evaluation.md
@ -0,0 +1,175 @@
+---
+title: "Agent SDK Evaluation — CLI vs Python/TypeScript SDK"
+description: "Comparison of Claude Code CLI invocation (claude -p) vs the native Agent SDK for programmatic use in the headless-claude and claude-scheduled systems."
+type: context
+domain: scheduled-tasks
+tags: [claude-code, sdk, agent-sdk, python, typescript, headless, automation, evaluation]
+---
+
+# Agent SDK Evaluation: CLI vs Python/TypeScript SDK
+
+**Date:** 2026-04-03
+**Status:** Evaluation complete — recommendation below
+**Related:** Issue #3 (headless-claude: Additional Agent SDK improvements)
+
+## 1. Current Approach — CLI via `claude -p`
+
+All headless Claude invocations use the CLI subprocess pattern:
+
+```bash
+claude -p "<prompt>" \
+  --model sonnet \
+  --output-format json \
+  --allowedTools "Read,Grep,Glob" \
+  --append-system-prompt "..." \
+  --max-budget-usd 2.00
+```
+
+**Pros:**
+- Simple to invoke from any language (bash, n8n SSH nodes, systemd units)
+- Uses Claude Max OAuth — no API key needed, no per-token billing
+- Mature and battle-tested in our scheduled-tasks framework
+- CLAUDE.md and settings.json are loaded automatically
+- No runtime dependencies beyond the CLI binary
+
+**Cons:**
+- Structured output requires parsing JSON from stdout
+- Error handling is exit-code-based with stderr parsing
+- No mid-stream observability (streaming requires JSONL parsing)
+- Tool approval is allowlist-only — no dynamic per-call decisions
+- Session resumption requires manual `--resume` flag plumbing
+
+## 2. Python Agent SDK
+
+**Package:** `claude-agent-sdk` (renamed from `claude-code-sdk`)
+**Install:** `pip install claude-agent-sdk`
+**Requires:** Python 3.10+, `ANTHROPIC_API_KEY` env var
+
+```python
+from claude_agent_sdk import query, ClaudeAgentOptions
+
+async for message in query(
+    prompt="Diagnose server health",
+    options=ClaudeAgentOptions(
+        allowed_tools=["Read", "Grep", "Bash(python3 *)"],
+        output_format={"type": "json_schema", "schema": {...}},
+        max_budget_usd=2.00,
+    ),
+):
+    if hasattr(message, "result"):
+        print(message.result)
+```
+
+**Key features:**
+- Async generator yielding typed message objects (`UserMessage`, `AssistantMessage`, `SystemMessage`, `ResultMessage`)
+- `ClaudeSDKClient` for stateful multi-turn conversations
+- `can_use_tool` callback for dynamic per-call tool approval
+- In-process hooks (`PreToolUse`, `PostToolUse`, `Stop`, etc.)
+- `rewind_files()` to restore the filesystem to its state at any prior message
+- Typed exception hierarchy (`CLINotFoundError`, `ProcessError`, etc.)
+
+**Limitation:** Shells out to the Claude Code CLI binary — it is NOT a pure HTTP client. The binary must be installed.
+
+## 3. TypeScript Agent SDK
+
+**Package:** `@anthropic-ai/claude-agent-sdk` (renamed from `@anthropic-ai/claude-code`)
+**Install:** `npm install @anthropic-ai/claude-agent-sdk`
+**Requires:** Node 18+, `ANTHROPIC_API_KEY` env var
+
+```typescript
+import { query } from "@anthropic-ai/claude-agent-sdk";
+
+for await (const message of query({
+  prompt: "Diagnose server health",
+  options: {
+    allowedTools: ["Read", "Grep", "Bash(python3 *)"],
+    maxBudgetUsd: 2.00,
+  }
+})) {
+  if ("result" in message) console.log(message.result);
+}
+```
+
+**Key features (superset of Python):**
+- Same async generator pattern
+- `"auto"` permission mode (model classifier per tool call) — TS-only
+- `spawnClaudeCodeProcess` hook for remote/containerized execution
+- `setMcpServers()` for dynamic MCP server swapping mid-session
+- V2 preview: `send()` / `stream()` patterns for simpler multi-turn
+- Bundles the Claude Code binary — no separate install needed
+
+## 4. Comparison Matrix
+
+| Capability | `claude -p` CLI | Python SDK | TypeScript SDK |
+|---|---|---|---|
+| **Auth** | OAuth (Claude Max) | API key only | API key only |
+| **Invocation** | Shell subprocess | Async generator | Async generator |
+| **Structured output** | `--json-schema` flag | Schema in options | Schema in options |
+| **Streaming** | JSONL parsing | Typed messages | Typed messages |
+| **Tool approval** | `--allowedTools` only | `can_use_tool` callback | `canUseTool` callback + auto mode |
+| **Session resume** | `--resume` flag | `resume=session_id` | `resume: sessionId` |
+| **Cost tracking** | Parse result JSON | `ResultMessage.total_cost_usd` | Same + per-model breakdown |
+| **Error handling** | Exit codes + stderr | Typed exceptions | Typed exceptions |
+| **Hooks** | External shell scripts | In-process callbacks | In-process callbacks |
+| **Custom tools** | Not available | `tool()` decorator | `tool()` + Zod schemas |
+| **Subagents** | Not programmatic | `agents` option | `agents` option |
+| **File rewind** | Not available | `rewind_files()` | `rewindFiles()` |
+| **MCP servers** | `--mcp-config` file | Inline config object | Inline + dynamic swap |
+| **CLAUDE.md loading** | Automatic | Must opt-in (`settingSources`) | Must opt-in |
+| **Dependencies** | CLI binary | CLI binary + Python | Node 18+ (bundles CLI) |
+
+## 5. Integration Paths
+
+### A. n8n Code Nodes
+
+The n8n Code node supports JavaScript (not TypeScript directly, but the SDK's JS output works). This would replace the current SSH → CLI pattern:
+
+```
+Schedule Trigger → Code Node (JS, uses SDK) → IF → Discord
+```
+
+**Trade-off:** Eliminates the SSH hop to CT 300, but requires `ANTHROPIC_API_KEY` and the npm package to be installed where n8n runs. n8n currently runs in a Docker container on CT 210, so the SDK package (which bundles the CLI binary) would have to be added to that image.
+
+### B. Standalone Python Scripts
+
+Replace `claude -p` subprocess calls in custom dispatchers with the Python SDK:
+
+```python
+# Instead of: subprocess.run(["claude", "-p", prompt, ...])
+async for msg in query(prompt=prompt, options=opts):
+    ...
+```
+
+**Trade-off:** Richer error handling and streaming, but our dispatchers are bash scripts, not Python. Would require rewriting `runner.sh` and dispatchers in Python.
+
+### C. Systemd-triggered Tasks (Current Architecture)
+
+Keep systemd timers → bash scripts, but optionally invoke a thin Python wrapper that uses the SDK instead of `claude -p` directly.
+
+**Trade-off:** Adds Python as a dependency for scheduled tasks that currently only need bash + the CLI binary. Marginal benefit unless we need hooks or dynamic tool approval.
+
+## 6. Recommendation
+
+**Stay with CLI invocation for now. Revisit the Python SDK when we need dynamic tool approval or in-process hooks.**
+
+### Rationale
+
+1. **Auth is the blocker.** The SDK requires `ANTHROPIC_API_KEY` (API billing). Our entire scheduled-tasks framework runs on Claude Max OAuth at zero marginal cost. Switching to the SDK means paying per-token for every scheduled task, issue-worker, and PR-reviewer invocation. This alone makes the SDK non-viable for our current architecture.
+
+2. **The CLI covers our needs.** With `--append-system-prompt` (already in use), `--resume` (added alongside this evaluation), `--json-schema`, and `--allowedTools`, the CLI provides everything we currently need. Session resumption was the last missing piece.
+
+3. **Bash scripts are the right abstraction.** Our runners are launched by systemd timers. Bash + CLI is the natural fit — no runtime dependencies, no async event loops, no package management.
+
+### When to Revisit
+
+- If Anthropic adds OAuth support to the SDK (eliminating the billing difference)
+- If we need dynamic tool approval (e.g., "allow this Bash command but deny that one" at runtime)
+- If we build a long-running Python service that orchestrates multiple Claude sessions (the `ClaudeSDKClient` stateful pattern would be valuable there)
+- If we move to n8n custom nodes written in TypeScript (the TS SDK bundles the CLI binary)
+
+### Migration Path (If Needed Later)
+
+1. Start with the Python SDK in a single task (e.g., `backlog-triage`) as a proof of concept
+2. Create a thin `sdk-runner.py` wrapper that reads the same `settings.json` and `prompt.md` files
+3. Swap the systemd unit's `ExecStart` from `runner.sh` to `sdk-runner.py`
+4. Expand to other tasks if the POC proves valuable