claude-home/monitoring/scripts/test-audit-collectors.sh
Cal Corum f28dfeb4bf
All checks were successful
Auto-merge docs-only PRs / auto-merge-docs (pull_request) Successful in 3s
feat: add zombie parent, swap, and OOM metrics to audit; harden Tdarr containers
Extend homelab-audit.sh collector with zombie_parents(), swap_mb(), and
oom_events() functions so the audit identifies which process spawns zombies,
flags high swap usage, and reports recent OOM kills. Add init: true to both
Tdarr docker-compose services so tini reaps orphaned ffmpeg children, and
cap tdarr-node at 28g RAM / 30g total to prevent unbounded memory use.

Closes #30

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 21:02:05 -05:00

100 lines
2.9 KiB
Bash

#!/usr/bin/env bash
# test-audit-collectors.sh — validates homelab-audit.sh collector output format
#
# Re-runs, on the local host, the command pipeline behind each collector
# function (intended to mirror those in the audit script's COLLECTOR_SCRIPT
# heredoc) and checks that the output has the expected format. Every check
# runs; the script exits non-zero if at least one check failed.
# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Running tallies of check outcomes, incremented by pass() and fail().
PASS=0 FAIL=0
#######################################
# Record one passing check and report it.
# Globals:   PASS (incremented by one)
# Arguments: $1 - description of the check that passed
# Outputs:   " PASS: <description>" on stdout
#######################################
pass() {
  PASS=$((PASS + 1))
  printf ' PASS: %s\n' "$1"
}
#######################################
# Record one failing check and report it.
# Globals:   FAIL (incremented by one)
# Arguments: $1 - name of the check that failed
#            $2 - optional detail describing what was expected/observed
# Outputs:   " FAIL: <name>: <detail>" on stdout (just " FAIL: <name>" when
#            no detail is given)
#######################################
fail() {
  FAIL=$((FAIL + 1))
  # ${2:+...} inserts the ": " separator only when a detail is present and
  # stays safe under `set -u` when the second argument is omitted.
  printf ' FAIL: %s%s\n' "$1" "${2:+: $2}"
}
echo "=== Collector output format tests ==="
# Run each collector pipeline locally and validate its output format.
# These pipelines are designed to work on any Linux host.

# --- cpu_load ---
# Takes the first figure that follows "load average:" in the uptime output.
# The trailing `|| true` stops a failing pipeline (for example, uptime being
# unavailable) from aborting the whole script under `set -e` with pipefail;
# the empty result is then reported as a FAIL by the check below.
result=$(uptime | awk -F'load average:' '{print $2}' | awk -F'[, ]+' '{print $2}') || true
if [[ "$result" =~ ^[0-9]+\.?[0-9]*$ ]]; then
  pass "cpu_load returns numeric value: $result"
else
  fail "cpu_load" "expected numeric, got: '$result'"
fi
# --- mem_pct ---
# Third column over second column of free's "Mem:" row, as a whole-number
# percentage. `|| true` turns a failed pipeline (for example, free not being
# installed) into an empty result that is reported as a FAIL, rather than
# letting `set -e` with pipefail end the script before the summary.
result=$(free | awk '/^Mem:/ {printf "%.0f", $3/$2*100}') || true
if [[ "$result" =~ ^[0-9]+$ ]] && ((result >= 0 && result <= 100)); then
  pass "mem_pct returns percentage: $result"
else
  fail "mem_pct" "expected 0-100, got: '$result'"
fi
# --- zombie_count ---
# Counts process-state strings that begin with "Z". grep -c exits non-zero
# when the count is zero, hence the `|| true` inside the substitution; the
# printed count is still captured.
zombie_total=$(ps -eo stat= | grep -c "^Z" || true)
zombie_total_re='^[0-9]+$'
if ! [[ "$zombie_total" =~ $zombie_total_re ]]; then
  fail "zombie_count" "expected integer, got: '$zombie_total'"
else
  pass "zombie_count returns integer: $zombie_total"
fi
# --- zombie_parents ---
# Collects the parent PIDs of processes whose state starts with "Z", looks up
# each parent's command name, and joins the names with commas.
# An empty result (no zombies) is accepted as valid.
zombie_parent_names=$(
  ps -eo pid=,ppid=,stat= |
    awk '$3 ~ /^Z/ {print $2}' |
    sort -u |
    xargs -I{} ps -o comm= -p {} 2>/dev/null |
    paste -sd, || true
)
zombie_parent_re='^[a-zA-Z0-9_.,/-]+$'
if [[ -n "$zombie_parent_names" && ! "$zombie_parent_names" =~ $zombie_parent_re ]]; then
  fail "zombie_parents" "unexpected format: '$zombie_parent_names'"
else
  pass "zombie_parents returns csv or empty: '${zombie_parent_names:-<empty>}'"
fi
# --- swap_mb ---
# Third column of free's "Swap:" row divided by 1024, rounded to a whole
# number. `|| true` turns a failed pipeline (for example, free not being
# installed) into an empty result that is reported as a FAIL, rather than
# letting `set -e` with pipefail end the script before the summary.
result=$(free | awk '/^Swap:/ {printf "%.0f", $3/1024}') || true
if [[ "$result" =~ ^[0-9]+$ ]]; then
  pass "swap_mb returns integer MB: $result"
else
  fail "swap_mb" "expected integer, got: '$result'"
fi
# --- oom_events ---
# Counts kernel journal lines from the last 7 days that mention
# "out of memory" (case-insensitive). The pipeline may exit non-zero (zero
# matches, or journalctl failing), so its status is ignored and an empty
# capture falls back to 0.
oom_total=$(journalctl -k --since "7 days ago" 2>/dev/null | grep -ci "out of memory") || true
: "${oom_total:=0}"
oom_total_re='^[0-9]+$'
if ! [[ "$oom_total" =~ $oom_total_re ]]; then
  fail "oom_events" "expected integer, got: '$oom_total'"
else
  pass "oom_events returns integer: $oom_total"
fi
# --- stuck_procs ---
# Lists command names of processes whose state starts with "D" and whose
# pcpu value is at least 10, joined with commas.
# An empty result is accepted as valid.
stuck_names=$(
  ps -eo stat=,pcpu=,comm= |
    awk '$1 ~ /^D/ && $2+0 >= 10 {print $3}' |
    paste -sd, || true
)
stuck_names_re='^[a-zA-Z0-9_.,/-]+$'
if [[ -n "$stuck_names" && ! "$stuck_names" =~ $stuck_names_re ]]; then
  fail "stuck_procs" "unexpected format: '$stuck_names'"
else
  pass "stuck_procs returns csv or empty: '${stuck_names:-<empty>}'"
fi
# --- disk_usage format ---
# Takes the first data row of df (the line after the header), strips the
# percent sign from the usage column, and prints "<pct> <mountpoint>".
#
# sed -n '2p' reads its whole input, so the upstream command never writes to a
# closed pipe (with `tail | head -1` a SIGPIPE could fail the pipeline under
# pipefail). `|| true` turns a df failure (its stderr is suppressed) into an
# empty result that is reported as a FAIL, rather than letting `set -e` end
# the script silently before the summary.
result=$(df --output=pcent,target -x tmpfs -x devtmpfs 2>/dev/null |
  sed -n '2p' |
  while read -r pct mnt; do
    # ${pct%%%}: remove the trailing "%" from the usage figure.
    printf '%s %s\n' "${pct%%%}" "$mnt"
  done) || true
if [[ "$result" =~ ^[0-9]+\ / ]]; then
  pass "disk_usage returns 'pct mount' format: $result"
else
  fail "disk_usage" "expected 'N /path', got: '$result'"
fi
# Print a blank separator line followed by the tally. The final test is the
# last command executed, so it supplies the exit status: zero only when no
# check failed.
printf '\n'
printf '=== Results: %s passed, %s failed ===\n' "$PASS" "$FAIL"
[[ "$FAIL" -eq 0 ]]