Compare commits

..

2 Commits

Author SHA1 Message Date
Cal Corum
0e5c3c2b3b feat: add monthly Docker prune cron Ansible playbook (#29)
All checks were successful
Auto-merge docs-only PRs / auto-merge-docs (pull_request) Successful in 2s
Closes #29

Deploys /etc/cron.monthly/docker-prune to all six Docker hosts via
Ansible. Applies a 720h (30-day) age filter when pruning containers and
images; volume pruning skips volumes labeled `keep`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 16:33:26 -05:00
Cal Corum
7a0c264f27 feat: add monthly Proxmox maintenance reboot automation (#26)
Establishes a first-Sunday-of-the-month maintenance window orchestrated
by Ansible on LXC 304. Split into two playbooks to handle the self-reboot
paradox (the controller is a guest on the host being rebooted):

- monthly-reboot.yml: snapshots, tiered shutdown with per-guest polling,
  fire-and-forget host reboot
- post-reboot-startup.yml: controlled tiered startup with staggered delays,
  Pi-hole UDP DNS fix, validation, and snapshot cleanup

Also fixes onboot:1 on VM 109, LXC 221, LXC 223 and creates a recurring
Google Calendar event for the maintenance window.

Closes #26

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-03 16:17:55 -05:00
5 changed files with 58 additions and 110 deletions

View File

@ -0,0 +1,55 @@
---
# Monthly Docker Prune — Deploy Cleanup Cron to All Docker Hosts
#
# Deploys /etc/cron.monthly/docker-prune to each VM running Docker.
# The script prunes stopped containers and unused images older than
# 30 days (720h), then prunes unused volumes. The volume prune has no age
# filter; volumes labeled `keep` are exempt from it.
#
# Resolves accumulated disk waste from stopped containers and stale images.
#
# Notes on the `--filter "until=720h"` age gate:
#   - Docker evaluates `until` against the *creation* timestamp of the
#     container or image, not against the time an image was pulled. An image
#     built more than 30 days ago is therefore eligible as soon as no
#     container references it, even if it was pulled recently.
#   - `docker image prune -a` only removes images not referenced by any
#     container (running or stopped). The container prune runs first, so
#     images used solely by containers removed in that step become eligible
#     in the same run.
#
# NOTE(review): on Docker Engine 23.0 and later, `docker volume prune`
# without `--all` only considers anonymous volumes — confirm that matches
# the intended meaning of "orphaned volumes" on these hosts.
#
# Hosts: VM 106 (docker-home), VM 110 (discord-bots), VM 112 (databases-bots),
#        VM 115 (docker-sba), VM 116 (docker-home-servers), manticore
#
# Controller: LXC 304 (ansible-controller) at 10.10.0.232
#
# Usage:
#   # Dry run (shows what would change, skips writes)
#   ansible-playbook /opt/ansible/playbooks/docker-prune.yml --check
#
#   # Single host
#   ansible-playbook /opt/ansible/playbooks/docker-prune.yml --limit docker-sba
#
#   # All Docker hosts
#   ansible-playbook /opt/ansible/playbooks/docker-prune.yml
#
# To undo: rm /etc/cron.monthly/docker-prune on target hosts
- name: Deploy Docker monthly prune cron to all Docker hosts
  hosts: docker-home:discord-bots:databases-bots:docker-sba:docker-home-servers:manticore
  become: true
  tasks:
    # The script body is embedded verbatim; `set -euo pipefail` makes the
    # script stop at the first prune command that fails.
    - name: Deploy docker-prune cron script
      ansible.builtin.copy:
        dest: /etc/cron.monthly/docker-prune
        owner: root
        group: root
        mode: "0755"
        content: |
          #!/bin/bash
          # Monthly Docker cleanup — deployed by Ansible (issue #29)
          # Prunes stopped containers, unused images (>30 days), and orphaned volumes.
          # Volumes labeled `keep` are exempt from volume pruning.
          set -euo pipefail
          docker container prune -f --filter "until=720h"
          docker image prune -a -f --filter "until=720h"
          docker volume prune -f --filter "label!=keep"

    # Read-only sanity check: fails the play if the file is missing or not
    # executable. Never reports a change.
    - name: Verify docker-prune script is executable
      ansible.builtin.command: test -x /etc/cron.monthly/docker-prune
      changed_when: false

View File

@ -1,80 +0,0 @@
---
# gitea-cleanup.yml — Weekly cleanup of Gitea server disk space
#
# Removes stale Docker buildx volumes, unused images, Gitea repo-archive
# cache, and vacuums journal logs to prevent disk exhaustion on LXC 225.
#
# Schedule: Weekly via systemd timer on LXC 304 (ansible-controller)
#
# Usage:
#   ansible-playbook /opt/ansible/playbooks/gitea-cleanup.yml          # full run
#   ansible-playbook /opt/ansible/playbooks/gitea-cleanup.yml --check  # dry run
#
# Dry-run behaviour: the destructive command/shell tasks are skipped in check
# mode, so their registered results carry no output. The two read-only `df`
# probes opt out of check mode so the before/after figures are still shown,
# and every message falls back to 'N/A' for a skipped step.
- name: Gitea server disk cleanup
  hosts: gitea
  gather_facts: false
  tasks:
    # Read-only probe; safe to execute even during --check.
    - name: Check current disk usage
      ansible.builtin.shell: df --output=pcent / | tail -1
      register: disk_before
      changed_when: false
      check_mode: false

    - name: Display current disk usage
      ansible.builtin.debug:
        msg: "Disk usage before cleanup: {{ disk_before.stdout | default('N/A') | trim }}"

    # Lists the top-level entries of the archive cache (files and directories).
    - name: Clear Gitea repo-archive cache
      ansible.builtin.find:
        paths: /var/lib/gitea/data/repo-archive
        file_type: any
      register: repo_archive_files

    # An empty result list simply yields zero loop iterations.
    - name: Remove repo-archive files
      ansible.builtin.file:
        path: "{{ item.path }}"
        state: absent
      loop: "{{ repo_archive_files.files }}"
      loop_control:
        label: "{{ item.path | basename }}"

    # Prints a sentinel line when nothing matched so changed_when can tell
    # a no-op apart from a real removal.
    - name: Remove orphaned Docker buildx volumes
      ansible.builtin.shell: |
        volumes=$(docker volume ls -q --filter name=buildx_buildkit)
        if [ -n "$volumes" ]; then
          echo "$volumes" | xargs docker volume rm 2>&1
        else
          echo "No buildx volumes to remove"
        fi
      register: buildx_cleanup
      changed_when: "'No buildx volumes' not in buildx_cleanup.stdout"

    - name: Prune unused Docker images
      ansible.builtin.command: docker image prune -af
      register: image_prune
      changed_when: "'Total reclaimed space: 0B' not in image_prune.stdout"

    - name: Prune unused Docker volumes
      ansible.builtin.command: docker volume prune -f
      register: volume_prune
      changed_when: "'Total reclaimed space: 0B' not in volume_prune.stdout"

    # The changed_when check reads stderr, where the vacuum summary is expected.
    - name: Vacuum journal logs to 500M
      ansible.builtin.command: journalctl --vacuum-size=500M
      register: journal_vacuum
      changed_when: "'freed 0B' not in journal_vacuum.stderr"

    # Read-only probe; safe to execute even during --check.
    - name: Check disk usage after cleanup
      ansible.builtin.shell: df --output=pcent / | tail -1
      register: disk_after
      changed_when: false
      check_mode: false

    # Reports the last output line of each step; skipped steps show 'N/A'.
    - name: Display cleanup summary
      ansible.builtin.debug:
        msg: >-
          Cleanup complete.
          Disk: {{ disk_before.stdout | default('N/A') | trim }} → {{ disk_after.stdout | default('N/A') | trim }}.
          Buildx: {{ (buildx_cleanup.stdout_lines | default(['N/A'])) | last }}.
          Images: {{ (image_prune.stdout_lines | default(['N/A'])) | last }}.
          Volumes: {{ (volume_prune.stdout_lines | default(['N/A'])) | last }}.
          Journal: {{ (journal_vacuum.stderr_lines | default(['N/A'])) | last }}.

View File

@ -5,7 +5,7 @@
# to collect system metrics, then generates a summary report.
#
# Usage:
# homelab-audit.sh [--output-dir DIR] [--hosts label:ip,label:ip,...]
# homelab-audit.sh [--output-dir DIR]
#
# Environment overrides:
# STUCK_PROC_CPU_WARN CPU% at which a D-state process is flagged (default: 10)
@ -29,6 +29,7 @@ LOAD_WARN=2.0
MEM_WARN=85
ZOMBIE_WARN=1
SWAP_WARN=512
HOSTS_FILTER="" # comma-separated host list from --hosts; empty = audit all
JSON_OUTPUT=0 # set to 1 by --json

View File

@ -93,34 +93,6 @@ else
fail "disk_usage" "expected 'N /path', got: '$result'"
fi
# --- --hosts flag parsing ---
# Exercises the split rules for a --hosts value: commas separate entries, and
# within an entry the text before the first colon is the label while the
# remainder is the address.
echo ""
echo "=== --hosts argument parsing tests ==="

# Single host
hosts_arg="vm-115:10.10.0.88"
IFS=',' read -ra host_entries <<<"$hosts_arg"
first_entry="${host_entries[0]}"
label="${first_entry%%:*}"
addr="${first_entry#*:}"
if [[ "$label" != "vm-115" || "$addr" != "10.10.0.88" ]]; then
  fail "--hosts single" "expected 'vm-115 10.10.0.88', got: '$label $addr'"
else
  pass "--hosts single entry parsed: $label $addr"
fi

# Multiple hosts
hosts_arg="vm-115:10.10.0.88,lxc-225:10.10.0.225"
IFS=',' read -ra host_entries <<<"$hosts_arg"
label1="${host_entries[0]%%:*}"
addr1="${host_entries[0]#*:}"
label2="${host_entries[1]%%:*}"
addr2="${host_entries[1]#*:}"

# Start optimistic and clear the flag on the first field that does not match.
multi_ok=1
[[ "$label1" == "vm-115" ]] || multi_ok=0
[[ "$addr1" == "10.10.0.88" ]] || multi_ok=0
[[ "$label2" == "lxc-225" ]] || multi_ok=0
[[ "$addr2" == "10.10.0.225" ]] || multi_ok=0

if ((multi_ok)); then
  pass "--hosts multi entry parsed: $label1 $addr1, $label2 $addr2"
else
  fail "--hosts multi" "unexpected parse result"
fi

echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="
# Arithmetic test: yields a non-zero status when any failure was recorded.
((FAIL == 0))

View File

@ -12,5 +12,5 @@ ostype: l26
scsi0: local-lvm:vm-115-disk-0,size=256G
scsihw: virtio-scsi-pci
smbios1: uuid=19be98ee-f60d-473d-acd2-9164717fcd11
sockets: 1
sockets: 2
vmgenid: 682dfeab-8c63-4f0b-8ed2-8828c2f808ef