feat: add monthly Proxmox maintenance reboot automation (#26)
All checks were successful
Reindex Knowledge Base / reindex (push) Successful in 2s
All checks were successful
Reindex Knowledge Base / reindex (push) Successful in 2s
Establishes a first-Sunday-of-the-month maintenance window orchestrated by Ansible on LXC 304. Split into two playbooks to handle the self-reboot paradox (the controller is a guest on the host being rebooted): - monthly-reboot.yml: snapshots, tiered shutdown with per-guest polling, fire-and-forget host reboot - post-reboot-startup.yml: controlled tiered startup with staggered delays, Pi-hole UDP DNS fix, validation, and snapshot cleanup Also fixes onboot:1 on VM 109, LXC 221, LXC 223 and creates a recurring Google Calendar event for the maintenance window. Closes #26 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
fdc44acb28
commit
29a20fbe06
265
ansible/playbooks/monthly-reboot.yml
Normal file
265
ansible/playbooks/monthly-reboot.yml
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
---
|
||||||
|
# Monthly Proxmox Maintenance Reboot — Shutdown & Reboot
|
||||||
|
#
|
||||||
|
# Orchestrates a graceful shutdown of all guests in dependency order,
|
||||||
|
# then issues a fire-and-forget reboot to the Proxmox host.
|
||||||
|
#
|
||||||
|
# After the host reboots, LXC 304 auto-starts via onboot:1 and the
|
||||||
|
# post-reboot-startup.yml playbook runs automatically via the
|
||||||
|
# ansible-post-reboot.service systemd unit (enabled via WantedBy=multi-user.target, so it starts on every boot of LXC 304).
|
||||||
|
#
|
||||||
|
# Schedule: 1st Sunday of each month, 08:00 UTC (3 AM EST / 4 AM EDT)
|
||||||
|
# Controller: LXC 304 (ansible-controller) at 10.10.0.232
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# # Dry run
|
||||||
|
# ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --check
|
||||||
|
#
|
||||||
|
# # Full execution
|
||||||
|
# ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml
|
||||||
|
#
|
||||||
|
# # Shutdown only (skip the host reboot)
|
||||||
|
# ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --tags shutdown
|
||||||
|
#
|
||||||
|
# Note: VM 109 (homeassistant) is excluded from Ansible inventory
|
||||||
|
# (self-managed via HA Supervisor) but is included in pvesh start/stop.
|
||||||
|
|
||||||
|
# Play: record the state of the node before maintenance and snapshot every
# guest that is currently running.
- name: Pre-reboot health check and snapshots
  hosts: pve-node
  gather_facts: false
  tags:
    - pre-reboot
    - shutdown

  vars:
    # Snapshot name shared by the VM and LXC snapshot tasks. The date comes
    # from running `date` through the pipe lookup each time the name is used.
    maintenance_snapname: "pre-maintenance-{{ lookup('pipe', 'date +%Y-%m-%d') }}"

  tasks:
    # Read-only query. The result is registered, but no later task in this
    # play reads it; the task only fails the play if pvesh itself fails.
    - name: Check Proxmox cluster health
      ansible.builtin.command:
        cmd: pvesh get /cluster/status --output-format json
      register: cluster_status
      changed_when: false

    # Prints one VMID per line for each QEMU guest whose status is "running".
    - name: Get list of running QEMU VMs
      ansible.builtin.shell:
        cmd: >
          pvesh get /nodes/proxmox/qemu --output-format json |
          python3 -c "import sys,json; [print(vm['vmid']) for vm in json.load(sys.stdin) if vm.get('status')=='running']"
      register: running_vms
      changed_when: false

    # Same query for containers: one VMID per line for each running LXC.
    - name: Get list of running LXC containers
      ansible.builtin.shell:
        cmd: >
          pvesh get /nodes/proxmox/lxc --output-format json |
          python3 -c "import sys,json; [print(ct['vmid']) for ct in json.load(sys.stdin) if ct.get('status')=='running']"
      register: running_lxcs
      changed_when: false

    - name: Display running guests
      ansible.builtin.debug:
        msg: "Running VMs: {{ running_vms.stdout_lines }} | Running LXCs: {{ running_lxcs.stdout_lines }}"

    # Best effort: a snapshot that fails is reported but does not stop the play.
    - name: Snapshot running VMs
      ansible.builtin.command:
        cmd: >
          pvesh create /nodes/proxmox/qemu/{{ item }}/snapshot
          --snapname {{ maintenance_snapname }}
          --description "Auto snapshot before monthly maintenance reboot"
      loop: "{{ running_vms.stdout_lines }}"
      when: running_vms.stdout_lines | length > 0
      ignore_errors: true

    # Best effort, same as the VM snapshots above.
    - name: Snapshot running LXCs
      ansible.builtin.command:
        cmd: >
          pvesh create /nodes/proxmox/lxc/{{ item }}/snapshot
          --snapname {{ maintenance_snapname }}
          --description "Auto snapshot before monthly maintenance reboot"
      loop: "{{ running_lxcs.stdout_lines }}"
      when: running_lxcs.stdout_lines | length > 0
      ignore_errors: true
|
||||||
|
|
||||||
|
# Play: request shutdown of the Tier 4 guests, then poll each one until
# Proxmox reports it as "stopped".
- name: "Shutdown Tier 4 — Media & Others"
  hosts: pve-node
  gather_facts: false
  tags: [shutdown]

  vars:
    tier4_vms: [109]
    # LXC 303 (mcp-gateway) is onboot=0 and operator-managed — not included here
    tier4_lxcs: [221, 222, 223, 302]

  tasks:
    # Errors are ignored so that one failing guest does not abort the tier;
    # the polling tasks below observe the real outcome.
    - name: Shutdown Tier 4 VMs
      ansible.builtin.command: pvesh create /nodes/proxmox/qemu/{{ item }}/status/shutdown
      loop: "{{ tier4_vms }}"
      ignore_errors: true

    - name: Shutdown Tier 4 LXCs
      ansible.builtin.command: pvesh create /nodes/proxmox/lxc/{{ item }}/status/shutdown
      loop: "{{ tier4_lxcs }}"
      ignore_errors: true

    # Polls each guest up to 12 times, 5 s apart. The query is read-only, so
    # it is marked as never changed, like the other status queries in this
    # playbook. A guest that never stops is reported but does not end the play.
    - name: Wait for Tier 4 VMs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/qemu/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t4_vm_status
      until: t4_vm_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier4_vms }}"
      changed_when: false
      ignore_errors: true

    - name: Wait for Tier 4 LXCs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/lxc/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t4_lxc_status
      until: t4_lxc_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier4_lxcs }}"
      changed_when: false
      ignore_errors: true
|
||||||
|
|
||||||
|
# Play: request shutdown of the Tier 3 guests, then poll each one until
# Proxmox reports it as "stopped".
- name: "Shutdown Tier 3 — Applications"
  hosts: pve-node
  gather_facts: false
  tags: [shutdown]

  vars:
    tier3_vms: [115, 110]
    tier3_lxcs: [301]

  tasks:
    # Errors are ignored so that one failing guest does not abort the tier;
    # the polling tasks below observe the real outcome.
    - name: Shutdown Tier 3 VMs
      ansible.builtin.command: pvesh create /nodes/proxmox/qemu/{{ item }}/status/shutdown
      loop: "{{ tier3_vms }}"
      ignore_errors: true

    - name: Shutdown Tier 3 LXCs
      ansible.builtin.command: pvesh create /nodes/proxmox/lxc/{{ item }}/status/shutdown
      loop: "{{ tier3_lxcs }}"
      ignore_errors: true

    # Polls each guest up to 12 times, 5 s apart. The query is read-only, so
    # it is marked as never changed. A guest that never stops is reported but
    # does not end the play.
    - name: Wait for Tier 3 VMs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/qemu/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t3_vm_status
      until: t3_vm_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier3_vms }}"
      changed_when: false
      ignore_errors: true

    - name: Wait for Tier 3 LXCs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/lxc/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t3_lxc_status
      until: t3_lxc_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier3_lxcs }}"
      changed_when: false
      ignore_errors: true
|
||||||
|
|
||||||
|
# Play: request shutdown of the Tier 2 guests, then poll each one until
# Proxmox reports it as "stopped".
- name: "Shutdown Tier 2 — Infrastructure"
  hosts: pve-node
  gather_facts: false
  tags: [shutdown]

  vars:
    tier2_vms: [106, 116]
    tier2_lxcs: [225, 210, 227]

  tasks:
    # Errors are ignored so that one failing guest does not abort the tier;
    # the polling tasks below observe the real outcome.
    - name: Shutdown Tier 2 VMs
      ansible.builtin.command: pvesh create /nodes/proxmox/qemu/{{ item }}/status/shutdown
      loop: "{{ tier2_vms }}"
      ignore_errors: true

    - name: Shutdown Tier 2 LXCs
      ansible.builtin.command: pvesh create /nodes/proxmox/lxc/{{ item }}/status/shutdown
      loop: "{{ tier2_lxcs }}"
      ignore_errors: true

    # Polls each guest up to 12 times, 5 s apart. The query is read-only, so
    # it is marked as never changed. A guest that never stops is reported but
    # does not end the play.
    - name: Wait for Tier 2 VMs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/qemu/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t2_vm_status
      until: t2_vm_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier2_vms }}"
      changed_when: false
      ignore_errors: true

    - name: Wait for Tier 2 LXCs to stop
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/lxc/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t2_lxc_status
      until: t2_lxc_status.stdout.strip() == "stopped"
      retries: 12
      delay: 5
      loop: "{{ tier2_lxcs }}"
      changed_when: false
      ignore_errors: true
|
||||||
|
|
||||||
|
# Play: shut the database VMs down last, wait for them, and hard-stop any
# VM that is still reported as running afterwards.
- name: "Shutdown Tier 1 — Databases"
  hosts: pve-node
  gather_facts: false
  tags: [shutdown]

  vars:
    tier1_vms: [112]

  tasks:
    # Errors are ignored; the polling and force-stop tasks below handle a VM
    # that did not shut down.
    - name: Shutdown database VMs
      ansible.builtin.command: pvesh create /nodes/proxmox/qemu/{{ item }}/status/shutdown
      loop: "{{ tier1_vms }}"
      ignore_errors: true

    # 18 polls, 5 s apart (the "up to 90s" in the task name). Read-only, so
    # it never reports a change. A timeout is tolerated because the next task
    # force-stops the VM.
    - name: Wait for database VMs to stop (up to 90s)
      ansible.builtin.shell: >
        pvesh get /nodes/proxmox/qemu/{{ item }}/status/current --output-format json |
        python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: t1_vm_status
      until: t1_vm_status.stdout.strip() == "stopped"
      retries: 18
      delay: 5
      loop: "{{ tier1_vms }}"
      changed_when: false
      ignore_errors: true

    # Re-reads the status and issues a hard stop only when it is still
    # "running". Any other status prints the "already stopped" message.
    - name: Force stop database VMs if still running
      ansible.builtin.shell: |
        status=$(pvesh get /nodes/proxmox/qemu/{{ item }}/status/current --output-format json |
          python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))");
        if [ "$status" = "running" ]; then
          pvesh create /nodes/proxmox/qemu/{{ item }}/status/stop;
          echo "Force stopped VM {{ item }}";
        else
          echo "VM {{ item }} already stopped";
        fi
      loop: "{{ tier1_vms }}"
      register: force_stop_result
      # changed_when is evaluated once per loop item, where the registered
      # variable is that item's own result (it has no `.results` list yet),
      # so the marker is looked up in the item's stdout.
      changed_when: "'Force stopped' in (force_stop_result.stdout | default(''))"
|
||||||
|
|
||||||
|
# Play: refuse to reboot while any guest other than the controller (LXC 304)
# is still running, then schedule a detached host reboot.
- name: "Verify and reboot Proxmox host"
  hosts: pve-node
  gather_facts: false
  tags: [reboot]

  tasks:
    # Counts running VMs and running LXCs (ignoring 304) and exits non-zero
    # if either count is not 0, which fails the task and stops the play
    # before the reboot is issued.
    # The vmid is passed through int() so that 304 is excluded whether the
    # API returns the id as a number or as a string.
    - name: Verify all guests are stopped (excluding LXC 304)
      ansible.builtin.shell: |
        running_vms=$(pvesh get /nodes/proxmox/qemu --output-format json |
          python3 -c "import sys,json; vms=[v for v in json.load(sys.stdin) if v.get('status')=='running']; print(len(vms))");
        running_lxcs=$(pvesh get /nodes/proxmox/lxc --output-format json |
          python3 -c "import sys,json; cts=[c for c in json.load(sys.stdin) if c.get('status')=='running' and int(c['vmid']) != 304]; print(len(cts))");
        echo "Running VMs: $running_vms, Running LXCs: $running_lxcs";
        if [ "$running_vms" != "0" ] || [ "$running_lxcs" != "0" ]; then exit 1; fi
      register: verify_stopped
      changed_when: false

    # The reboot runs in a detached background shell after a 10 s delay so
    # this task can return first. Redirection uses the POSIX form
    # `> /dev/null 2>&1`: the shell module runs /bin/sh by default, where
    # `&>` is not a redirection, and a background job that keeps the module's
    # stdout/stderr open would stop the task from returning.
    - name: Issue fire-and-forget reboot (controller will be killed)
      ansible.builtin.shell: |
        nohup bash -c 'sleep 10 && reboot' > /dev/null 2>&1 &
        echo "Reboot scheduled in 10 seconds"
      register: reboot_issued
      when: not ansible_check_mode

    # In check mode the task above is skipped and registers no stdout, so a
    # default keeps the dry run from failing on an undefined attribute.
    - name: Log reboot issued
      ansible.builtin.debug:
        msg: "{{ reboot_issued.stdout | default('Reboot not issued (check mode)') }} — Ansible process will terminate when host reboots. Post-reboot startup handled by ansible-post-reboot.service on LXC 304."
|
||||||
214
ansible/playbooks/post-reboot-startup.yml
Normal file
214
ansible/playbooks/post-reboot-startup.yml
Normal file
@ -0,0 +1,214 @@
|
|||||||
|
---
|
||||||
|
# Post-Reboot Startup — Controlled Guest Startup After Proxmox Reboot
|
||||||
|
#
|
||||||
|
# Starts all guests in dependency order with staggered delays to avoid
|
||||||
|
# I/O storms. Runs automatically via ansible-post-reboot.service on
|
||||||
|
# LXC 304 after the Proxmox host reboots.
|
||||||
|
#
|
||||||
|
# Can also be run manually:
|
||||||
|
# ansible-playbook /opt/ansible/playbooks/post-reboot-startup.yml
|
||||||
|
#
|
||||||
|
# Note: VM 109 (homeassistant) is excluded from Ansible inventory
|
||||||
|
# (self-managed via HA Supervisor) but is included in pvesh start/stop.
|
||||||
|
|
||||||
|
# Play: block until the Proxmox API answers, then print the reported version.
- name: Wait for Proxmox API to be ready
  hosts: pve-node
  gather_facts: false
  tags: [startup]

  tasks:
    # Retries up to 30 times, 10 s apart, until pvesh exits with rc 0.
    - name: Wait for Proxmox API
      ansible.builtin.command: pvesh get /version --output-format json
      register: pve_version
      until: pve_version.rc == 0
      retries: 30
      delay: 10
      changed_when: false

    # Plain attribute access replaces json_query, so this task needs neither
    # the community.general collection nor the jmespath Python library.
    # An empty stdout (e.g. in check mode) is parsed as an empty object and
    # falls through to 'unknown'.
    - name: Display Proxmox version
      ansible.builtin.debug:
        msg: "Proxmox API ready: {{ (pve_version.stdout | default('{}', true) | from_json).version | default('unknown') }}"
|
||||||
|
|
||||||
|
# Play: bring up the database VM first and give its services time to start
# before any dependent tier is started.
- name: "Startup Tier 1 — Databases"
  hosts: pve-node
  gather_facts: false
  tags:
    - startup

  tasks:
    # A failing start request is tolerated; the polling task below decides
    # whether the VM actually came up.
    - name: Start database VM (112)
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/qemu/112/status/start
      ignore_errors: true

    # Polls up to 12 times, 5 s apart. Errors are not ignored here, so the
    # play fails if the VM never reports "running".
    - name: Wait for VM 112 to be running
      ansible.builtin.shell:
        cmd: >
          pvesh get /nodes/proxmox/qemu/112/status/current --output-format json |
          python3 -c "import sys,json; print(json.load(sys.stdin).get('status','unknown'))"
      register: db_status
      until: db_status.stdout.strip() == "running"
      retries: 12
      delay: 5
      changed_when: false

    # Fixed grace period after the VM reports running.
    - name: Wait for database services to initialize
      ansible.builtin.pause:
        seconds: 30
|
||||||
|
|
||||||
|
# Play: start the infrastructure guests, then pause before the next tier.
- name: "Startup Tier 2 — Infrastructure"
  hosts: pve-node
  gather_facts: false
  tags:
    - startup

  vars:
    tier2_vms:
      - 106
      - 116
    tier2_lxcs:
      - 225
      - 210
      - 227

  tasks:
    # Start requests are best effort: a failure for one guest is reported
    # and the loop moves on.
    - name: Start Tier 2 VMs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/qemu/{{ item }}/status/start
      loop: "{{ tier2_vms }}"
      ignore_errors: true

    - name: Start Tier 2 LXCs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/lxc/{{ item }}/status/start
      loop: "{{ tier2_lxcs }}"
      ignore_errors: true

    # Fixed delay; no per-guest status polling is done for this tier.
    - name: Wait for infrastructure to come up
      ansible.builtin.pause:
        seconds: 30
|
||||||
|
|
||||||
|
# Play: start the application guests, pause, then restart the Pi-hole
# container over SSH and pause again.
- name: "Startup Tier 3 — Applications"
  hosts: pve-node
  gather_facts: false
  tags:
    - startup

  vars:
    tier3_vms:
      - 115
      - 110
    tier3_lxcs:
      - 301

  tasks:
    # Start requests are best effort: a failure for one guest is reported
    # and the loop moves on.
    - name: Start Tier 3 VMs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/qemu/{{ item }}/status/start
      loop: "{{ tier3_vms }}"
      ignore_errors: true

    - name: Start Tier 3 LXCs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/lxc/{{ item }}/status/start
      loop: "{{ tier3_lxcs }}"
      ignore_errors: true

    # Fixed delay; no per-guest status polling is done for this tier.
    - name: Wait for applications to start
      ansible.builtin.pause:
        seconds: 30

    # Runs ssh on the play's target host (pve-node), which must be able to
    # reach the "docker-home" SSH alias. A failure is tolerated.
    - name: Restart Pi-hole container via SSH (UDP DNS fix)
      ansible.builtin.command:
        cmd: ssh docker-home "docker restart pihole"
      ignore_errors: true

    - name: Wait for Pi-hole to stabilize
      ansible.builtin.pause:
        seconds: 10
|
||||||
|
|
||||||
|
# Play: start the remaining guests. No wait follows inside this play.
- name: "Startup Tier 4 — Media & Others"
  hosts: pve-node
  gather_facts: false
  tags:
    - startup

  vars:
    tier4_vms:
      - 109
    tier4_lxcs:
      - 221
      - 222
      - 223
      - 302

  tasks:
    # Start requests are best effort: a failure for one guest is reported
    # and the loop moves on.
    - name: Start Tier 4 VMs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/qemu/{{ item }}/status/start
      loop: "{{ tier4_vms }}"
      ignore_errors: true

    - name: Start Tier 4 LXCs
      ansible.builtin.command:
        cmd: pvesh create /nodes/proxmox/lxc/{{ item }}/status/start
      loop: "{{ tier4_lxcs }}"
      ignore_errors: true
|
||||||
|
|
||||||
|
# Play: after a settling delay, check that the expected guests are running,
# delete old pre-maintenance snapshots, and print a summary.
- name: Post-reboot validation
  hosts: pve-node
  gather_facts: false
  tags: [startup, validate]

  tasks:
    - name: Wait for all services to initialize
      ansible.builtin.pause:
        seconds: 60

    # The Python program spans several lines and relies on indentation, so
    # the script is a literal block (|): a folded block (>) may join the
    # lines into one and make the program invalid.
    # vmids are normalised with int() so the set comparison works whether
    # the API returns them as numbers or strings.
    # Exits 1 when an expected VM is missing; the failure is reported but
    # tolerated so the remaining tasks still run.
    - name: Check all expected VMs are running
      ansible.builtin.shell: |
        pvesh get /nodes/proxmox/qemu --output-format json |
        python3 -c "
        import sys, json
        vms = json.load(sys.stdin)
        expected = {106, 109, 110, 112, 115, 116}
        running = {int(v['vmid']) for v in vms if v.get('status') == 'running'}
        missing = expected - running
        if missing:
            print(f'WARN: VMs not running: {missing}')
            sys.exit(1)
        print(f'All expected VMs running: {running & expected}')
        "
      register: vm_check
      changed_when: false
      ignore_errors: true

    # Same check for containers; see the notes on the VM check above.
    - name: Check all expected LXCs are running
      ansible.builtin.shell: |
        pvesh get /nodes/proxmox/lxc --output-format json |
        python3 -c "
        import sys, json
        cts = json.load(sys.stdin)
        # LXC 303 (mcp-gateway) intentionally excluded — onboot=0, operator-managed
        expected = {210, 221, 222, 223, 225, 227, 301, 302, 304}
        running = {int(c['vmid']) for c in cts if c.get('status') == 'running'}
        missing = expected - running
        if missing:
            print(f'WARN: LXCs not running: {missing}')
            sys.exit(1)
        print(f'All expected LXCs running: {running & expected}')
        "
      register: lxc_check
      changed_when: false
      ignore_errors: true

    # For every VM and LXC, lists snapshots named pre-maintenance-<date>,
    # parses <date> from the name, and deletes those older than the cutoff.
    # A name whose date cannot be parsed is reported and left in place.
    # NOTE(review): retention is only enforced when this playbook runs, so a
    # snapshot outlives the 7-day cutoff until the next run — confirm that
    # is acceptable.
    - name: Clean up old maintenance snapshots (older than 7 days)
      ansible.builtin.shell: |
        cutoff=$(date -d '7 days ago' +%s);
        for vmid in $(pvesh get /nodes/proxmox/qemu --output-format json |
          python3 -c "import sys,json; [print(v['vmid']) for v in json.load(sys.stdin)]"); do
          for snap in $(pvesh get /nodes/proxmox/qemu/$vmid/snapshot --output-format json |
            python3 -c "import sys,json; [print(s['name']) for s in json.load(sys.stdin) if s['name'].startswith('pre-maintenance-')]" 2>/dev/null); do
            snap_date=$(echo $snap | sed 's/pre-maintenance-//');
            snap_epoch=$(date -d "$snap_date" +%s 2>/dev/null);
            if [ -z "$snap_epoch" ]; then
              echo "WARN: could not parse date for snapshot $snap on VM $vmid";
            elif [ "$snap_epoch" -lt "$cutoff" ]; then
              pvesh delete /nodes/proxmox/qemu/$vmid/snapshot/$snap && echo "Deleted $snap from VM $vmid";
            fi
          done
        done;
        for ctid in $(pvesh get /nodes/proxmox/lxc --output-format json |
          python3 -c "import sys,json; [print(c['vmid']) for c in json.load(sys.stdin)]"); do
          for snap in $(pvesh get /nodes/proxmox/lxc/$ctid/snapshot --output-format json |
            python3 -c "import sys,json; [print(s['name']) for s in json.load(sys.stdin) if s['name'].startswith('pre-maintenance-')]" 2>/dev/null); do
            snap_date=$(echo $snap | sed 's/pre-maintenance-//');
            snap_epoch=$(date -d "$snap_date" +%s 2>/dev/null);
            if [ -z "$snap_epoch" ]; then
              echo "WARN: could not parse date for snapshot $snap on LXC $ctid";
            elif [ "$snap_epoch" -lt "$cutoff" ]; then
              pvesh delete /nodes/proxmox/lxc/$ctid/snapshot/$snap && echo "Deleted $snap from LXC $ctid";
            fi
          done
        done;
        echo "Snapshot cleanup complete"
      register: snapshot_cleanup
      # Report a change only when at least one snapshot was deleted.
      changed_when: "'Deleted ' in (snapshot_cleanup.stdout | default(''))"
      ignore_errors: true

    - name: Display validation results
      ansible.builtin.debug:
        msg:
          - "VM status: {{ vm_check.stdout }}"
          - "LXC status: {{ lxc_check.stdout }}"
          - "Maintenance reboot complete — post-reboot startup finished"
|
||||||
15
ansible/systemd/ansible-monthly-reboot.service
Normal file
15
ansible/systemd/ansible-monthly-reboot.service
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
[Unit]
Description=Monthly Proxmox maintenance reboot (Ansible)
After=network-online.target
Wants=network-online.target

[Service]
# Runs the shutdown/reboot playbook once per activation as user "cal";
# stdout and stderr are appended to the same log file.
Type=oneshot
User=cal
WorkingDirectory=/opt/ansible
ExecStart=/usr/bin/ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml
StandardOutput=append:/opt/ansible/logs/monthly-reboot.log
StandardError=append:/opt/ansible/logs/monthly-reboot.log
# The playbook's own polling can take about 870 s in the worst case
# (13 guests x 12 polls x 5 s, plus 18 x 5 s for the database VM) before
# counting snapshots and shutdown calls. The limit is therefore well above
# that, so systemd does not kill the run halfway through the shutdown.
TimeoutStartSec=1800

# No [Install] section — this service is activated exclusively by ansible-monthly-reboot.timer
|
||||||
13
ansible/systemd/ansible-monthly-reboot.timer
Normal file
13
ansible/systemd/ansible-monthly-reboot.timer
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[Unit]
Description=Monthly Proxmox maintenance reboot timer
Documentation=https://git.manticorum.com/cal/claude-home/src/branch/main/server-configs/proxmox/maintenance-reboot.md

[Timer]
# First Sunday of the month at 08:00 UTC (3:00 AM EST / 4:00 AM EDT).
# The explicit "UTC" suffix pins the schedule; without it systemd interprets
# the time in the local time zone of the machine running the timer.
# Day range 01-07 ensures it's always the first occurrence of that weekday
OnCalendar=Sun *-*-01..07 08:00:00 UTC
# NOTE(review): Persistent=true runs a missed activation as soon as the timer
# is next started, which could reboot the host outside the maintenance
# window — confirm this catch-up behaviour is wanted.
Persistent=true
# Start at a random offset of up to 10 minutes after the scheduled time.
RandomizedDelaySec=600

[Install]
WantedBy=timers.target
|
||||||
21
ansible/systemd/ansible-post-reboot.service
Normal file
21
ansible/systemd/ansible-post-reboot.service
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
[Unit]
Description=Post-reboot controlled guest startup (Ansible)
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
User=cal
WorkingDirectory=/opt/ansible
# Only run after a fresh boot — not on service restart.
# systemd has no ConditionUpTimeSec= directive, so the guard is an
# ExecCondition=: the unit is skipped (not marked failed) when the uptime
# read from /proc/uptime is 600 s or more. "$$" is systemd's escape for a
# literal "$".
# NOTE(review): this checks the uptime seen by LXC 304; a restart of the
# container alone also looks like a fresh boot — confirm that is acceptable.
ExecCondition=/bin/sh -c 'test "$$(cut -d. -f1 /proc/uptime)" -lt 600'
# Delay 120s to let Proxmox API stabilize and onboot guests settle
ExecStartPre=/bin/sleep 120
ExecStart=/usr/bin/ansible-playbook /opt/ansible/playbooks/post-reboot-startup.yml
StandardOutput=append:/opt/ansible/logs/post-reboot-startup.log
StandardError=append:/opt/ansible/logs/post-reboot-startup.log
# Upper bound for the sleep plus the whole playbook run.
TimeoutStartSec=1800

[Install]
# Runs automatically on every boot of LXC 304
WantedBy=multi-user.target
|
||||||
@ -14,7 +14,7 @@ tags: [proxmox, maintenance, reboot, ansible, operations, systemd]
|
|||||||
|--------|-------|
|
|--------|-------|
|
||||||
| **Schedule** | 1st Sunday of every month, 3:00 AM ET (08:00 UTC) |
|
| **Schedule** | 1st Sunday of every month, 08:00 UTC (3:00 AM EST / 4:00 AM EDT) |
|
||||||
| **Expected downtime** | ~15 minutes (host reboot + VM/LXC startup) |
|
| **Expected downtime** | ~15 minutes (host reboot + VM/LXC startup) |
|
||||||
| **Orchestration** | Ansible playbook on LXC 304 (ansible-controller) |
|
| **Orchestration** | Ansible on LXC 304 — shutdown playbook → host reboot → post-reboot startup playbook |
|
||||||
| **Calendar** | Google Calendar recurring event: "Proxmox Monthly Maintenance Reboot" |
|
| **Calendar** | Google Calendar recurring event: "Proxmox Monthly Maintenance Reboot" |
|
||||||
| **HA DNS** | ubuntu-manticore (10.10.0.226) provides Pi-hole 2 during Proxmox downtime |
|
| **HA DNS** | ubuntu-manticore (10.10.0.226) provides Pi-hole 2 during Proxmox downtime |
|
||||||
|
|
||||||
@ -24,16 +24,25 @@ tags: [proxmox, maintenance, reboot, ansible, operations, systemd]
|
|||||||
- Long uptimes allow memory leaks and process state drift (e.g., avahi busy-loops)
|
- Long uptimes allow memory leaks and process state drift (e.g., avahi busy-loops)
|
||||||
- Validates that all VMs/LXCs auto-start cleanly with `onboot: 1`
|
- Validates that all VMs/LXCs auto-start cleanly with `onboot: 1`
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
The reboot is split into two playbooks because LXC 304 (the Ansible controller) is itself a guest on the Proxmox host being rebooted:
|
||||||
|
|
||||||
|
1. **`monthly-reboot.yml`** — Snapshots all guests, shuts them down in dependency order, issues a fire-and-forget `reboot` to the Proxmox host, then exits. LXC 304 is killed when the host reboots.
|
||||||
|
2. **`post-reboot-startup.yml`** — After the host reboots, LXC 304 auto-starts via `onboot: 1`. A systemd service (`ansible-post-reboot.service`) waits 120 seconds for the Proxmox API to stabilize, then starts all guests in dependency order with staggered delays.
|
||||||
|
|
||||||
|
The `onboot: 1` flag on all production guests acts as a safety net — even if the post-reboot playbook fails, Proxmox will start everything (though without controlled ordering).
|
||||||
|
|
||||||
## Prerequisites (Before Maintenance)
|
## Prerequisites (Before Maintenance)
|
||||||
|
|
||||||
- [ ] Verify no active Tdarr transcodes on ubuntu-manticore
|
- [ ] Verify no active Tdarr transcodes on ubuntu-manticore
|
||||||
- [ ] Verify no running database backups
|
- [ ] Verify no running database backups
|
||||||
- [ ] Switch workstation DNS to `1.1.1.1` (Pi-hole 1 on VM 106 will be offline)
|
- [ ] Ensure workstation has Pi-hole 2 (10.10.0.226) as a fallback DNS server so it fails over automatically during downtime
|
||||||
- [ ] Confirm ubuntu-manticore Pi-hole 2 is healthy: `ssh manticore "docker exec pihole pihole status"`
|
- [ ] Confirm ubuntu-manticore Pi-hole 2 is healthy: `ssh manticore "docker exec pihole pihole status"`
|
||||||
|
|
||||||
## `onboot` Audit
|
## `onboot` Audit
|
||||||
|
|
||||||
All production VMs and LXCs must have `onboot: 1` so they restart automatically if the playbook fails mid-sequence.
|
All production VMs and LXCs must have `onboot: 1` so they restart automatically as a safety net.
|
||||||
|
|
||||||
**Check VMs:**
|
**Check VMs:**
|
||||||
```bash
|
```bash
|
||||||
@ -55,18 +64,18 @@ done"
|
|||||||
|
|
||||||
**Audit results (2026-04-03):**
|
**Audit results (2026-04-03):**
|
||||||
|
|
||||||
| ID | Name | Type | `onboot` | Action needed |
|
| ID | Name | Type | `onboot` | Status |
|
||||||
|----|------|------|----------|---------------|
|
|----|------|------|----------|--------|
|
||||||
| 106 | docker-home | VM | 1 | OK |
|
| 106 | docker-home | VM | 1 | OK |
|
||||||
| 109 | homeassistant | VM | NOT SET | **Add `onboot: 1`** |
|
| 109 | homeassistant | VM | 1 | OK (fixed 2026-04-03) |
|
||||||
| 110 | discord-bots | VM | 1 | OK |
|
| 110 | discord-bots | VM | 1 | OK |
|
||||||
| 112 | databases-bots | VM | 1 | OK |
|
| 112 | databases-bots | VM | 1 | OK |
|
||||||
| 115 | docker-sba | VM | 1 | OK |
|
| 115 | docker-sba | VM | 1 | OK |
|
||||||
| 116 | docker-home-servers | VM | 1 | OK |
|
| 116 | docker-home-servers | VM | 1 | OK |
|
||||||
| 210 | docker-n8n-lxc | LXC | 1 | OK |
|
| 210 | docker-n8n-lxc | LXC | 1 | OK |
|
||||||
| 221 | arr-stack | LXC | NOT SET | **Add `onboot: 1`** |
|
| 221 | arr-stack | LXC | 1 | OK (fixed 2026-04-03) |
|
||||||
| 222 | memos | LXC | 1 | OK |
|
| 222 | memos | LXC | 1 | OK |
|
||||||
| 223 | foundry-lxc | LXC | NOT SET | **Add `onboot: 1`** |
|
| 223 | foundry-lxc | LXC | 1 | OK (fixed 2026-04-03) |
|
||||||
| 225 | gitea | LXC | 1 | OK |
|
| 225 | gitea | LXC | 1 | OK |
|
||||||
| 227 | uptime-kuma | LXC | 1 | OK |
|
| 227 | uptime-kuma | LXC | 1 | OK |
|
||||||
| 301 | claude-discord-coordinator | LXC | 1 | OK |
|
| 301 | claude-discord-coordinator | LXC | 1 | OK |
|
||||||
@ -74,16 +83,15 @@ done"
|
|||||||
| 303 | mcp-gateway | LXC | 0 | Intentional (on-demand) |
|
| 303 | mcp-gateway | LXC | 0 | Intentional (on-demand) |
|
||||||
| 304 | ansible-controller | LXC | 1 | OK |
|
| 304 | ansible-controller | LXC | 1 | OK |
|
||||||
|
|
||||||
**Fix missing `onboot`:**
|
**If any production guest is missing `onboot: 1`:**
|
||||||
```bash
|
```bash
|
||||||
ssh proxmox "qm set 109 --onboot 1"
|
ssh proxmox "qm set <VMID> --onboot 1" # for VMs
|
||||||
ssh proxmox "pct set 221 --onboot 1"
|
ssh proxmox "pct set <CTID> --onboot 1" # for LXCs
|
||||||
ssh proxmox "pct set 223 --onboot 1"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Shutdown Order (Dependency-Aware)
|
## Shutdown Order (Dependency-Aware)
|
||||||
|
|
||||||
Reverse of the validated startup sequence. Stop consumers before their dependencies.
|
Reverse of the validated startup sequence. Stop consumers before their dependencies. Each tier polls per-guest status rather than using fixed waits.
|
||||||
|
|
||||||
```
|
```
|
||||||
Tier 4 — Media & Others (no downstream dependents)
|
Tier 4 — Media & Others (no downstream dependents)
|
||||||
@ -92,7 +100,6 @@ Tier 4 — Media & Others (no downstream dependents)
|
|||||||
LXC 222 memos
|
LXC 222 memos
|
||||||
LXC 223 foundry-lxc
|
LXC 223 foundry-lxc
|
||||||
LXC 302 claude-runner
|
LXC 302 claude-runner
|
||||||
LXC 303 mcp-gateway (if running)
|
|
||||||
|
|
||||||
Tier 3 — Applications (depend on databases + infra)
|
Tier 3 — Applications (depend on databases + infra)
|
||||||
VM 115 docker-sba (Paper Dynasty, Major Domo)
|
VM 115 docker-sba (Paper Dynasty, Major Domo)
|
||||||
@ -107,21 +114,19 @@ Tier 2 — Infrastructure + DNS (depend on databases)
|
|||||||
VM 116 docker-home-servers
|
VM 116 docker-home-servers
|
||||||
|
|
||||||
Tier 1 — Databases (no dependencies, shut down last)
|
Tier 1 — Databases (no dependencies, shut down last)
|
||||||
VM 112 databases-bots
|
VM 112 databases-bots (force-stop after 90s if ACPI ignored)
|
||||||
|
|
||||||
Tier 0 — Ansible controller shuts itself down last
|
→ LXC 304 issues fire-and-forget reboot to Proxmox host, then is killed
|
||||||
LXC 304 ansible-controller
|
|
||||||
|
|
||||||
→ Proxmox host reboots
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**Known quirks:**
|
**Known quirks:**
|
||||||
- VM 112 (databases-bots) may ignore ACPI shutdown — use `--forceStop` after timeout
|
- VM 112 (databases-bots) may ignore ACPI shutdown — playbook force-stops after 90s
|
||||||
- VM 109 (homeassistant) is self-managed via HA Supervisor, excluded from Ansible inventory
|
- VM 109 (homeassistant) is self-managed via HA Supervisor, excluded from Ansible inventory
|
||||||
|
- LXC 303 (mcp-gateway) has `onboot: 0` and is operator-managed — not included in shutdown/startup. If it was running before maintenance, bring it up manually afterward
|
||||||
|
|
||||||
## Startup Order (Staggered)
|
## Startup Order (Staggered)
|
||||||
|
|
||||||
After the Proxmox host reboots, guests with `onboot: 1` will auto-start. The Ansible playbook overrides this with a controlled sequence:
|
After the Proxmox host reboots, LXC 304 auto-starts and the `ansible-post-reboot.service` waits 120s before running the controlled startup:
|
||||||
|
|
||||||
```
|
```
|
||||||
Tier 1 — Databases first
|
Tier 1 — Databases first
|
||||||
@ -142,8 +147,8 @@ Tier 3 — Applications
|
|||||||
LXC 301 claude-discord-coordinator
|
LXC 301 claude-discord-coordinator
|
||||||
→ wait 30s
|
→ wait 30s
|
||||||
|
|
||||||
Pi-hole fix — restart container to clear UDP DNS bug
|
Pi-hole fix — restart container via SSH to clear UDP DNS bug
|
||||||
qm guest exec 106 -- docker restart pihole
|
ssh docker-home "docker restart pihole"
|
||||||
→ wait 10s
|
→ wait 10s
|
||||||
|
|
||||||
Tier 4 — Media & Others
|
Tier 4 — Media & Others
|
||||||
@ -151,6 +156,7 @@ Tier 4 — Media & Others
|
|||||||
LXC 221 arr-stack
|
LXC 221 arr-stack
|
||||||
LXC 222 memos
|
LXC 222 memos
|
||||||
LXC 223 foundry-lxc
|
LXC 223 foundry-lxc
|
||||||
|
LXC 302 claude-runner
|
||||||
```
|
```
|
||||||
|
|
||||||
## Post-Reboot Validation
|
## Post-Reboot Validation
|
||||||
@ -161,28 +167,35 @@ Tier 4 — Media & Others
|
|||||||
- [ ] Discord bots responding (check Discord)
|
- [ ] Discord bots responding (check Discord)
|
||||||
- [ ] Uptime Kuma dashboard green: `curl -sf http://10.10.0.227:3001/api/status-page/homelab`
|
- [ ] Uptime Kuma dashboard green: `curl -sf http://10.10.0.227:3001/api/status-page/homelab`
|
||||||
- [ ] Home Assistant running: `curl -sf http://10.10.0.109:8123/api/ -H 'Authorization: Bearer <token>'`
|
- [ ] Home Assistant running: `curl -sf http://10.10.0.109:8123/api/ -H 'Authorization: Bearer <token>'`
|
||||||
- [ ] Switch workstation DNS back from `1.1.1.1` to Pi-hole
|
- [ ] Maintenance snapshots cleaned up (auto, 7-day retention)
|
||||||
|
|
||||||
## Automation
|
## Automation
|
||||||
|
|
||||||
### Ansible Playbook
|
### Ansible Playbooks
|
||||||
|
|
||||||
Located at `/opt/ansible/playbooks/monthly-reboot.yml` on LXC 304.
|
Both located at `/opt/ansible/playbooks/` on LXC 304.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Dry run (check mode)
|
# Dry run (check mode) of the shutdown + reboot playbook
|
||||||
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --check"
|
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --check"
|
||||||
|
|
||||||
# Manual execution
|
# Manual full execution — shutdown + reboot
|
||||||
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml"
|
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml"
|
||||||
|
|
||||||
# Limit to shutdown only (skip reboot)
|
# Manual post-reboot startup (if automatic startup failed)
|
||||||
|
ssh ansible "ansible-playbook /opt/ansible/playbooks/post-reboot-startup.yml"
|
||||||
|
|
||||||
|
# Shutdown only — skip the host reboot
|
||||||
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --tags shutdown"
|
ssh ansible "ansible-playbook /opt/ansible/playbooks/monthly-reboot.yml --tags shutdown"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Systemd Timer
|
### Systemd Units (on LXC 304)
|
||||||
|
|
||||||
The playbook runs automatically via systemd timer on LXC 304:
|
| Unit | Purpose | Schedule |
|
||||||
|
|------|---------|----------|
|
||||||
|
| `ansible-monthly-reboot.timer` | Triggers shutdown + reboot playbook | 1st Sunday of month, 08:00 UTC |
|
||||||
|
| `ansible-monthly-reboot.service` | Runs `monthly-reboot.yml` | Activated by timer |
|
||||||
|
| `ansible-post-reboot.service` | Runs `post-reboot-startup.yml` | On boot (multi-user.target), only if uptime < 10 min |
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check timer status
|
# Check timer status
|
||||||
@ -191,10 +204,32 @@ ssh ansible "systemctl status ansible-monthly-reboot.timer"
|
|||||||
# Next scheduled run
|
# Next scheduled run
|
||||||
ssh ansible "systemctl list-timers ansible-monthly-reboot.timer"
|
ssh ansible "systemctl list-timers ansible-monthly-reboot.timer"
|
||||||
|
|
||||||
|
# Check post-reboot service status
|
||||||
|
ssh ansible "systemctl status ansible-post-reboot.service"
|
||||||
|
|
||||||
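# Skip scheduled runs (e.g., during an incident) — timer stays stopped until `systemctl start` or the next LXC 304 boot
|
# Skip scheduled runs (e.g., during an incident) — timer stays stopped until `systemctl start` or the next LXC 304 boot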
|
||||||
ssh ansible "systemctl stop ansible-monthly-reboot.timer"
|
ssh ansible "systemctl stop ansible-monthly-reboot.timer"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Deployment (one-time setup on LXC 304)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy playbooks
|
||||||
|
scp ansible/playbooks/monthly-reboot.yml ansible:/opt/ansible/playbooks/
|
||||||
|
scp ansible/playbooks/post-reboot-startup.yml ansible:/opt/ansible/playbooks/
|
||||||
|
|
||||||
|
# Copy and enable systemd units
|
||||||
|
scp ansible/systemd/ansible-monthly-reboot.timer ansible:/etc/systemd/system/
|
||||||
|
scp ansible/systemd/ansible-monthly-reboot.service ansible:/etc/systemd/system/
|
||||||
|
scp ansible/systemd/ansible-post-reboot.service ansible:/etc/systemd/system/
|
||||||
|
ssh ansible "sudo systemctl daemon-reload && \
|
||||||
|
sudo systemctl enable --now ansible-monthly-reboot.timer && \
|
||||||
|
sudo systemctl enable ansible-post-reboot.service"
|
||||||
|
|
||||||
|
# Verify SSH key access from LXC 304 to docker-home (needed for Pi-hole restart)
|
||||||
|
ssh ansible "ssh -o BatchMode=yes docker-home 'echo ok'"
|
||||||
|
```
|
||||||
|
|
||||||
## Rollback
|
## Rollback
|
||||||
|
|
||||||
If a guest fails to start after reboot:
|
If a guest fails to start after reboot:
|
||||||
@ -202,6 +237,7 @@ If a guest fails to start after reboot:
|
|||||||
2. Review guest logs: `ssh proxmox "journalctl -u pve-guests -n 50"`
|
2. Review guest logs: `ssh proxmox "journalctl -u pve-guests -n 50"`
|
||||||
3. Manual start: `ssh proxmox "pvesh create /nodes/proxmox/qemu/<VMID>/status/start"`
|
3. Manual start: `ssh proxmox "pvesh create /nodes/proxmox/qemu/<VMID>/status/start"`
|
||||||
4. If guest is corrupted, restore from the pre-reboot Proxmox snapshot
|
4. If guest is corrupted, restore from the pre-reboot Proxmox snapshot
|
||||||
|
5. If post-reboot startup failed entirely, run manually: `ssh ansible "ansible-playbook /opt/ansible/playbooks/post-reboot-startup.yml"`
|
||||||
|
|
||||||
## Related Documentation
|
## Related Documentation
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user