Complete restructure from patterns/examples/reference to technology-focused directories:

• Created technology-specific directories with comprehensive documentation:
  - /tdarr/ - Transcoding automation with gaming-aware scheduling
  - /docker/ - Container management with GPU acceleration patterns
  - /vm-management/ - Virtual machine automation and cloud-init
  - /networking/ - SSH infrastructure, reverse proxy, and security
  - /monitoring/ - System health checks and Discord notifications
  - /databases/ - Database patterns and troubleshooting
  - /development/ - Programming language patterns (bash, nodejs, python, vuejs)

• Enhanced CLAUDE.md with intelligent context loading:
  - Technology-first loading rules for automatic context provision
  - Troubleshooting keyword triggers for emergency scenarios
  - Documentation maintenance protocols with automated reminders
  - Context window management for optimal documentation updates

• Preserved valuable content from .claude/tmp/:
  - SSH security improvements and server inventory
  - Tdarr CIFS troubleshooting and Docker iptables solutions
  - Operational scripts with proper technology classification

• Benefits achieved:
  - Self-contained technology directories with complete context
  - Automatic loading of relevant documentation based on keywords
  - Emergency-ready troubleshooting with comprehensive guides
  - Scalable structure for future technology additions
  - Eliminated context bloat through targeted loading

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
131 lines
5.0 KiB
Bash
Executable File
131 lines
5.0 KiB
Bash
Executable File
#!/bin/bash
# Tdarr Unmapped Node with GPU Support - System Stability Optimized
# This script starts an unmapped Tdarr node with resource limits and local NVMe cache
# Updated 2025-08-11: Added container security measures to prevent kernel crashes
# Updated 2025-08-11: Fixed GPU parameter to use Podman CDI standard (--device nvidia.com/gpu=all)
#
# Usage: sudo <this-script>
#
# Flow: verify root -> check GPU tooling and host paths -> remove any previous
# container with the same name -> start the node -> wait briefly -> report.
# Exit status is 0 when the container is running after the wait, 1 otherwise.

set -euo pipefail

readonly CONTAINER_NAME="tdarr-node-gpu-unmapped"
readonly SERVER_IP="10.10.0.43"
readonly SERVER_PORT="8266" # Standard server port
readonly NODE_NAME="nobara-pc-gpu-unmapped"

# Host-side settings that were previously repeated inline in the run command.
readonly CDI_SPEC="/etc/cdi/nvidia.yaml"
readonly CACHE_DIR="/mnt/NV2/tdarr-cache"   # bind-mounted into the container as /cache
readonly IO_LIMIT_DEVICE="/dev/nvme0n1"     # block device whose read/write rate is capped
readonly IMAGE="ghcr.io/haveagitgat/tdarr_node:latest"

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Tell whether CONTAINER_NAME appears in `podman ps` output.
# The listing is captured first and matched as a fixed, whole-line string, so
# the name is never interpreted as a regex and `pipefail` cannot turn an early
# `grep -q` exit into a false negative.
# Globals:   CONTAINER_NAME (read)
# Arguments: $@ - extra flags for `podman ps` (e.g. -a to include stopped ones)
# Returns:   0 if listed, non-zero if not listed or if `podman ps` failed
#######################################
container_listed() {
  local names
  names=$(podman ps "$@" --format '{{.Names}}') || return 1
  grep -Fxq -- "${CONTAINER_NAME}" <<<"${names}"
}

#######################################
# Abort with an explanation unless the script runs as root.
# Outputs: explanation on stderr when not root
# Returns: 0 when root; exits 1 otherwise
#######################################
require_root() {
  if (( EUID == 0 )); then
    return 0
  fi
  {
    echo ""
    echo "❌ This script requires root privileges for secure container resource limits."
    echo ""
    echo "🔒 Root privileges needed for:"
    echo " - Memory lock limits (512MB) - prevent GPU memory exhaustion"
    echo " - System-level resource limits - protect against container resource abuse"
    echo " - GPU device access - privileged container operations"
    echo " - Memory/CPU/I/O constraints - full cgroups resource control"
    echo ""
    echo "🚀 Please run with sudo:"
    echo " sudo $0"
    echo ""
  } >&2
  exit 1
}

#######################################
# Check host prerequisites before anything is stopped or removed.
# Missing GPU tooling only produces warnings (the container can still start);
# a missing podman binary, cache directory or throttled block device is fatal,
# because `podman run` could not succeed and the old container would already
# have been removed by then.
# Globals: CDI_SPEC, CACHE_DIR, IO_LIMIT_DEVICE (read)
# Returns: 0 when all hard requirements are met; exits 1 otherwise
#######################################
check_system_requirements() {
  echo "🔍 Checking system requirements..."

  command -v podman > /dev/null || die "❌ podman not found in PATH"

  if ! command -v nvidia-smi &> /dev/null; then
    echo "⚠️ Warning: nvidia-smi not found. GPU access may not work."
  fi

  if [[ ! -f "${CDI_SPEC}" ]]; then
    echo "⚠️ Warning: NVIDIA CDI configuration not found at /etc/cdi/nvidia.yaml"
    echo " Run: nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
  fi

  [[ -d "${CACHE_DIR}" ]] \
    || die "❌ Cache directory not found: ${CACHE_DIR}"
  [[ -b "${IO_LIMIT_DEVICE}" ]] \
    || die "❌ Block device not found: ${IO_LIMIT_DEVICE}"
}

#######################################
# Stop and remove a previous container with the same name, if one exists.
# Globals: CONTAINER_NAME (read)
# Returns: 0 on success or when nothing had to be removed; exits 1 if the
#          old container cannot be removed
#######################################
remove_existing_container() {
  if ! container_listed -a; then
    return 0
  fi
  echo "🛑 Stopping existing container: ${CONTAINER_NAME}"
  # Best effort: a stop failure is tolerated because the forced remove below
  # is the step that must succeed.
  podman stop "${CONTAINER_NAME}" 2> /dev/null || true
  podman rm -f "${CONTAINER_NAME}" \
    || die "❌ Could not remove existing container: ${CONTAINER_NAME}"
}

#######################################
# Start the unmapped Tdarr node container with GPU access and resource limits.
# Globals: CONTAINER_NAME, NODE_NAME, SERVER_IP, SERVER_PORT, CACHE_DIR,
#          IO_LIMIT_DEVICE, IMAGE (read)
# Returns: 0 when `podman run` succeeds; exits 1 otherwise
#######################################
start_container() {
  local -a run_args=(
    -d --name "${CONTAINER_NAME}"
    --device nvidia.com/gpu=all # GPU via the Podman CDI device name
    --restart unless-stopped
    --memory=32g
    --memory-swap=40g # memory + swap combined, i.e. 8g of swap on top of 32g
    --cpus="14"
    --pids-limit=1000
    --ulimit nofile=65536:65536
    --ulimit memlock=536870912:536870912 # 512 MiB
    --device-read-bps "${IO_LIMIT_DEVICE}:1g"
    --device-write-bps "${IO_LIMIT_DEVICE}:1g"
    -e TZ=America/Chicago
    -e UMASK_SET=002
    -e nodeName="${NODE_NAME}"
    -e serverIP="${SERVER_IP}"
    -e serverPort="${SERVER_PORT}"
    -e inContainer=true
    -e ffmpegVersion=6
    -e logLevel=DEBUG
    -e NVIDIA_DRIVER_CAPABILITIES=all
    -e NVIDIA_VISIBLE_DEVICES=all
    -e nodeType=unmapped
    -e unmappedNodeCache=/cache
    -v "${CACHE_DIR}:/cache"
  )

  echo "🎬 Starting Unmapped Tdarr Node container with resource limits..."
  # Checked explicitly so a failed start gets a script-level message instead
  # of a bare `set -e` exit.
  podman run "${run_args[@]}" "${IMAGE}" || die "❌ Failed to start container"
}

#######################################
# Report whether the container is running; on success also probe the GPU and
# print the configuration summary, on failure print the last log lines.
# Globals: CONTAINER_NAME, SERVER_IP, SERVER_PORT, NODE_NAME (read)
# Returns: 0 when the container is running; exits 1 otherwise
#######################################
report_status() {
  if ! container_listed; then
    echo "❌ Failed to start container" >&2
    echo "📋 Checking logs..." >&2
    # Logs are a diagnostic aid only; the exit status below must stay 1.
    podman logs --tail 10 "${CONTAINER_NAME}" || true
    exit 1
  fi

  echo "✅ Unmapped Tdarr Node is running successfully!"
  echo ""
  echo "📊 Container Status:"
  podman ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
  echo ""
  echo "🔍 Testing GPU Access (using Podman CDI standard):"
  if podman exec "${CONTAINER_NAME}" nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2> /dev/null; then
    echo "🎉 GPU is accessible in container!"
  else
    echo "⚠️ GPU test failed, but container is running"
    echo " Check: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
  fi
  echo ""
  echo "🛡️ Resource Limits Applied:"
  echo " Memory: 32GB limit + 8GB swap"
  echo " CPUs: 14 cores (2 reserved for system)"
  echo " PIDs: 1000 limit"
  echo " NVMe I/O: 1GB/s read/write limit"
  echo ""
  echo "🏗️ Architecture: Unmapped Node"
  echo " - No direct media volume mounts"
  echo " - Downloads files to local NVMe cache"
  echo " - Prevents CIFS streaming during transcoding"
  echo " - Eliminates kernel memory corruption risk"
  echo ""
  echo "🌐 Connection Details:"
  echo " Server: ${SERVER_IP}:${SERVER_PORT}"
  echo " Node Name: ${NODE_NAME}"
  echo " Node Type: Unmapped"
  echo " Web UI: http://${SERVER_IP}:8265"
  echo ""
  echo "📋 Container Management:"
  echo " View logs: podman logs ${CONTAINER_NAME}"
  echo " Stop: podman stop ${CONTAINER_NAME}"
  echo " Remove: podman rm ${CONTAINER_NAME}"
  echo ""
  echo "⚠️ Important Configuration Requirements:"
  echo " - Server must have 'Allow unmapped Nodes' enabled"
  echo " - NVIDIA CDI configuration required for GPU access"
  echo " - cgroups V2 recommended for full resource limit support"
}

#######################################
# Entry point: run the steps in order.
#######################################
main() {
  echo "🚀 Starting UNMAPPED Tdarr Node with GPU support and resource limits..."

  require_root
  check_system_requirements
  echo "✅ Running with root privileges - full resource limits enabled"

  remove_existing_container
  start_container

  echo "⏳ Waiting for container to initialize..."
  sleep 5

  report_status
}

main "$@"