#!/bin/bash
# Tdarr Unmapped Node with GPU Support - System Stability Optimized
#
# Starts an unmapped Tdarr node in Podman with resource limits and a local
# NVMe cache directory bind-mounted at /cache inside the container.
#
# Usage: sudo <this-script>
#
# Exit status: 0 when the container is listed as running after start-up,
# non-zero on any pre-flight failure, podman failure, or failed start.
#
# Updated 2025-08-11: Added container security measures to prevent kernel crashes
# Updated 2025-08-11: Fixed GPU parameter to use Podman CDI standard (--device nvidia.com/gpu=all)

set -euo pipefail

readonly CONTAINER_NAME="tdarr-node-gpu-unmapped"
readonly SERVER_IP="10.10.0.43"
readonly SERVER_PORT="8266" # Standard server port
readonly WEB_UI_PORT="8265" # Only used in the summary printed on success
readonly NODE_NAME="nobara-pc-gpu-unmapped"
readonly CACHE_DIR="/mnt/NV2/tdarr-cache"   # Host directory mounted at /cache
readonly THROTTLED_DEVICE="/dev/nvme0n1"    # Block device given read/write bps limits
readonly IMAGE="ghcr.io/haveagitgat/tdarr_node:latest"

# Print an error message to stderr and abort the script.
# Arguments: $* - message text
die() {
  echo "$*" >&2
  exit 1
}

# Succeed when CONTAINER_NAME appears in `podman ps` output.
# Arguments: $@ - extra flags for `podman ps` (e.g. -a to include stopped ones)
# Returns:   0 if listed, 1 if not listed or if `podman ps` itself failed
container_listed() {
  local names
  names="$(podman ps "$@" --format '{{.Names}}')" || return 1
  # Exact whole-line comparison: wrap both sides in newlines so the name is
  # matched literally (the quoted part of the pattern is not a glob/regex)
  # and never as a substring of another container's name.
  [[ $'\n'"${names}"$'\n' == *$'\n'"${CONTAINER_NAME}"$'\n'* ]]
}

# Abort with an explanation unless running as root.
require_root() {
  if ((EUID != 0)); then
    {
      echo ""
      echo "❌ This script requires root privileges for secure container resource limits."
      echo ""
      echo "🔒 Root privileges needed for:"
      echo " - Memory lock limits (512MB) - prevent GPU memory exhaustion"
      echo " - System-level resource limits - protect against container resource abuse"
      echo " - GPU device access - privileged container operations"
      echo " - Memory/CPU/I/O constraints - full cgroups resource control"
      echo ""
      echo "🚀 Please run with sudo:"
      echo " sudo $0"
      echo ""
    } >&2
    exit 1
  fi
}

# Warn about missing GPU tooling and abort early on host resources that
# `podman run` needs. Failing here, before the old container is removed,
# avoids tearing down a working node only to fail on start.
check_system_requirements() {
  echo "🔍 Checking system requirements..."

  if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "⚠️ Warning: nvidia-smi not found. GPU access may not work." >&2
  fi

  if [[ ! -f "/etc/cdi/nvidia.yaml" ]]; then
    {
      echo "⚠️ Warning: NVIDIA CDI configuration not found at /etc/cdi/nvidia.yaml"
      echo " Run: nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
    } >&2
  fi

  # The bind-mount source must already exist. It is deliberately not created
  # here: if the NVMe filesystem is not mounted, creating the path would put
  # the cache on whatever filesystem holds the mount point.
  [[ -d "${CACHE_DIR}" ]] \
    || die "❌ Cache directory not found: ${CACHE_DIR}"

  # The bps limits below reference this device node directly.
  [[ -b "${THROTTLED_DEVICE}" ]] \
    || die "❌ Block device not found: ${THROTTLED_DEVICE}"
}

# Stop and remove a previous container with the same name, if any.
remove_existing_container() {
  if container_listed -a; then
    echo "🛑 Stopping existing container: ${CONTAINER_NAME}"
    # Best-effort on purpose: the container may already be stopped or gone.
    # If removal really failed, `podman run --name` below reports the clash.
    podman stop "${CONTAINER_NAME}" 2>/dev/null || true
    podman rm "${CONTAINER_NAME}" 2>/dev/null || true
  fi
}

# Start the Tdarr node container (UNMAPPED version) with resource limits.
# Under `set -e` a failing `podman run` aborts the script.
start_container() {
  echo "🎬 Starting Unmapped Tdarr Node container with resource limits..."
  # --memory-swap is the combined memory+swap ceiling (32g RAM + 8g swap).
  # memlock 536870912 bytes = 512 MiB.
  podman run -d --name "${CONTAINER_NAME}" \
    --device nvidia.com/gpu=all \
    --restart unless-stopped \
    --memory=32g \
    --memory-swap=40g \
    --cpus="14" \
    --pids-limit=1000 \
    --ulimit nofile=65536:65536 \
    --ulimit memlock=536870912:536870912 \
    --device-read-bps "${THROTTLED_DEVICE}:1g" \
    --device-write-bps "${THROTTLED_DEVICE}:1g" \
    -e TZ=America/Chicago \
    -e UMASK_SET=002 \
    -e nodeName="${NODE_NAME}" \
    -e serverIP="${SERVER_IP}" \
    -e serverPort="${SERVER_PORT}" \
    -e inContainer=true \
    -e ffmpegVersion=6 \
    -e logLevel=DEBUG \
    -e NVIDIA_DRIVER_CAPABILITIES=all \
    -e NVIDIA_VISIBLE_DEVICES=all \
    -e nodeType=unmapped \
    -e unmappedNodeCache=/cache \
    -v "${CACHE_DIR}:/cache" \
    "${IMAGE}"
}

# Print container status, probe GPU access, and show the usage summary.
report_success() {
  echo "✅ Unmapped Tdarr Node is running successfully!"
  echo ""
  echo "📊 Container Status:"
  podman ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
  echo ""
  echo "🔍 Testing GPU Access (using Podman CDI standard):"
  if podman exec "${CONTAINER_NAME}" nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null; then
    echo "🎉 GPU is accessible in container!"
  else
    # Not fatal: the node itself is up, only the GPU probe failed.
    {
      echo "⚠️ GPU test failed, but container is running"
      echo " Check: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
    } >&2
  fi
  echo ""
  echo "🛡️ Resource Limits Applied:"
  echo " Memory: 32GB limit + 8GB swap"
  echo " CPUs: 14 cores (2 reserved for system)"
  echo " PIDs: 1000 limit"
  echo " NVMe I/O: 1GB/s read/write limit"
  echo ""
  echo "🏗️ Architecture: Unmapped Node"
  echo " - No direct media volume mounts"
  echo " - Downloads files to local NVMe cache"
  echo " - Prevents CIFS streaming during transcoding"
  echo " - Eliminates kernel memory corruption risk"
  echo ""
  echo "🌐 Connection Details:"
  echo " Server: ${SERVER_IP}:${SERVER_PORT}"
  echo " Node Name: ${NODE_NAME}"
  echo " Node Type: Unmapped"
  echo " Web UI: http://${SERVER_IP}:${WEB_UI_PORT}"
  echo ""
  echo "📋 Container Management:"
  echo " View logs: podman logs ${CONTAINER_NAME}"
  echo " Stop: podman stop ${CONTAINER_NAME}"
  echo " Remove: podman rm ${CONTAINER_NAME}"
  echo ""
  echo "⚠️ Important Configuration Requirements:"
  echo " - Server must have 'Allow unmapped Nodes' enabled"
  echo " - NVIDIA CDI configuration required for GPU access"
  echo " - cgroups V2 recommended for full resource limit support"
}

# Report a failed start and show the last container log lines.
report_failure() {
  echo "❌ Failed to start container" >&2
  echo "📋 Checking logs..."
  # Best-effort: a failure to fetch logs must not replace the exit status
  # the caller is about to return.
  podman logs --tail 10 "${CONTAINER_NAME}" || true
}

main() {
  echo "🚀 Starting UNMAPPED Tdarr Node with GPU support and resource limits..."

  # Root is required for memlock and the other resource limits.
  require_root
  check_system_requirements
  echo "✅ Running with root privileges - full resource limits enabled"

  remove_existing_container
  start_container

  echo "⏳ Waiting for container to initialize..."
  sleep 5

  if container_listed; then
    report_success
  else
    report_failure
    exit 1
  fi
}

main "$@"