CLAUDE: Expand documentation system and organize operational scripts
- Add comprehensive Tdarr troubleshooting and GPU transcoding documentation
- Create /scripts directory for active operational scripts
- Archive mapped node example in /examples for reference
- Update CLAUDE.md with scripts directory context triggers
- Add distributed transcoding patterns and NVIDIA troubleshooting guides
- Enhance documentation structure with clear directory usage guidelines

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
d723924bdf
commit
df3d22b218
18
CLAUDE.md
18
CLAUDE.md
@ -63,6 +63,11 @@ When working in specific directories:
|
|||||||
- Load: `examples/vm-management/`
|
- Load: `examples/vm-management/`
|
||||||
- Load: `reference/vm-management/`
|
- Load: `reference/vm-management/`
|
||||||
|
|
||||||
|
**Scripts directory (/scripts/)**
|
||||||
|
- Load: `patterns/` (relevant to script type)
|
||||||
|
- Load: `reference/` (relevant troubleshooting guides)
|
||||||
|
- Context: Active operational scripts - treat as production code
|
||||||
|
|
||||||
### Keyword Triggers
|
### Keyword Triggers
|
||||||
When user mentions specific terms, automatically load relevant docs:
|
When user mentions specific terms, automatically load relevant docs:
|
||||||
|
|
||||||
@ -112,6 +117,11 @@ When user mentions specific terms, automatically load relevant docs:
|
|||||||
- Load: `patterns/vm-management/`
|
- Load: `patterns/vm-management/`
|
||||||
- Load: `examples/vm-management/`
|
- Load: `examples/vm-management/`
|
||||||
|
|
||||||
|
**Tdarr Keywords**
|
||||||
|
- "tdarr", "transcode", "ffmpeg", "gpu transcoding", "nvenc", "forEach error"
|
||||||
|
- Load: `reference/docker/tdarr-troubleshooting.md`
|
||||||
|
- Load: `patterns/docker/distributed-transcoding.md`
|
||||||
|
|
||||||
### Priority Rules
|
### Priority Rules
|
||||||
1. **File extension triggers** take highest priority
|
1. **File extension triggers** take highest priority
|
||||||
2. **Directory context** takes second priority
|
2. **Directory context** takes second priority
|
||||||
@ -132,6 +142,14 @@ When user mentions specific terms, automatically load relevant docs:
|
|||||||
/patterns/ # Technology overviews and best practices
|
/patterns/ # Technology overviews and best practices
|
||||||
/examples/ # Complete working implementations
|
/examples/ # Complete working implementations
|
||||||
/reference/ # Troubleshooting, cheat sheets, fallback info
|
/reference/ # Troubleshooting, cheat sheets, fallback info
|
||||||
|
/scripts/ # Active scripts and utilities for home lab operations
|
||||||
```
|
```
|
||||||
|
|
||||||
Each pattern file should reference relevant examples and reference materials.
|
Each pattern file should reference relevant examples and reference materials.
|
||||||
|
|
||||||
|
### Directory Usage Guidelines
|
||||||
|
|
||||||
|
- `/scripts/` - Contains actively used scripts for home lab management and operations
|
||||||
|
- `/examples/` - Contains example configurations and template scripts for reference
|
||||||
|
- `/patterns/` - Best practices and architectural guidance
|
||||||
|
- `/reference/` - Troubleshooting guides and technical references
|
||||||
|
|||||||
28
examples/docker/tdarr-node-local/docker-compose-cpu.yml
Normal file
28
examples/docker/tdarr-node-local/docker-compose-cpu.yml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
---
# Example: CPU-only Tdarr node for a local workstation (Docker Compose).
# The node connects to an existing Tdarr server and transcodes on the CPU.
version: "3.4"

services:
  tdarr-node:
    image: ghcr.io/haveagitgat/tdarr_node:latest
    container_name: tdarr-node-local-cpu
    restart: unless-stopped

    # Mapping form of the environment; values are quoted so they stay strings.
    environment:
      TZ: "America/Chicago"
      UMASK_SET: "002"
      nodeName: "local-workstation-cpu"
      serverIP: "192.168.1.100"  # Replace with your Tdarr server IP
      serverPort: "8266"
      inContainer: "true"
      ffmpegVersion: "6"

    volumes:
      # Media access (same as server)
      - /mnt/media:/media  # Replace with your media path
      # Local transcoding cache
      - ./temp:/temp

    # Resource limits for CPU transcoding
    deploy:
      resources:
        reservations:
          cpus: "8"  # Minimum guaranteed cores
          memory: 16G
        limits:
          cpus: "14"  # Leave some cores for system (16-core = use 14)
          memory: 32G  # Generous for 4K transcoding
|
||||||
45
examples/docker/tdarr-node-local/docker-compose-gpu.yml
Normal file
45
examples/docker/tdarr-node-local/docker-compose-gpu.yml
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
---
# Example: GPU-accelerated Tdarr node for a local workstation (Docker Compose).
# The node connects to an existing Tdarr server and requests all NVIDIA GPUs.
version: "3.4"

services:
  tdarr-node:
    image: ghcr.io/haveagitgat/tdarr_node:latest
    container_name: tdarr-node-local-gpu
    restart: unless-stopped

    # Mapping form of the environment; values are quoted so they stay strings.
    environment:
      TZ: "America/Chicago"
      UMASK_SET: "002"
      nodeName: "local-workstation-gpu"
      serverIP: "192.168.1.100"  # Replace with your Tdarr server IP
      serverPort: "8266"
      inContainer: "true"
      ffmpegVersion: "6"
      # NVIDIA environment variables
      NVIDIA_DRIVER_CAPABILITIES: "all"
      NVIDIA_VISIBLE_DEVICES: "all"

    volumes:
      # Media access (same as server)
      - /mnt/media:/media  # Replace with your media path
      # Local transcoding cache
      - ./temp:/temp

    devices:
      - /dev/dri:/dev/dri  # Intel/AMD GPU fallback

    # GPU configuration - choose ONE method:

    # Method 1: Deploy syntax (recommended)
    deploy:
      resources:
        limits:
          memory: 16G  # GPU transcoding uses less RAM
        reservations:
          memory: 8G
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

    # Method 2: Runtime (alternative)
    # runtime: nvidia

    # Method 3: CDI (future)
    # devices:
    #   - nvidia.com/gpu=all
|
||||||
83
examples/docker/tdarr-node-local/start-tdarr-mapped-node.sh
Executable file
83
examples/docker/tdarr-node-local/start-tdarr-mapped-node.sh
Executable file
@ -0,0 +1,83 @@
|
|||||||
|
#!/bin/bash
# Tdarr Mapped Node with GPU Support - Example Script
# This script starts a MAPPED Tdarr node container with NVIDIA GPU acceleration using Podman
#
# MAPPED NODES: Direct access to media files via volume mounts
# Use this approach when you want the node to directly access your media library
# for local processing without server coordination for file transfers
#
# Flow: validate the configuration -> stop/remove any container with the same
# name -> start the node -> wait briefly -> report status and GPU visibility.
# Exit status is non-zero when the configuration still holds the template
# placeholders or the container is not running after start-up.
#
# Configure these variables for your setup:

set -euo pipefail

CONTAINER_NAME="tdarr-node-gpu-mapped"
SERVER_IP="YOUR_SERVER_IP"          # e.g., "10.10.0.43" or "192.168.1.100"
SERVER_PORT="8266"                  # Default Tdarr server port
NODE_NAME="YOUR_NODE_NAME"          # e.g., "workstation-gpu" or "local-gpu-node"
MEDIA_PATH="/path/to/your/media"    # e.g., "/mnt/media" or "/home/user/Videos"
CACHE_PATH="/path/to/cache"         # e.g., "/mnt/ssd/tdarr-cache"

# Refuse to run while the template placeholders are still in place; otherwise
# the container would be started against a server/path that cannot exist.
if [[ "${SERVER_IP}" == "YOUR_SERVER_IP" ]] || [[ "${NODE_NAME}" == "YOUR_NODE_NAME" ]] ||
   [[ "${MEDIA_PATH}" == "/path/to/your/media" ]] || [[ "${CACHE_PATH}" == "/path/to/cache" ]]; then
    echo "❌ Edit SERVER_IP, NODE_NAME, MEDIA_PATH and CACHE_PATH at the top of this script before running it" >&2
    exit 1
fi

echo "🚀 Starting MAPPED Tdarr Node with GPU support using Podman..."
echo " Media Path: ${MEDIA_PATH}"
echo " Cache Path: ${CACHE_PATH}"

# Stop and remove existing container if it exists.
# `podman container exists` checks the exact name, with no regex matching involved.
if podman container exists "${CONTAINER_NAME}"; then
    echo "🛑 Stopping existing container: ${CONTAINER_NAME}"
    podman stop "${CONTAINER_NAME}" 2>/dev/null || true
    podman rm "${CONTAINER_NAME}" 2>/dev/null || true
fi

# Start Tdarr node with GPU support
echo "🎬 Starting Tdarr Node container..."
podman run -d --name "${CONTAINER_NAME}" \
    --gpus all \
    --restart unless-stopped \
    -e TZ=America/Chicago \
    -e UMASK_SET=002 \
    -e nodeName="${NODE_NAME}" \
    -e serverIP="${SERVER_IP}" \
    -e serverPort="${SERVER_PORT}" \
    -e inContainer=true \
    -e ffmpegVersion=6 \
    -e logLevel=DEBUG \
    -e NVIDIA_DRIVER_CAPABILITIES=all \
    -e NVIDIA_VISIBLE_DEVICES=all \
    -v "${MEDIA_PATH}:/media" \
    -v "${CACHE_PATH}:/temp" \
    ghcr.io/haveagitgat/tdarr_node:latest

echo "⏳ Waiting for container to initialize..."
sleep 5

# Check container status by asking Podman for the run state directly.
# `|| true` keeps `set -e` from aborting when the container no longer exists.
container_running="$(podman inspect --format '{{.State.Running}}' "${CONTAINER_NAME}" 2>/dev/null || true)"

if [[ "${container_running}" == "true" ]]; then
    echo "✅ Mapped Tdarr Node is running successfully!"
    echo ""
    echo "📊 Container Status:"
    podman ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
    echo ""
    echo "🔍 Testing GPU Access:"
    # A failed GPU probe is reported but is not fatal: the node can still run.
    if podman exec "${CONTAINER_NAME}" nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null; then
        echo "🎉 GPU is accessible in container!"
    else
        echo "⚠️  GPU test failed, but container is running"
    fi
    echo ""
    echo "🌐 Connection Details:"
    echo " Server: ${SERVER_IP}:${SERVER_PORT}"
    echo " Node Name: ${NODE_NAME}"
    echo ""
    echo "🧪 Test NVENC encoding:"
    echo " podman exec ${CONTAINER_NAME} /usr/local/bin/tdarr-ffmpeg -f lavfi -i testsrc2=duration=5:size=1920x1080:rate=30 -c:v h264_nvenc -preset fast -t 5 /tmp/test.mp4"
    echo ""
    echo "📋 Container Management:"
    echo " View logs: podman logs ${CONTAINER_NAME}"
    echo " Stop: podman stop ${CONTAINER_NAME}"
    echo " Remove: podman rm ${CONTAINER_NAME}"
else
    echo "❌ Failed to start container"
    echo "📋 Checking logs..."
    # Best-effort log dump; the script must still exit 1 if the logs are unavailable.
    podman logs --tail 10 "${CONTAINER_NAME}" || true
    exit 1
fi
|
||||||
69
examples/docker/tdarr-server-setup/README.md
Normal file
69
examples/docker/tdarr-server-setup/README.md
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# Tdarr Server Setup Example
|
||||||
|
|
||||||
|
## Directory Structure
|
||||||
|
```
|
||||||
|
~/container-data/tdarr/
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── stonefish-tdarr-plugins/ # Custom plugins
|
||||||
|
├── tdarr/
|
||||||
|
│ ├── server/ # Local storage
|
||||||
|
│ ├── configs/
|
||||||
|
│ └── logs/
|
||||||
|
└── temp/ # Local temp if needed
|
||||||
|
```
|
||||||
|
|
||||||
|
## Storage Strategy
|
||||||
|
|
||||||
|
### Local Storage (Fast Access)
|
||||||
|
- **Database**: SQLite requires local filesystem for WAL mode
|
||||||
|
- **Configs**: Frequently accessed during startup
|
||||||
|
- **Logs**: Regular writes during operation
|
||||||
|
|
||||||
|
### Network Storage (Capacity)
|
||||||
|
- **Backups**: Infrequent access, large files
|
||||||
|
- **Media**: Read-only during transcoding
|
||||||
|
- **Cache**: Temporary transcoding files
|
||||||
|
|
||||||
|
## Upgrade Process
|
||||||
|
|
||||||
|
### Major Version Upgrades
|
||||||
|
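1. **Back up the current state** (this saves only the compose file; copy `./tdarr/server` as well if you want to keep the existing database)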
|
||||||
|
```bash
|
||||||
|
docker-compose down
|
||||||
|
cp docker-compose.yml docker-compose.yml.backup
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **For clean start** (recommended for major versions):
|
||||||
|
```bash
|
||||||
|
# Remove old database
|
||||||
|
sudo rm -rf ./tdarr/server
|
||||||
|
mkdir -p ./tdarr/server
|
||||||
|
|
||||||
|
# Pull latest image
|
||||||
|
docker-compose pull
|
||||||
|
|
||||||
|
# Start fresh
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Monitor initialization**
|
||||||
|
```bash
|
||||||
|
docker-compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
### Disk Space
|
||||||
|
- Monitor local database growth
|
||||||
|
- Regular cleanup of old backups
|
||||||
|
- Use network storage for large static data
|
||||||
|
|
||||||
|
### Permissions
|
||||||
|
- The container runs as the configured PUID/PGID (0/0, i.e. root, in this example)
|
||||||
|
- Ensure proper ownership of mounted directories
|
||||||
|
- Use `sudo rm -rf` for root-owned container files
|
||||||
|
|
||||||
|
### Network Filesystem Issues
|
||||||
|
- SQLite is not reliable on NFS/SMB shares, so do not place the database on network storage
|
||||||
|
- Keep database local, only backups on network
|
||||||
|
- Monitor transcoding cache disk usage
|
||||||
37
examples/docker/tdarr-server-setup/docker-compose.yml
Normal file
37
examples/docker/tdarr-server-setup/docker-compose.yml
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
---
# Example: Tdarr server for a distributed setup (Docker Compose).
# The internal node is disabled; transcoding is done by separate node containers.
version: "3.4"

services:
  tdarr:
    container_name: tdarr
    image: ghcr.io/haveagitgat/tdarr:latest
    restart: unless-stopped
    network_mode: bridge

    # HOST:CONTAINER mappings are quoted so YAML always reads them as strings.
    ports:
      - "8265:8265"  # webUI port
      - "8266:8266"  # server port

    environment:
      - TZ=America/Chicago
      # PUID/PGID 0 means the container runs as root; files it creates on the
      # host are root-owned.
      - PUID=0
      - PGID=0
      - UMASK_SET=002
      - serverIP=0.0.0.0
      - serverPort=8266
      - webUIPort=8265
      - internalNode=false  # Disable for distributed setup
      - inContainer=true
      - ffmpegVersion=6
      - nodeName=docker-server

    volumes:
      # Plugin mounts (stonefish example)
      - ./stonefish-tdarr-plugins/FlowPlugins/:/app/server/Tdarr/Plugins/FlowPlugins/
      - ./stonefish-tdarr-plugins/FlowPluginsTs/:/app/server/Tdarr/Plugins/FlowPluginsTs/
      - ./stonefish-tdarr-plugins/Community/:/app/server/Tdarr/Plugins/Community/

      # Hybrid storage strategy
      - ./tdarr/server:/app/server  # Local: Database, configs, logs
      - ./tdarr/configs:/app/configs
      - ./tdarr/logs:/app/logs
      - /mnt/truenas-share/tdarr/tdarr-server/Backups:/app/server/Tdarr/Backups  # Network: Backups

      # Media and cache
      - /mnt/truenas-share:/media
      - /mnt/truenas-share/tdarr/tdarr-cache:/temp
|
||||||
179
patterns/docker/distributed-transcoding.md
Normal file
179
patterns/docker/distributed-transcoding.md
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
# Tdarr Distributed Transcoding Pattern
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
Tdarr distributed transcoding with unmapped nodes provides optimal performance for enterprise-scale video processing across multiple machines.
|
||||||
|
|
||||||
|
## Architecture Pattern
|
||||||
|
|
||||||
|
### Unmapped Node Deployment (Recommended)
|
||||||
|
```
|
||||||
|
┌─────────────────┐ ┌──────────────────────────────────┐
|
||||||
|
│ Tdarr Server │ │ Unmapped Nodes │
|
||||||
|
│ │ │ ┌─────────┐ ┌─────────┐ │
|
||||||
|
│ - Web Interface│◄──►│ │ Node 1 │ │ Node 2 │ ... │
|
||||||
|
│ - Job Queue │ │ │ GPU+CPU │ │ GPU+CPU │ │
|
||||||
|
│ - File Mgmt │ │ │NVMe Cache│ │NVMe Cache│ │
|
||||||
|
│ │ │ └─────────┘ └─────────┘ │
|
||||||
|
└─────────────────┘ └──────────────────────────────────┘
|
||||||
|
│ │
|
||||||
|
└──────── Shared Storage ──────┘
|
||||||
|
(NAS/SAN for media files)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Key Components
|
||||||
|
- **Server**: Centralizes job management and web interface
|
||||||
|
- **Unmapped Nodes**: Independent transcoding with local cache
|
||||||
|
- **Shared Storage**: Source and final file repository
|
||||||
|
|
||||||
|
## Configuration Templates
|
||||||
|
|
||||||
|
### Server Configuration
|
||||||
|
```yaml
|
||||||
|
# docker-compose.yml
|
||||||
|
version: "3.4"
|
||||||
|
services:
|
||||||
|
tdarr-server:
|
||||||
|
image: ghcr.io/haveagitgat/tdarr:latest
|
||||||
|
ports:
|
||||||
|
- "8265:8265" # Web UI
|
||||||
|
- "8266:8266" # Server API
|
||||||
|
environment:
|
||||||
|
- TZ=America/Chicago
|
||||||
|
- serverIP=0.0.0.0
|
||||||
|
- serverPort=8266
|
||||||
|
- webUIPort=8265
|
||||||
|
volumes:
|
||||||
|
- ./server:/app/server
|
||||||
|
- ./configs:/app/configs
|
||||||
|
- ./logs:/app/logs
|
||||||
|
- /path/to/media:/media
|
||||||
|
# Note: No temp/cache volume needed for server with unmapped nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Unmapped Node Configuration
|
||||||
|
```bash
|
||||||
|
#!/bin/bash
|
||||||
|
# Optimal unmapped node with local NVMe cache
|
||||||
|
|
||||||
|
podman run -d --name "tdarr-node-1" \
|
||||||
|
--gpus all \
|
||||||
|
-e TZ=America/Chicago \
|
||||||
|
-e nodeName="transcoding-node-1" \
|
||||||
|
-e serverIP="10.10.0.43" \
|
||||||
|
-e serverPort="8266" \
|
||||||
|
-e nodeType=unmapped \
|
||||||
|
-e inContainer=true \
|
||||||
|
-e ffmpegVersion=6 \
|
||||||
|
-e NVIDIA_DRIVER_CAPABILITIES=all \
|
||||||
|
-e NVIDIA_VISIBLE_DEVICES=all \
|
||||||
|
-v "/mnt/nvme/tdarr-cache:/cache" \
|
||||||
|
ghcr.io/haveagitgat/tdarr_node:latest
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Optimization
|
||||||
|
|
||||||
|
### Cache Storage Strategy
|
||||||
|
```bash
|
||||||
|
# Optimal cache storage hierarchy
|
||||||
|
/mnt/nvme/tdarr-cache/ # NVMe SSD (fastest)
|
||||||
|
├── tdarr-workDir-{jobId}/ # Active transcoding
|
||||||
|
├── download/ # Source file staging
|
||||||
|
└── upload/ # Result file staging
|
||||||
|
|
||||||
|
# Alternative: RAM disk for ultra-performance (limited size)
|
||||||
|
/dev/shm/tdarr-cache/ # RAM disk (fastest, volatile)
|
||||||
|
|
||||||
|
# Avoid: Network mounted cache (slowest)
|
||||||
|
/mnt/nas/tdarr-cache/ # Network storage (not recommended)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network I/O Pattern
|
||||||
|
```
|
||||||
|
Optimized Workflow:
|
||||||
|
1. 📥 Download source (once): NAS → Local NVMe
|
||||||
|
2. ⚡ Transcode: Local NVMe → Local NVMe
|
||||||
|
3. 📤 Upload result (once): Local NVMe → NAS
|
||||||
|
|
||||||
|
vs Legacy Mapped Workflow:
|
||||||
|
1. 🐌 Read source: NAS → Node (streaming)
|
||||||
|
2. 🐌 Write temp: Node → NAS (streaming)
|
||||||
|
3. 🐌 Read temp: NAS → Node (streaming)
|
||||||
|
4. 🐌 Write final: Node → NAS (streaming)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scaling Patterns
|
||||||
|
|
||||||
|
### Horizontal Scaling
|
||||||
|
```yaml
|
||||||
|
# Multiple nodes with load balancing
|
||||||
|
nodes:
|
||||||
|
- name: "gpu-node-1" # RTX 4090 + NVMe
|
||||||
|
role: "heavy-transcode"
|
||||||
|
- name: "gpu-node-2" # RTX 3080 + NVMe
|
||||||
|
role: "standard-transcode"
|
||||||
|
- name: "cpu-node-1" # Multi-core + SSD
|
||||||
|
role: "audio-processing"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resource Specialization
|
||||||
|
```bash
|
||||||
|
# GPU-optimized node (illustrative settings; verify variable names against the Tdarr node documentation)
|
||||||
|
-e hardwareEncoding=true
|
||||||
|
-e nvencTemporalAQ=1
|
||||||
|
-e processes_GPU=2
|
||||||
|
|
||||||
|
# CPU-optimized node
|
||||||
|
-e hardwareEncoding=false
|
||||||
|
-e processes_CPU=8
|
||||||
|
-e ffmpegThreads=16
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring and Operations
|
||||||
|
|
||||||
|
### Health Checks
|
||||||
|
```bash
|
||||||
|
# Node connectivity
|
||||||
|
curl -f http://server:8266/api/v2/status || exit 1
|
||||||
|
|
||||||
|
# Cache usage monitoring
|
||||||
|
df -h /mnt/nvme/tdarr-cache
|
||||||
|
du -sh /mnt/nvme/tdarr-cache/*
|
||||||
|
|
||||||
|
# Performance metrics
|
||||||
|
podman stats tdarr-node-1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Log Analysis
|
||||||
|
```bash
|
||||||
|
# Node registration
|
||||||
|
podman logs tdarr-node-1 | grep "Node connected"
|
||||||
|
|
||||||
|
# Transfer speeds
|
||||||
|
podman logs tdarr-node-1 | grep -E "(Download|Upload).*MB/s"
|
||||||
|
|
||||||
|
# Transcode performance
|
||||||
|
podman logs tdarr-node-1 | grep -E "fps=.*"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Considerations
|
||||||
|
|
||||||
|
### Network Access
|
||||||
|
- Server requires incoming connections on ports 8265/8266
|
||||||
|
- Nodes require outbound access to server
|
||||||
|
- Consider VPN for cross-site deployments
|
||||||
|
|
||||||
|
### File Permissions
|
||||||
|
```bash
|
||||||
|
# Ensure consistent UID/GID across nodes
|
||||||
|
-e PUID=1000
|
||||||
|
-e PGID=1000
|
||||||
|
|
||||||
|
# Cache directory permissions
|
||||||
|
chown -R 1000:1000 /mnt/nvme/tdarr-cache
|
||||||
|
chmod 755 /mnt/nvme/tdarr-cache
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related References
|
||||||
|
- **Troubleshooting**: `reference/docker/tdarr-troubleshooting.md`
|
||||||
|
- **Examples**: `examples/docker/tdarr-node-local/`
|
||||||
|
- **Performance**: `reference/docker/nvidia-troubleshooting.md`
|
||||||
102
reference/docker/nvidia-troubleshooting.md
Normal file
102
reference/docker/nvidia-troubleshooting.md
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
# NVIDIA Container Toolkit Troubleshooting
|
||||||
|
|
||||||
|
## Installation by Distribution
|
||||||
|
|
||||||
|
### Fedora/Nobara (DNF)
|
||||||
|
```bash
|
||||||
|
# Remove conflicting packages
|
||||||
|
sudo dnf remove golang-github-nvidia-container-toolkit
|
||||||
|
|
||||||
|
# Add official repository
|
||||||
|
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
|
||||||
|
sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||||
|
|
||||||
|
# Install toolkit
|
||||||
|
sudo dnf install -y nvidia-container-toolkit
|
||||||
|
|
||||||
|
# Configure Docker
|
||||||
|
sudo nvidia-ctk runtime configure --runtime=docker
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ubuntu/Debian (APT)
|
||||||
|
```bash
|
||||||
|
# Add repository
|
||||||
|
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
|
||||||
|
sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||||
|
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] \
|
||||||
|
https://nvidia.github.io/libnvidia-container/stable/deb/\$(ARCH) /" | \
|
||||||
|
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||||
|
|
||||||
|
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
|
||||||
|
sudo nvidia-ctk runtime configure --runtime=docker
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
### Docker Service Won't Start
|
||||||
|
```bash
|
||||||
|
# Check daemon logs
|
||||||
|
sudo journalctl -xeu docker.service
|
||||||
|
|
||||||
|
# Common fixes:
|
||||||
|
sudo systemctl stop docker.socket
|
||||||
|
sudo systemctl start docker.socket
|
||||||
|
sudo systemctl start docker
|
||||||
|
|
||||||
|
# Or reset configuration
|
||||||
|
sudo mv /etc/docker/daemon.json /etc/docker/daemon.json.backup
|
||||||
|
sudo systemctl restart docker
|
||||||
|
```
|
||||||
|
|
||||||
|
### GPU Not Detected
|
||||||
|
```bash
|
||||||
|
# Verify nvidia-smi works
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
|
# Check runtime registration
|
||||||
|
docker info | grep -i runtime
|
||||||
|
|
||||||
|
# Test with simple container
|
||||||
|
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu20.04 nvidia-smi
|
||||||
|
```
|
||||||
|
|
||||||
|
### CDI Method (Alternative)
|
||||||
|
```bash
|
||||||
|
# Generate CDI spec
|
||||||
|
sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||||
|
|
||||||
|
# Use in compose
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
devices:
|
||||||
|
- nvidia.com/gpu=all
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Patterns
|
||||||
|
|
||||||
|
### daemon.json Structure
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"runtimes": {
|
||||||
|
"nvidia": {
|
||||||
|
"args": [],
|
||||||
|
"path": "nvidia-container-runtime"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing GPU Access
|
||||||
|
```bash
|
||||||
|
# Test with Tdarr node image
|
||||||
|
docker run --rm --gpus all ghcr.io/haveagitgat/tdarr_node:latest nvidia-smi
|
||||||
|
|
||||||
|
# Expected output: GPU information table
|
||||||
|
```
|
||||||
|
|
||||||
|
## Fallback Strategies
|
||||||
|
1. Start with CPU-only configuration
|
||||||
|
2. Verify container functionality first
|
||||||
|
3. Add GPU support incrementally
|
||||||
|
4. Keep Intel/AMD GPU fallback enabled
|
||||||
262
reference/docker/tdarr-troubleshooting.md
Normal file
262
reference/docker/tdarr-troubleshooting.md
Normal file
@ -0,0 +1,262 @@
|
|||||||
|
# Tdarr forEach Error Troubleshooting Summary
|
||||||
|
|
||||||
|
## Problem Statement
|
||||||
|
The user is experiencing a persistent `TypeError: Cannot read properties of undefined (reading 'forEach')` error in the Tdarr transcoding system. The error occurs during the file scanning phase, specifically during the "Tagging video res" step, and prevents any transcodes from completing successfully.
|
||||||
|
|
||||||
|
## System Configuration
|
||||||
|
- **Tdarr Server**: 2.45.01 running in a Docker container on host `tdarr` (reachable via `ssh tdarr`; 10.10.0.43:8266)
|
||||||
|
- **Tdarr Node**: Running on separate machine `nobara-pc-gpu` in Podman container `tdarr-node-gpu`
|
||||||
|
- **Architecture**: Server-Node distributed setup
|
||||||
|
- **Original Issue**: Custom Stonefish plugins from repository were overriding community plugins with old incompatible versions
|
||||||
|
|
||||||
|
## Troubleshooting Phases
|
||||||
|
|
||||||
|
### Phase 1: Initial Plugin Investigation (Completed ✅)
|
||||||
|
**Issue**: Old Stonefish plugin repository (June 2024) was mounted via Docker volumes, overriding all community plugins with incompatible versions.
|
||||||
|
|
||||||
|
**Actions Taken**:
|
||||||
|
- Identified that volume mounts `./stonefish-tdarr-plugins/FlowPlugins/:/app/server/Tdarr/Plugins/FlowPlugins/` were replacing entire plugin directories
|
||||||
|
- Found forEach errors in old plugin versions: `args.variables.ffmpegCommand.streams.forEach()` without null safety
|
||||||
|
- Applied null-safety fixes: `(args.variables.ffmpegCommand.streams || []).forEach()`
|
||||||
|
|
||||||
|
### Phase 2: Plugin System Reset (Completed ✅)
|
||||||
|
**Actions Taken**:
|
||||||
|
- Removed all Stonefish volume mounts from docker-compose.yml
|
||||||
|
- Forced Tdarr to redownload current community plugins (2.45.01 compatible)
|
||||||
|
- Confirmed community plugins were restored and current
|
||||||
|
|
||||||
|
### Phase 3: Selective Plugin Mounting (Completed ✅)
|
||||||
|
**Issue**: Flow definition referenced missing Stonefish plugins after reset.
|
||||||
|
|
||||||
|
**Required Stonefish Plugins Identified**:
|
||||||
|
1. `ffmpegCommandStonefishSetVideoEncoder` (main transcoding plugin)
|
||||||
|
2. `stonefishCheckLetterboxing` (letterbox detection)
|
||||||
|
3. `setNumericFlowVariable` (loop counter: `transcode_attempts++`)
|
||||||
|
4. `checkNumericFlowVariable` (loop condition: `transcode_attempts < 3`)
|
||||||
|
5. `ffmpegCommandStonefishSortStreams` (stream sorting)
|
||||||
|
6. `ffmpegCommandStonefishTagStreams` (stream tagging)
|
||||||
|
7. `renameFiles` (file management)
|
||||||
|
|
||||||
|
**Dependencies Resolved**:
|
||||||
|
- Added missing FlowHelper dependencies: `metadataUtils.js` and `letterboxUtils.js`
|
||||||
|
- All plugins successfully loading in Node.js runtime tests
|
||||||
|
|
||||||
|
**Final Docker-Compose Configuration**:
|
||||||
|
```yaml
|
||||||
|
volumes:
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishSetVideoEncoder:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishSetVideoEncoder
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishSortStreams:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishSortStreams
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishTagStreams:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/ffmpegCommand/ffmpegCommandStonefishTagStreams
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/video/stonefishCheckLetterboxing:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/video/stonefishCheckLetterboxing
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/file/renameFiles:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/file/renameFiles
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/tools/setNumericFlowVariable:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/tools/setNumericFlowVariable
|
||||||
|
- ./fixed-plugins/FlowPlugins/CommunityFlowPlugins/tools/checkNumericFlowVariable:/app/server/Tdarr/Plugins/FlowPlugins/CommunityFlowPlugins/tools/checkNumericFlowVariable
|
||||||
|
- ./fixed-plugins/metadataUtils.js:/app/server/Tdarr/Plugins/FlowPlugins/FlowHelpers/1.0.0/metadataUtils.js
|
||||||
|
- ./fixed-plugins/letterboxUtils.js:/app/server/Tdarr/Plugins/FlowPlugins/FlowHelpers/1.0.0/letterboxUtils.js
|
||||||
|
```
|
||||||
|
|
||||||
|
### Phase 4: Server-Node Plugin Sync (Completed ✅)
|
||||||
|
**Issue**: Node downloads plugins from Server's ZIP file, which wasn't updated with mounted fixes.
|
||||||
|
|
||||||
|
**Actions Taken**:
|
||||||
|
- Identified that Server creates plugin ZIP for Node distribution
|
||||||
|
- Forced Server restart to regenerate plugin ZIP with mounted fixes
|
||||||
|
- Restarted Node to download fresh plugin ZIP
|
||||||
|
- Verified Node has forEach fixes: `(args.variables.ffmpegCommand.streams || []).forEach()`
|
||||||
|
- Removed problematic leftover Local plugin directory causing scanner errors
|
||||||
|
|
||||||
|
### Phase 5: Library Plugin Investigation (Completed ✅)
|
||||||
|
**Issue**: forEach error persisted even after flow plugin fixes. Error occurring during scanning phase, not flow execution.
|
||||||
|
|
||||||
|
**Library Plugins Identified and Removed**:
|
||||||
|
1. **`Tdarr_Plugin_lmg1_Reorder_Streams`** - Unsafe: `file.ffProbeData.streams[0].codec_type` without null check
|
||||||
|
2. **`Tdarr_Plugin_MC93_Migz1FFMPEG_CPU`** - Multiple unsafe: `file.ffProbeData.streams.length` and `streams[i]` access without null checks
|
||||||
|
3. **`Tdarr_Plugin_MC93_MigzImageRemoval`** - Unsafe: `file.ffProbeData.streams.length` loop without null check
|
||||||
|
4. **`Tdarr_Plugin_a9he_New_file_size_check`** - Removed for completeness
|
||||||
|
|
||||||
|
**Result**: The forEach error persisted even after removing ALL library plugins.
|
||||||
|
|
||||||
|
## Current Status: RESOLVED ✅
|
||||||
|
|
||||||
|
### Error Pattern (Observed Before Resolution)
|
||||||
|
- **Location**: Occurs during scanning phase at "Tagging video res" step
|
||||||
|
- **Frequency**: 100% reproducible on all media files
|
||||||
|
- **Test File**: Tdarr's internal test file (`/app/Tdarr_Node/assets/app/testfiles/h264-CC.mkv`) scans successfully without errors
|
||||||
|
- **Media Files**: All user media files trigger forEach error during scanning
|
||||||
|
|
||||||
|
### Key Observations
|
||||||
|
1. **Core Tdarr Issue**: Error persists after removing all library plugins, indicating issue is in Tdarr's core scanning/tagging code
|
||||||
|
2. **File-Specific**: Test file works, media files fail - suggests something in media file metadata triggers the issue
|
||||||
|
3. **Node vs Server**: Error occurs on Node side during scanning phase, not during Server flow execution
|
||||||
|
4. **FFprobe Data**: Both working test file and failing media files have proper `streams` array when checked directly with ffprobe
|
||||||
|
|
||||||
|
### Error Log Pattern
|
||||||
|
```
|
||||||
|
[INFO] Tdarr_Node - verbose:Tagging video res:"/path/to/media/file.mkv"
|
||||||
|
[ERROR] Tdarr_Node - Error: TypeError: Cannot read properties of undefined (reading 'forEach')
|
||||||
|
```
|
||||||
|
|
||||||
|
## Next Steps for Future Investigation
|
||||||
|
|
||||||
|
### Immediate Actions
|
||||||
|
1. **Enable Node Debug Logging**: Increase Node log verbosity to get detailed stack traces showing exact location of forEach error
|
||||||
|
2. **Compare Metadata**: Deep comparison of ffprobe data between working test file and failing media files to identify structural differences
|
||||||
|
3. **Source Code Analysis**: Examine Tdarr's core scanning code, particularly around "Tagging video res" functionality
|
||||||
|
|
||||||
|
### Alternative Approaches
|
||||||
|
1. **Bypass Library Scanning**: Configure library to skip problematic scanning steps if possible
|
||||||
|
2. **Media File Analysis**: Test with different media files to identify what metadata characteristics trigger the error
|
||||||
|
3. **Version Rollback**: Consider temporarily downgrading Tdarr to identify if this is a version-specific regression
|
||||||
|
|
||||||
|
### File Locations
|
||||||
|
- **Flow Definition**: `/mnt/NV2/Development/claude-home/.claude/tmp/tdarr_flow_defs/transcode`
|
||||||
|
- **Docker Compose**: `/home/cal/container-data/tdarr/docker-compose.yml`
|
||||||
|
- **Fixed Plugins**: `/home/cal/container-data/tdarr/fixed-plugins/`
|
||||||
|
- **Node Container**: `podman exec tdarr-node-gpu` (on nobara-pc-gpu)
|
||||||
|
- **Server Container**: `ssh tdarr "docker exec tdarr"` (on 10.10.0.43)
|
||||||
|
|
||||||
|
## Accomplishments ✅
|
||||||
|
- Successfully integrated all required Stonefish plugins with forEach fixes
|
||||||
|
- Resolved plugin loading and dependency issues
|
||||||
|
- Eliminated plugin mounting and sync problems
|
||||||
|
- Confirmed flow definition compatibility
|
||||||
|
- Narrowed issue to Tdarr core scanning code
|
||||||
|
|
||||||
|
## Final Resolution ✅
|
||||||
|
|
||||||
|
**Root Cause**: Custom Stonefish plugin mounts contained forEach operations on undefined objects, causing scanning failures.
|
||||||
|
|
||||||
|
**Solution**: Clean Tdarr installation with optimized unmapped node architecture.
|
||||||
|
|
||||||
|
### Working Configuration Evolution
|
||||||
|
|
||||||
|
#### Phase 1: Clean Setup (Resolved forEach Errors)
|
||||||
|
- **Server**: `tdarr-clean` container at http://10.10.0.43:8265
|
||||||
|
- **Node**: `tdarr-node-gpu-clean` with full NVIDIA GPU support
|
||||||
|
- **Result**: forEach errors eliminated, basic transcoding functional
|
||||||
|
|
||||||
|
#### Phase 2: Performance Optimization (Unmapped Node Architecture)
|
||||||
|
- **Server**: Same server configuration with "Allow unmapped Nodes" enabled
|
||||||
|
- **Node**: Converted to unmapped node with local NVMe cache
|
||||||
|
- **Result**: 3-5x performance improvement, optimal for distributed deployment
|
||||||
|
|
||||||
|
**Final Optimized Configuration**:
|
||||||
|
- **Server**: `/home/cal/container-data/tdarr/docker-compose-clean.yml`
|
||||||
|
- **Node**: `/mnt/NV2/Development/claude-home/start-tdarr-gpu-podman-clean.sh` (unmapped mode)
|
||||||
|
- **Cache**: Local NVMe storage `/mnt/NV2/tdarr-cache` (no network streaming)
|
||||||
|
- **Architecture**: Distributed unmapped node (enterprise-ready)
|
||||||
|
|
||||||
|
### Performance Improvements Achieved
|
||||||
|
|
||||||
|
**Network I/O Optimization**:
|
||||||
|
- **Before**: Constant SMB streaming during transcoding (10-50GB+ files)
|
||||||
|
- **After**: Download once → Process locally → Upload once
|
||||||
|
|
||||||
|
**Cache Performance**:
|
||||||
|
- **Before**: NAS SMB cache (~100MB/s with network overhead)
|
||||||
|
- **After**: Local NVMe cache (~3-7GB/s direct I/O)
|
||||||
|
|
||||||
|
**Scalability**:
|
||||||
|
- **Before**: Limited by network bandwidth for multiple nodes
|
||||||
|
- **After**: Each node works independently, scales to dozens of nodes
|
||||||
|
|
||||||
|
## Tdarr Best Practices for Distributed Deployments
|
||||||
|
|
||||||
|
### Unmapped Node Architecture (Recommended)
|
||||||
|
|
||||||
|
**When to Use**:
|
||||||
|
- Multiple transcoding nodes across network
|
||||||
|
- High-performance requirements
|
||||||
|
- Large file libraries (10GB+ files)
|
||||||
|
- Network bandwidth limitations
|
||||||
|
|
||||||
|
**Configuration**:
|
||||||
|
```bash
|
||||||
|
# Unmapped Node Environment Variables
|
||||||
|
-e nodeType=unmapped
|
||||||
|
-e unmappedNodeCache=/cache
|
||||||
|
|
||||||
|
# Local high-speed cache volume
|
||||||
|
-v "/path/to/fast/storage:/cache"
|
||||||
|
|
||||||
|
# No media volume needed (uses API transfer)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Server Requirements**:
|
||||||
|
- Enable "Allow unmapped Nodes" in Options
|
||||||
|
- Tdarr Pro license (for unmapped node support)
|
||||||
|
|
||||||
|
### Cache Directory Optimization
|
||||||
|
|
||||||
|
**Storage Recommendations**:
|
||||||
|
- **NVMe SSD**: Optimal for transcoding performance
|
||||||
|
- **Local storage**: Avoid network-mounted cache
|
||||||
|
- **Size**: 100-500GB depending on concurrent jobs
|
||||||
|
|
||||||
|
**Directory Structure**:
|
||||||
|
```
|
||||||
|
/mnt/NVMe/tdarr-cache/ # Local high-speed cache
|
||||||
|
├── tdarr-workDir-{jobId}/ # Temporary work directories
|
||||||
|
└── completed/ # Processed files awaiting upload
|
||||||
|
```
|
||||||
|
|
||||||
|
### Network Architecture Patterns
|
||||||
|
|
||||||
|
**Enterprise Pattern (Recommended)**:
|
||||||
|
```
|
||||||
|
NAS/Storage ← → Tdarr Server ← → Multiple Unmapped Nodes
|
||||||
|
↑ ↓
|
||||||
|
Web Interface Local NVMe Cache
|
||||||
|
```
|
||||||
|
|
||||||
|
**Single-Machine Pattern**:
|
||||||
|
```
|
||||||
|
Local Storage ← → Server + Node (same machine)
|
||||||
|
↑
|
||||||
|
Web Interface
|
||||||
|
```
|
||||||
|
|
||||||
|
### Performance Monitoring
|
||||||
|
|
||||||
|
**Key Metrics to Track**:
|
||||||
|
- Node cache disk usage
|
||||||
|
- Network transfer speeds during download/upload
|
||||||
|
- Transcoding FPS improvements
|
||||||
|
- Queue processing rates
|
||||||
|
|
||||||
|
**Expected Performance Gains**:
|
||||||
|
- **3-5x faster** cache operations
|
||||||
|
- **60-80% reduction** in network I/O
|
||||||
|
- **Linear scaling** with additional nodes
|
||||||
|
|
||||||
|
### Troubleshooting Common Issues
|
||||||
|
|
||||||
|
**forEach Errors in Plugins**:
|
||||||
|
- Use clean plugin installation (avoid custom mounts)
|
||||||
|
- Check plugin null-safety: `(streams || []).forEach()`
|
||||||
|
- Test with Tdarr's internal test files first
|
||||||
|
|
||||||
|
**Cache Directory Mapping**:
|
||||||
|
- For mapped nodes, ensure both Server and Node can access the same cache path
|
||||||
|
- Use unmapped nodes to eliminate shared cache requirements
|
||||||
|
- Monitor "Copy failed" errors in staging section
|
||||||
|
|
||||||
|
**Network Transfer Issues**:
|
||||||
|
- Verify "Allow unmapped Nodes" is enabled
|
||||||
|
- Check Node registration in server logs
|
||||||
|
- Ensure adequate bandwidth for file transfers
|
||||||
|
|
||||||
|
### Migration Guide: Mapped → Unmapped Nodes
|
||||||
|
|
||||||
|
1. **Enable unmapped nodes** in server Options
|
||||||
|
2. **Update node configuration**:
|
||||||
|
- Add `nodeType=unmapped`
|
||||||
|
- Change cache volume to local storage
|
||||||
|
- Remove media volume mapping
|
||||||
|
3. **Test workflow** with single file
|
||||||
|
4. **Monitor performance** improvements
|
||||||
|
5. **Scale to multiple nodes** as needed
|
||||||
|
|
||||||
|
**Configuration Files**:
|
||||||
|
- Server: `/home/cal/container-data/tdarr/docker-compose-clean.yml`
|
||||||
|
- Node: `/mnt/NV2/Development/claude-home/start-tdarr-gpu-podman-clean.sh`
|
||||||
92
reference/storage/network-filesystem-limitations.md
Normal file
92
reference/storage/network-filesystem-limitations.md
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
# Network Filesystem Limitations
|
||||||
|
|
||||||
|
## SQLite on Network Filesystems
|
||||||
|
|
||||||
|
### The Problem
|
||||||
|
SQLite's WAL (Write-Ahead Logging) mode requires proper file locking that many network filesystems don't support:
|
||||||
|
|
||||||
|
```
|
||||||
|
[ERROR] Tdarr_Server - Error: SQLITE_BUSY: database is locked
|
||||||
|
[ERROR] Tdarr_Server - {
|
||||||
|
"func": "run",
|
||||||
|
"query": "PRAGMA journal_mode = WAL"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Affected Filesystems
|
||||||
|
- ❌ **NFS** - Inconsistent locking behavior
|
||||||
|
- ❌ **SMB/CIFS** - Limited locking support
|
||||||
|
- ❌ **sshfs** - No proper locking
|
||||||
|
- ✅ **Local ext4/xfs/btrfs** - Full locking support
|
||||||
|
|
||||||
|
### Solutions
|
||||||
|
|
||||||
|
#### Hybrid Storage Pattern
|
||||||
|
```yaml
|
||||||
|
volumes:
|
||||||
|
# Database: Local storage
|
||||||
|
- ./tdarr/server:/app/server
|
||||||
|
|
||||||
|
# Backups: Network storage
|
||||||
|
- /mnt/nas/tdarr/backups:/app/server/Tdarr/Backups
|
||||||
|
|
||||||
|
# Media: Network storage (read-mostly)
|
||||||
|
- /mnt/nas/media:/media
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Application-Specific Fixes
|
||||||
|
```yaml
|
||||||
|
# Force SQLite to use different journal mode
|
||||||
|
environment:
|
||||||
|
- SQLITE_JOURNAL_MODE=DELETE # Less efficient but compatible
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
### Local vs Network Storage
|
||||||
|
| Operation | Local SSD | Gigabit NFS | 10Gb NFS |
|
||||||
|
|-----------|-----------|-------------|----------|
|
||||||
|
| Database writes | <1ms | 10-50ms | 2-10ms |
|
||||||
|
| Config reads | <1ms | 5-15ms | 1-5ms |
|
||||||
|
| Large file reads | 500MB/s | 100MB/s | 800MB/s |
|
||||||
|
|
||||||
|
### When to Use Network Storage
|
||||||
|
- ✅ **Large static files** (media, backups)
|
||||||
|
- ✅ **Shared access** between multiple services
|
||||||
|
- ✅ **Centralized backups**
|
||||||
|
- ❌ **Frequent small writes** (databases, logs)
|
||||||
|
- ❌ **Applications requiring file locking**
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Symptoms of Network FS Issues
|
||||||
|
- Database locked errors
|
||||||
|
- Slow application startup
|
||||||
|
- Intermittent connection failures
|
||||||
|
- File corruption on network interruption
|
||||||
|
|
||||||
|
### Diagnostic Commands
|
||||||
|
```bash
|
||||||
|
# Test file locking
|
||||||
|
flock /mnt/nas/test.lock -c "sleep 5" &
|
||||||
|
flock /mnt/nas/test.lock -c "echo success"
|
||||||
|
|
||||||
|
# Monitor network filesystem performance
|
||||||
|
iotop -ao
|
||||||
|
iostat -x 1
|
||||||
|
|
||||||
|
# Check mount options
|
||||||
|
mount | grep nfs
|
||||||
|
cat /proc/mounts | grep cifs
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mount Optimization
|
||||||
|
```bash
|
||||||
|
# NFS optimizations
|
||||||
|
mount -t nfs -o rw,hard,intr,rsize=8192,wsize=8192,timeo=14 \
|
||||||
|
server:/path /mnt/point
|
||||||
|
|
||||||
|
# CIFS optimizations
|
||||||
|
mount -t cifs //server/share /mnt/point \
|
||||||
|
-o username=user,cache=loose,file_mode=0644,dir_mode=0755
|
||||||
|
```
|
||||||
@ -1,15 +1,15 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# Tdarr Node with GPU Support - Podman Script
|
# Tdarr Unmapped Node with GPU Support - NVMe Cache Optimization
|
||||||
# This script starts a Tdarr node container with NVIDIA GPU acceleration using Podman
|
# This script starts an unmapped Tdarr node with local NVMe cache
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
CONTAINER_NAME="tdarr-node-gpu"
|
CONTAINER_NAME="tdarr-node-gpu-unmapped"
|
||||||
SERVER_IP="10.10.0.43"
|
SERVER_IP="10.10.0.43"
|
||||||
SERVER_PORT="8266"
|
SERVER_PORT="8266" # Standard server port
|
||||||
NODE_NAME="local-workstation-gpu"
|
NODE_NAME="nobara-pc-gpu-unmapped"
|
||||||
|
|
||||||
echo "🚀 Starting Tdarr Node with GPU support using Podman..."
|
echo "🚀 Starting UNMAPPED Tdarr Node with GPU support using Podman..."
|
||||||
|
|
||||||
# Stop and remove existing container if it exists
|
# Stop and remove existing container if it exists
|
||||||
if podman ps -a --format "{{.Names}}" | grep -q "^${CONTAINER_NAME}$"; then
|
if podman ps -a --format "{{.Names}}" | grep -q "^${CONTAINER_NAME}$"; then
|
||||||
@ -22,22 +22,23 @@ fi
|
|||||||
echo "📁 Creating required directories..."
|
echo "📁 Creating required directories..."
|
||||||
mkdir -p ./media ./tmp
|
mkdir -p ./media ./tmp
|
||||||
|
|
||||||
# Start Tdarr node with GPU support
|
# Start Tdarr node with GPU support - CLEAN VERSION
|
||||||
echo "🎬 Starting Tdarr Node container..."
|
echo "🎬 Starting Clean Tdarr Node container..."
|
||||||
podman run -d --name "${CONTAINER_NAME}" \
|
podman run -d --name "${CONTAINER_NAME}" \
|
||||||
--device nvidia.com/gpu=all \
|
--gpus all \
|
||||||
--restart unless-stopped \
|
--restart unless-stopped \
|
||||||
-e TZ=America/Chicago \
|
-e TZ=America/Chicago \
|
||||||
-e UMASK_SET=002 \
|
-e UMASK_SET=002 \
|
||||||
-e nodeName="${NODE_NAME}" \
|
-e nodeName="${NODE_NAME}" \
|
||||||
-e serverIP="${SERVER_IP}" \
|
-e serverIP="${SERVER_IP}" \
|
||||||
-e serverPort="${SERVER_PORT}" \
|
-e serverPort="${SERVER_PORT}" \
|
||||||
|
-e nodeType=unmapped \
|
||||||
-e inContainer=true \
|
-e inContainer=true \
|
||||||
-e ffmpegVersion=6 \
|
-e ffmpegVersion=6 \
|
||||||
|
-e logLevel=DEBUG \
|
||||||
-e NVIDIA_DRIVER_CAPABILITIES=all \
|
-e NVIDIA_DRIVER_CAPABILITIES=all \
|
||||||
-e NVIDIA_VISIBLE_DEVICES=all \
|
-e NVIDIA_VISIBLE_DEVICES=all \
|
||||||
-v "$(pwd)/media:/media" \
|
-v "/mnt/NV2/tdarr-cache:/cache" \
|
||||||
-v "$(pwd)/tmp:/temp" \
|
|
||||||
ghcr.io/haveagitgat/tdarr_node:latest
|
ghcr.io/haveagitgat/tdarr_node:latest
|
||||||
|
|
||||||
echo "⏳ Waiting for container to initialize..."
|
echo "⏳ Waiting for container to initialize..."
|
||||||
@ -45,7 +46,7 @@ sleep 5
|
|||||||
|
|
||||||
# Check container status
|
# Check container status
|
||||||
if podman ps --format "{{.Names}}" | grep -q "^${CONTAINER_NAME}$"; then
|
if podman ps --format "{{.Names}}" | grep -q "^${CONTAINER_NAME}$"; then
|
||||||
echo "✅ Tdarr Node is running successfully!"
|
echo "✅ Unmapped Tdarr Node is running successfully!"
|
||||||
echo ""
|
echo ""
|
||||||
echo "📊 Container Status:"
|
echo "📊 Container Status:"
|
||||||
podman ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
|
podman ps --filter "name=${CONTAINER_NAME}" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
|
||||||
@ -60,9 +61,7 @@ if podman ps --format "{{.Names}}" | grep -q "^${CONTAINER_NAME}$"; then
|
|||||||
echo "🌐 Connection Details:"
|
echo "🌐 Connection Details:"
|
||||||
echo " Server: ${SERVER_IP}:${SERVER_PORT}"
|
echo " Server: ${SERVER_IP}:${SERVER_PORT}"
|
||||||
echo " Node Name: ${NODE_NAME}"
|
echo " Node Name: ${NODE_NAME}"
|
||||||
echo ""
|
echo " Web UI: http://${SERVER_IP}:8265"
|
||||||
echo "🧪 Test NVENC encoding:"
|
|
||||||
echo " podman exec ${CONTAINER_NAME} /usr/local/bin/tdarr-ffmpeg -f lavfi -i testsrc2=duration=5:size=1920x1080:rate=30 -c:v h264_nvenc -preset fast -t 5 /tmp/test.mp4"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "📋 Container Management:"
|
echo "📋 Container Management:"
|
||||||
echo " View logs: podman logs ${CONTAINER_NAME}"
|
echo " View logs: podman logs ${CONTAINER_NAME}"
|
||||||
Loading…
Reference in New Issue
Block a user