paper-dynasty-card-creation/scripts/benchmark_render.sh
Cal Corum 979f3080d5 feat: concurrent upload pipeline and benchmarks (Phase 0)
- Replace sequential upload loop with asyncio.gather + Semaphore(8) (WP-04)
- Offload synchronous boto3 S3 calls to thread pool executor
- Increase fetch_card_image timeout from 6s to 10s
- Add --concurrency/-j CLI flag to pd-cards upload
- Add progress reporting every 20 completions
- Individual card failures no longer abort batch
- Apply same concurrency pattern to legacy check_cards_and_upload.py (WP-05)
- Add benchmark script for render pipeline measurements (WP-00)

Target: 800-card upload from ~40 min to <5 min (with server-side
persistent browser deployed).

Refs: #87, #91, #92

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 23:53:56 -05:00

291 lines
9.8 KiB
Bash
Executable File

#!/bin/bash
# =============================================================================
# WP-00: Paper Dynasty Card Render & Upload Pipeline Benchmark
# Phase 0 - Render Pipeline Optimization
#
# Usage:
# ./scripts/benchmark_render.sh # Run full benchmark (dev API)
# ./scripts/benchmark_render.sh --prod # Run against production API
# ./scripts/benchmark_render.sh --quick # Connectivity check only
#
# Requirements: curl, bc
# =============================================================================
# --- Configuration -----------------------------------------------------------
# API endpoints; the dev instance is the default target.
PROD_API="https://pd.manticorum.com/api"
DEV_API="https://pddev.manticorum.com/api"
API_URL="${DEV_API}"

# Player IDs in the 12000-13000 range (2005 Live cardset)
# Mix of batters and pitchers across different teams
PLAYER_IDS=(
  12785 12790 12800 12810 12820
  12830 12840 12850 12860 12870
)

# The results file is written into the directory that holds this script.
RESULTS_FILE="$(dirname "$0")/benchmark_results.txt"

# Human-readable start time for the report header, plus an epoch-based label
# that measure_card embeds in each request's cache-busting query parameter.
TIMESTAMP=$(date '+%F %T')
RUN_LABEL="benchmark-$(date '+%s')"
# --- Argument parsing ---------------------------------------------------------
# Flags may appear in any order. Unrecognized arguments are rejected (exit 2)
# instead of being silently ignored, so a typo such as "--prd" cannot quietly
# run the benchmark against the default API target.
QUICK_MODE=false
for arg in "$@"; do
  case "$arg" in
    --prod)
      API_URL="$PROD_API"
      ;;
    --quick)
      QUICK_MODE=true
      ;;
    --help | -h)
      echo "Usage: $0 [--prod] [--quick]"
      echo " --prod Use production API instead of dev"
      echo " --quick Connectivity check only (1 request)"
      exit 0
      ;;
    *)
      echo "Unknown option: $arg" >&2
      echo "Usage: $0 [--prod] [--quick]" >&2
      exit 2
      ;;
  esac
done
# --- Helpers -----------------------------------------------------------------
# Print a horizontal rule: 72 dashes followed by a newline (stdout only).
hr() {
  local rule
  # Build 72 spaces, then turn every space into a dash.
  printf -v rule '%72s' ''
  printf '%s\n' "${rule// /-}"
}
# bc-based float arithmetic
# fadd A B: print the sum A + B as computed by bc.
fadd() {
  bc -l <<< "$1 + $2"
}
# fdiv A B: print the quotient A / B, computed by bc with scale=6.
fdiv() {
  bc -l <<< "scale=6; $1 / $2"
}
# flt A B: print 1 when A < B, otherwise 0 (bc relational result).
flt() {
  bc -l <<< "$1 < $2"
}
# fmt3 NUMBER: print NUMBER rounded to three decimal places (no trailing
# newline). The values formatted here use "." as the decimal separator, so the
# conversion is pinned to the C locale; without that, printf rejects them when
# the user's locale expects "," as the separator.
fmt3() {
  LC_ALL=C printf '%.3f' "$1"
}
# Print a message to stdout and append the same line to $RESULTS_FILE.
# All arguments are joined into one line (separated by spaces under the
# default IFS). printf is used rather than echo so that a message that looks
# like an echo option (for example "-n" or "-e") is emitted literally instead
# of being consumed as a flag.
log() {
  printf '%s\n' "$*" | tee -a "$RESULTS_FILE"
}
# Request one rendered card from the API and record curl's timing metrics.
# Arguments:
#   $1 - player ID
#   $2 - card type, placed before "card" in the URL path (default: batting)
# Globals read:    API_URL, RUN_LABEL
# Globals written: LAST_HTTP, LAST_TIME, LAST_CONN, LAST_TTFB, LAST_SIZE,
#                  LAST_URL
measure_card() {
  local pid="$1"
  local kind="${2:-batting}"
  # The "d" query parameter carries the cache-bust tag: run label + player ID.
  local target="${API_URL}/v2/players/${pid}/${kind}card?d=${RUN_LABEL}-${pid}"
  # Pipe-separated report that curl prints after the transfer.
  local report_fmt='%{http_code}|%{time_total}|%{time_connect}|%{time_starttransfer}|%{size_download}'
  local metrics

  # -s silent, -o discard body, -w write the report; give up after 30 seconds.
  metrics=$(curl -s -o /dev/null -w "$report_fmt" --max-time 30 "$target" 2>&1)

  # Split the report into its five fields.
  LAST_HTTP=$(cut -d'|' -f1 <<< "$metrics")
  LAST_TIME=$(cut -d'|' -f2 <<< "$metrics")
  LAST_CONN=$(cut -d'|' -f3 <<< "$metrics")
  LAST_TTFB=$(cut -d'|' -f4 <<< "$metrics")
  LAST_SIZE=$(cut -d'|' -f5 <<< "$metrics")
  LAST_URL="$target"
}
# =============================================================================
# START
# =============================================================================
# Start a fresh results file for this run: four header lines and a blank line.
{
  printf '%s\n' "Paper Dynasty Card Render Benchmark"
  printf '%s\n' "Run timestamp : $TIMESTAMP"
  printf '%s\n' "API target : $API_URL"
  printf '%s\n' "Cache-bust tag: $RUN_LABEL"
  printf '\n'
} > "$RESULTS_FILE"

# The banner goes to both the terminal and the results file; the blank lines
# around it are printed to the terminal only.
printf '\n'
log "=============================================================="
log " Paper Dynasty Card Render Benchmark - WP-00 / Phase 0"
log " $(date '+%Y-%m-%d %H:%M:%S')"
log " API: $API_URL"
log "=============================================================="
printf '\n'
# =============================================================================
# SECTION 1: Connectivity Check
# =============================================================================
# Sends a single probe request. The run is aborted when no HTTP response is
# received, and stops here (exit 0) when --quick was given.
log "--- Section 1: Connectivity Check ---"
log ""
log "Sending single request to verify API is reachable..."
log " Player : 12785 (batting card)"

# measure_card derives its cache-bust tag from RUN_LABEL and the player ID, and
# player 12785 is also the first entry of PLAYER_IDS. With the unmodified label
# the probe would request exactly the URL that the sequential benchmark
# requests first, so that "fresh" render could be answered from cache. Give the
# probe its own label and restore the run label afterwards.
benchmark_label="$RUN_LABEL"
RUN_LABEL="${benchmark_label}-probe"
measure_card 12785 batting
RUN_LABEL="$benchmark_label"

# Log the URL that was actually requested rather than a hand-built copy.
log " URL : $LAST_URL"
echo ""

if [ "$LAST_HTTP" = "200" ]; then
  log " HTTP : $LAST_HTTP OK"
  log " Total : $(fmt3 "$LAST_TIME")s"
  log " Connect: $(fmt3 "$LAST_CONN")s"
  log " TTFB : $(fmt3 "$LAST_TTFB")s"
  log " Size : ${LAST_SIZE} bytes ($(echo "scale=1; $LAST_SIZE/1024" | bc)KB)"
  log ""
  log " Connectivity: PASS"
elif [ -z "$LAST_HTTP" ] || [ "$LAST_HTTP" = "000" ]; then
  # An empty or "000" status is treated as "no HTTP response received".
  log " ERROR: Could not reach $API_URL (no response / timeout)"
  log " Aborting benchmark."
  echo ""
  exit 1
else
  # Any other status: the server answered, so keep going but flag it.
  log " HTTP : $LAST_HTTP"
  log " WARNING: Unexpected status code. Continuing anyway."
fi
echo ""

if [ "$QUICK_MODE" = true ]; then
  log "Quick mode: exiting after connectivity check."
  echo ""
  exit 0
fi
# =============================================================================
# SECTION 2: Sequential Card Render Benchmark (10 cards)
# =============================================================================
# Renders every ID in PLAYER_IDS one after another, prints one table row per
# request, and summarizes the timings of the HTTP 200 responses.
log ""
hr
log "--- Section 2: Sequential Card Render Benchmark ---"
log ""
log "Rendering ${#PLAYER_IDS[@]} cards sequentially with fresh cache busts."
log "Each request forces a full server-side render (bypasses nginx cache)."
log ""
log "$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' 'Player' 'HTTP' 'Total(s)' 'TTFB(s)' 'Connect(s)' 'Size(KB)')"
log "$(printf '%0.s-' {1..62})"

# Running statistics; only successful (HTTP 200) requests contribute.
success_count=0
fail_count=0
total_time="0"
min_time=""
max_time=""
all_times=()

for pid in "${PLAYER_IDS[@]}"; do
  measure_card "$pid" batting
  size_kb=$(bc <<< "scale=1; $LAST_SIZE/1024")
  row=$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' \
    "$pid" \
    "$LAST_HTTP" \
    "$(fmt3 "$LAST_TIME")" \
    "$(fmt3 "$LAST_TTFB")" \
    "$(fmt3 "$LAST_CONN")" \
    "$size_kb")

  # Failures are reported and counted, then skipped for the statistics.
  if [[ "$LAST_HTTP" != "200" ]]; then
    log "$row << FAILED"
    fail_count=$((fail_count + 1))
    continue
  fi

  log "$row"
  total_time=$(fadd "$total_time" "$LAST_TIME")
  all_times+=("$LAST_TIME")
  success_count=$((success_count + 1))

  # The first success seeds both extremes; later ones are compared via bc.
  if [[ -z "$min_time" || "$(flt "$LAST_TIME" "$min_time")" == "1" ]]; then
    min_time="$LAST_TIME"
  fi
  if [[ -z "$max_time" || "$(flt "$max_time" "$LAST_TIME")" == "1" ]]; then
    max_time="$LAST_TIME"
  fi
done

echo ""
log ""
log "--- Section 2: Results Summary ---"
log ""
if [[ "$success_count" -gt 0 ]]; then
  avg_time=$(fdiv "$total_time" "$success_count")
  log " Cards requested : ${#PLAYER_IDS[@]}"
  log " Successful : $success_count"
  log " Failed : $fail_count"
  log " Total wall time : $(fmt3 "$total_time")s"
  log " Average per card : $(fmt3 "$avg_time")s"
  log " Minimum : $(fmt3 "$min_time")s"
  log " Maximum : $(fmt3 "$max_time")s"
  log ""
  # Rough throughput estimate (sequential)
  cards_per_min=$(bc <<< "scale=1; 60 / $avg_time")
  log " Sequential throughput: ~${cards_per_min} cards/min"
  # Estimate full cardset at ~500 players * 2 cards each = 1000 renders
  est_1000=$(bc <<< "scale=0; (1000 * $avg_time) / 1")
  log " Est. full cardset (1000 renders, sequential): ~${est_1000}s (~$(bc <<< "scale=1; $est_1000/60") min)"
else
  log " No successful renders to summarize."
fi
# =============================================================================
# SECTION 3: Upload Pipeline Reference
# =============================================================================
# Static reference text only: this section prints the commands for timing the
# upload pipeline and does not run or measure anything itself. The timeout and
# concurrency notes describe the pipeline after the Phase 0 upload changes
# (10s fetch timeout, concurrent uploads, --concurrency/-j flag).
echo ""
log ""
hr
log "--- Section 3: Upload Pipeline Benchmark Commands ---"
log ""
log "The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards"
log "and uploads them to S3. It uses a persistent aiohttp session with a 10s"
log "timeout per card."
log ""
log "To time a dry-run batch of 20 cards:"
log ""
log " cd /mnt/NV2/Development/paper-dynasty/card-creation"
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20 --dry-run"
log ""
log "To time a real upload batch of 20 cards (writes to S3, updates DB URLs):"
log ""
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20"
log ""
log "Notes:"
log " - dry-run validates card URLs exist without uploading"
log " - Remove --limit for full cardset run"
log " - Pipeline uploads concurrently (asyncio.gather + Semaphore(8)); tune with --concurrency/-j"
log " - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card"
log " - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio"
log ""
# =============================================================================
# SECTION 4: Before/After Comparison Template
# =============================================================================
# Prints a table whose "Before" column is pre-filled from the Section 2
# statistics when at least one render succeeded; the rest is left blank.
echo ""
hr
log "--- Section 4: Before/After Comparison Template ---"
log ""
log "Fill in after optimization work is complete."
log ""
log " Metric Before After Delta"
log " $(printf '%0.s-' {1..64})"
if [[ "$success_count" -gt 0 ]]; then
  log " Avg render time (s) $(fmt3 "$avg_time") ___._____ ___._____"
  log " Min render time (s) $(fmt3 "$min_time") ___._____ ___._____"
  log " Max render time (s) $(fmt3 "$max_time") ___._____ ___._____"
  log " Sequential cards/min ${cards_per_min} ___.___ ___.___"
else
  log " Avg render time (s) (no data) ___._____ ___._____"
fi
# Upload figures are not measured by this script; always left as placeholders.
log " Upload batch (20 cards) ___._____s ___._____s ___._____s"
log " Upload cards/min ___.___ ___.___ ___.___"
log " Full cardset time (est) ___._____min ___._____min ___ min saved"
log ""
# =============================================================================
# DONE
# =============================================================================
echo ""
hr
log "Benchmark complete."
log "Results saved to: $RESULTS_FILE"
log ""
# Voice notify (best effort): output is discarded and failures are ignored.
# --max-time bounds the call so that a listener on localhost:8888 which accepts
# the connection but never answers cannot hang the end of the script.
# avg_time falls back to 0 when no render succeeded.
curl -s --max-time 5 -X POST http://localhost:8888/notify \
  -H 'Content-Type: application/json' \
  -d "{\"message\":\"Benchmark complete. Average render time $(fmt3 "${avg_time:-0}") seconds per card\"}" \
  > /dev/null 2>&1 || true