Merge branch 'main' into fix/2-3-security-hardcoded-secrets

This commit is contained in:
Claude 2026-03-23 03:50:00 +00:00
commit 6c20f93901
8 changed files with 1183 additions and 255 deletions

View File

@ -118,6 +118,9 @@ pd-cards scouting all && pd-cards scouting upload
pd-cards upload s3 --cardset "2005 Live" --dry-run
pd-cards upload s3 --cardset "2005 Live" --limit 10
# High-concurrency local rendering (start API server locally first)
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
# Check cards without uploading
pd-cards upload check --cardset "2005 Live" --limit 10
@ -263,6 +266,7 @@ Before running retrosheet_data.py, verify these configuration settings:
- `UPDATE_PLAYER_URLS`: Enable/disable updating player records with S3 URLs (careful - modifies database)
- `AWS_BUCKET_NAME`: S3 bucket name (default: 'paper-dynasty')
- `AWS_REGION`: AWS region (default: 'us-east-1')
- `PD_API_URL` (env var): Override the API base URL for card rendering (default: `https://pd.manticorum.com/api`). Set to `http://localhost:8000/api` for local rendering.
**S3 URL Structure**: `cards/cardset-{cardset_id:03d}/player-{player_id}/{batting|pitching}card.png?d={release_date}`
- Uses zero-padded 3-digit cardset ID for consistent sorting

View File

@ -1,5 +1,7 @@
import asyncio
import datetime
import functools
import os
import sys
import boto3
@ -14,6 +16,9 @@ HTML_CARDS = False # boolean to only check and not generate cards
SKIP_ARMS = False
SKIP_BATS = False
# Concurrency
CONCURRENCY = 8 # number of parallel card-processing tasks
# AWS Configuration
AWS_BUCKET_NAME = "paper-dynasty" # Change to your bucket name
AWS_REGION = "us-east-1" # Change to your region
@ -23,11 +28,11 @@ UPLOAD_TO_S3 = (
)
UPDATE_PLAYER_URLS = True # Set to False to skip player URL updates (testing) - STEP 6: Update player URLs
# Initialize S3 client
# Initialize S3 client (module-level; boto3 client is thread-safe for concurrent reads)
s3_client = boto3.client("s3", region_name=AWS_REGION) if UPLOAD_TO_S3 else None
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
"""
Fetch card image from URL and return raw bytes.
@ -131,168 +136,220 @@ async def main(args):
timestamp = int(now.timestamp())
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
# PD API base URL for card generation
PD_API_URL = "https://pd.manticorum.com/api"
# PD API base URL for card generation (override with PD_API_URL env var for local rendering)
PD_API_URL = os.environ.get("PD_API_URL", "https://pd.manticorum.com/api")
print(f"\nRelease date for cards: {release_date}")
print(f"API URL: {PD_API_URL}")
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}")
print(f"Concurrency: {CONCURRENCY} parallel tasks\n")
# Build filtered list respecting SKIP_ARMS, SKIP_BATS, START_ID, TEST_COUNT
max_count = TEST_COUNT if TEST_COUNT is not None else 9999
filtered_players = []
for x in all_players:
if len(filtered_players) >= max_count:
break
if "pitching" in x["image"] and SKIP_ARMS:
continue
if "batting" in x["image"] and SKIP_BATS:
continue
if START_ID is not None and START_ID > x["player_id"]:
continue
filtered_players.append(x)
total = len(filtered_players)
logger.info(f"Processing {total} cards with concurrency={CONCURRENCY}")
# Shared mutable state protected by locks
errors = []
successes = []
uploads = []
url_updates = []
cxn_error = False
count = -1
completed = 0
progress_lock = asyncio.Lock()
results_lock = asyncio.Lock()
start_time = datetime.datetime.now()
loop = asyncio.get_running_loop()
semaphore = asyncio.Semaphore(CONCURRENCY)
print(f"\nRelease date for cards: {release_date}")
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}\n")
async def report_progress():
"""Increment the completed counter and log/print every 20 completions."""
nonlocal completed
async with progress_lock:
completed += 1
if completed % 20 == 0 or completed == total:
print(f"Progress: {completed}/{total} cards processed")
logger.info(f"Progress: {completed}/{total} cards processed")
# Create persistent aiohttp session for all card fetches
async with aiohttp.ClientSession() as session:
for x in all_players:
if "pitching" in x["image"] and SKIP_ARMS:
pass
elif "batting" in x["image"] and SKIP_BATS:
pass
elif START_ID is not None and START_ID > x["player_id"]:
pass
elif "sombaseball" in x["image"]:
errors.append((x, f"Bad card url: {x['image']}"))
async def process_single_card(x: dict) -> None:
"""
Process one player entry under the semaphore: fetch card image(s), upload
to S3 (offloading the synchronous boto3 call to a thread pool), and
optionally patch the player record with the new S3 URL.
Both the primary card (image) and the secondary card for two-way players
(image2) are handled. Failures are appended to the shared errors list
rather than re-raised so the overall batch continues.
"""
async with semaphore:
player_id = x["player_id"]
# --- primary card ---
if "sombaseball" in x["image"]:
async with results_lock:
errors.append((x, f"Bad card url: {x['image']}"))
await report_progress()
return
card_type = "pitching" if "pitching" in x["image"] else "batting"
pd_card_url = (
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
)
if HTML_CARDS:
card_url = f"{pd_card_url}&html=true"
timeout = 2
else:
count += 1
if count % 20 == 0:
print(f"Card #{count + 1} being pulled is {x['p_name']}...")
elif TEST_COUNT is not None and TEST_COUNT < count:
print("Done test run")
break
card_url = pd_card_url
timeout = 10
# Determine card type from existing image URL
card_type = "pitching" if "pitching" in x["image"] else "batting"
# Generate card URL from PD API (forces fresh generation from database)
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
if HTML_CARDS:
card_url = f"{pd_card_url}&html=true"
timeout = 2
else:
card_url = pd_card_url
timeout = 6
try:
# Upload to S3 if enabled
if UPLOAD_TO_S3 and not HTML_CARDS:
# Fetch card image bytes directly
image_bytes = await fetch_card_image(
session, card_url, timeout=timeout
)
s3_url = upload_card_to_s3(
primary_ok = False
try:
if UPLOAD_TO_S3 and not HTML_CARDS:
image_bytes = await fetch_card_image(
session, card_url, timeout=timeout
)
# boto3 is synchronous — offload to thread pool so the event
# loop is not blocked during the S3 PUT
s3_url = await loop.run_in_executor(
None,
functools.partial(
upload_card_to_s3,
image_bytes,
x["player_id"],
player_id,
card_type,
release_date,
cardset["id"],
)
uploads.append((x["player_id"], card_type, s3_url))
),
)
async with results_lock:
uploads.append((player_id, card_type, s3_url))
if UPDATE_PLAYER_URLS:
await db_patch(
"players",
object_id=player_id,
params=[("image", s3_url)],
)
async with results_lock:
url_updates.append((player_id, card_type, s3_url))
logger.info(f"Updated player {player_id} image URL to S3")
else:
# Just validate card exists (old behavior)
logger.info("calling the card url")
await url_get(card_url, timeout=timeout)
primary_ok = True
except ConnectionError as e:
logger.error(f"Connection error for player {player_id}: {e}")
async with results_lock:
errors.append((x, e))
except ValueError as e:
async with results_lock:
errors.append((x, e))
except Exception as e:
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
async with results_lock:
errors.append((x, f"S3 error: {e}"))
if not primary_ok:
await report_progress()
return
# --- secondary card (two-way players) ---
if x["image2"] is not None:
if "sombaseball" in x["image2"]:
async with results_lock:
errors.append((x, f"Bad card url: {x['image2']}"))
await report_progress()
return
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
if HTML_CARDS:
card_url2 = f"{pd_card_url2}&html=true"
else:
card_url2 = pd_card_url2
try:
if UPLOAD_TO_S3 and not HTML_CARDS:
image_bytes2 = await fetch_card_image(
session, card_url2, timeout=10
)
s3_url2 = await loop.run_in_executor(
None,
functools.partial(
upload_card_to_s3,
image_bytes2,
player_id,
card_type2,
release_date,
cardset["id"],
),
)
async with results_lock:
uploads.append((player_id, card_type2, s3_url2))
# Update player record with new S3 URL
if UPDATE_PLAYER_URLS:
await db_patch(
"players",
object_id=x["player_id"],
params=[("image", s3_url)],
)
url_updates.append((x["player_id"], card_type, s3_url))
logger.info(
f"Updated player {x['player_id']} image URL to S3"
params=[("image2", s3_url2)],
)
async with results_lock:
url_updates.append((player_id, card_type2, s3_url2))
logger.info(f"Updated player {player_id} image2 URL to S3")
else:
# Just validate card exists (old behavior)
logger.info("calling the card url")
resp = await url_get(card_url, timeout=timeout)
await url_get(card_url2, timeout=10)
async with results_lock:
successes.append(x)
except ConnectionError as e:
if cxn_error:
raise e
cxn_error = True
errors.append((x, e))
logger.error(f"Connection error for player {player_id} image2: {e}")
async with results_lock:
errors.append((x, e))
except ValueError as e:
errors.append((x, e))
async with results_lock:
errors.append((x, e))
except Exception as e:
logger.error(
f"S3 upload/update failed for player {x['player_id']}: {e}"
f"S3 upload/update failed for player {player_id} image2: {e}"
)
errors.append((x, f"S3 error: {e}"))
continue
async with results_lock:
errors.append((x, f"S3 error (image2): {e}"))
# Handle image2 (dual-position players)
if x["image2"] is not None:
# Determine second card type
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
# Generate card URL from PD API (forces fresh generation from database)
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
if HTML_CARDS:
card_url2 = f"{pd_card_url2}&html=true"
else:
card_url2 = pd_card_url2
if "sombaseball" in x["image2"]:
errors.append((x, f"Bad card url: {x['image2']}"))
else:
try:
if UPLOAD_TO_S3 and not HTML_CARDS:
# Fetch second card image bytes directly from PD API
image_bytes2 = await fetch_card_image(
session, card_url2, timeout=6
)
s3_url2 = upload_card_to_s3(
image_bytes2,
x["player_id"],
card_type2,
release_date,
cardset["id"],
)
uploads.append((x["player_id"], card_type2, s3_url2))
# Update player record with new S3 URL for image2
if UPDATE_PLAYER_URLS:
await db_patch(
"players",
object_id=x["player_id"],
params=[("image2", s3_url2)],
)
url_updates.append(
(x["player_id"], card_type2, s3_url2)
)
logger.info(
f"Updated player {x['player_id']} image2 URL to S3"
)
else:
# Just validate card exists (old behavior)
resp = await url_get(card_url2, timeout=6)
successes.append(x)
except ConnectionError as e:
if cxn_error:
raise e
cxn_error = True
errors.append((x, e))
except ValueError as e:
errors.append((x, e))
except Exception as e:
logger.error(
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
)
errors.append((x, f"S3 error (image2): {e}"))
else:
else:
async with results_lock:
successes.append(x)
await report_progress()
# Create persistent aiohttp session shared across all concurrent tasks
async with aiohttp.ClientSession() as session:
tasks = [process_single_card(x) for x in filtered_players]
await asyncio.gather(*tasks, return_exceptions=True)
# Print summary
print(f"\n{'=' * 60}")
print("SUMMARY")

View File

@ -404,17 +404,35 @@ pd-cards upload s3 --cardset <name> [OPTIONS]
| `--upload/--no-upload` | | `True` | Upload to S3 |
| `--update-urls/--no-update-urls` | | `True` | Update player URLs in database |
| `--dry-run` | `-n` | `False` | Preview without uploading |
| `--concurrency` | `-j` | `8` | Number of parallel uploads |
| `--api-url` | | `https://pd.manticorum.com/api` | API base URL for card rendering |
**Prerequisites:** AWS CLI configured with credentials (`~/.aws/credentials`)
**S3 URL Structure:** `cards/cardset-{id:03d}/player-{player_id}/{batting|pitching}card.png?d={date}`
**Local Rendering:** For high-concurrency local rendering, start the Paper Dynasty API server locally and point uploads at it:
```bash
# Terminal 1: Start local API server (from database repo)
cd /mnt/NV2/Development/paper-dynasty/database
DATABASE_TYPE=postgresql POSTGRES_HOST=10.10.0.42 POSTGRES_DB=paperdynasty_dev \
POSTGRES_USER=sba_admin POSTGRES_PASSWORD=<pw> POSTGRES_PORT=5432 \
API_TOKEN=<api-token> \
uvicorn app.main:app --host 0.0.0.0 --port 8000
# Terminal 2: Upload with local rendering
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
```
**Examples:**
```bash
pd-cards upload s3 --cardset "2005 Live" --dry-run
pd-cards upload s3 --cardset "2005 Live" --limit 10
pd-cards upload s3 --cardset "2005 Live" --start-id 5000
pd-cards upload s3 --cardset "2005 Live" --skip-pitchers
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
```
---

View File

@ -0,0 +1,356 @@
# Phase 0 — Render Pipeline Optimization: Project Plan
**Version:** 1.1
**Date:** 2026-03-13
**PRD Reference:** `docs/prd-evolution/02-architecture.md` § Card Render Pipeline Optimization, `13-implementation.md` § Phase 0
**Status:** Complete — deployed to dev (PR #94), client-side concurrent uploads merged via PR #28 (card-creation repo)
---
## Overview
Phase 0 is independent of Card Evolution and benefits all existing card workflows immediately. The goal is to reduce per-card render time and full cardset uploads significantly by eliminating browser spawn overhead, CDN dependencies, and sequential processing.
**Bottlenecks addressed:**
1. New Chromium process spawned per render request (~1.0-1.5s overhead)
2. Google Fonts CDN fetched over network on every render (~0.3-0.5s) — no persistent cache since browser is destroyed after each render
3. Upload pipeline is fully sequential — one card at a time, blocking S3 upload via synchronous boto3
**Results:**
| Metric | Before | Target | Actual |
|--------|--------|--------|--------|
| Per-card render (fresh) | ~2.0s (benchmark avg) | <1.0s | **~0.98s avg** (range 0.63-1.44s, **~51% reduction**) |
| Per-card render (cached) | N/A | — | **~0.1s** |
| External dependencies during render | Google Fonts CDN | None | **None** |
| Chromium processes per 800-card run | 800 | 1 | **1** |
| 800-card upload (sequential, estimated) | ~27 min | ~8-13 min | ~13 min (estimated at 0.98s/card) |
| 800-card upload (concurrent 8x, estimated) | N/A | ~2-4 min | ~2-3 min (estimated) |
**Benchmark details (7 fresh renders on dev, 2026-03-13):**
| Player | Type | Time |
|--------|------|------|
| Michael Young (12726) | Batting | 0.96s |
| Darin Erstad (12729) | Batting | 0.78s |
| Wilson Valdez (12746) | Batting | 1.44s |
| Player 12750 | Batting | 0.76s |
| Jarrod Washburn (12880) | Pitching | 0.63s |
| Ryan Drese (12879) | Pitching | 1.25s |
| Player 12890 | Pitching | 1.07s |
**Average: 0.98s** — meets the <1s target. Occasional spikes to ~1.4s from Chromium GC pressure. Pitching cards tend to render slightly faster due to less template data.
**Optimization breakdown:**
- Persistent browser (WP-02): eliminated ~1.0s spawn overhead
- Variable font deduplication (WP-01 fix): eliminated ~163KB redundant base64 parsing, saved ~0.4s
- Remaining ~0.98s is Playwright page creation, HTML parsing, and PNG screenshot — not reducible without GPU acceleration or a different rendering approach
---
## Work Packages (6 WPs)
### WP-00: Baseline Benchmarks
**Repo:** `database` + `card-creation`
**Complexity:** XS
**Dependencies:** None
Capture before-metrics so we can measure improvement.
#### Tasks
1. Time 10 sequential card renders via the API (curl with timing)
2. Time a small batch S3 upload (e.g., 20 cards) via `pd-cards upload`
3. Record results in a benchmark log
#### Tests
- [ ] Benchmark script or documented curl commands exist and are repeatable
#### Acceptance Criteria
1. Baseline numbers recorded for per-card render time
2. Baseline numbers recorded for batch upload time
3. Methodology is repeatable for post-optimization comparison
---
### WP-01: Self-Hosted Fonts
**Repo:** `database`
**Complexity:** S
**Dependencies:** None (can run in parallel with WP-02)
Replace Google Fonts CDN with locally embedded WOFF2 fonts. Eliminates ~0.3-0.5s network round-trip per render and removes external dependency.
#### Current State
- `storage/templates/player_card.html` lines 5-7: `<link>` tags to `fonts.googleapis.com`
- `storage/templates/style.html`: References `"Open Sans"` and `"Source Sans 3"` font-families
- Two fonts used: Open Sans (300, 400, 700) and Source Sans 3 (400, 700)
#### Implementation
1. Download WOFF2 files for both fonts (5 files total: Open Sans 300/400/700, Source Sans 3 400/700)
2. Base64-encode each WOFF2 file
3. Add `@font-face` declarations with base64 data URIs to `style.html`
4. Remove the three `<link>` tags from `player_card.html`
5. Visual diff: render the same card before/after and verify identical output
#### Files
- **Create:** `database/storage/fonts/` directory with raw WOFF2 files (source archive, not deployed)
- **Modify:** `database/storage/templates/style.html` — add `@font-face` declarations
- **Modify:** `database/storage/templates/player_card.html` — remove `<link>` tags (lines 5-7)
#### Tests
- [ ] Unit: `style.html` contains no `fonts.googleapis.com` references
- [ ] Unit: `player_card.html` contains no `<link>` to external font CDNs
- [ ] Unit: `@font-face` declarations present for all 5 font variants
- [ ] Visual: rendered card is pixel-identical to pre-change output (manual check)
#### Acceptance Criteria
1. No external network requests during card render
2. All 5 font weights render correctly
3. Card appearance unchanged
---
### WP-02: Persistent Browser Instance
**Repo:** `database`
**Complexity:** M
**Dependencies:** None (can run in parallel with WP-01)
Replace per-request Chromium launch/teardown with a persistent browser that lives for the lifetime of the API process. Eliminates ~1.0-1.5s spawn overhead per render.
#### Current State
- `app/routers_v2/players.py` lines 801-826: `async with async_playwright() as p:` block creates and destroys a browser per request
- No browser reuse, no connection pooling
#### Implementation
1. Add module-level `_browser` and `_playwright` globals to `players.py`
2. Implement `get_browser()` — lazy-init with `is_connected()` auto-reconnect
3. Implement `shutdown_browser()` — clean teardown for API shutdown
4. Replace the `async with async_playwright()` block with page-per-request pattern:
```python
browser = await get_browser()
page = await browser.new_page(viewport={"width": 1280, "height": 720})
try:
await page.set_content(html_string)
await page.screenshot(path=file_path, type="png", clip={...})
finally:
await page.close()
```
5. Ensure page is always closed in `finally` block to prevent memory leaks
#### Files
- **Modify:** `database/app/routers_v2/players.py` — persistent browser, page-per-request
#### Tests
- [ ] Unit: `get_browser()` returns a connected browser
- [ ] Unit: `get_browser()` returns same instance on second call
- [ ] Unit: `get_browser()` relaunches if browser disconnected
- [ ] Integration: render 10 cards sequentially, no browser leaks (page count returns to 0 between renders)
- [ ] Integration: concurrent renders (4 simultaneous requests) complete without errors
- [ ] Integration: `shutdown_browser()` cleanly closes browser and playwright
#### Acceptance Criteria
1. Only 1 Chromium process running regardless of render count
2. Page count returns to 0 between renders (no leaks)
3. Auto-reconnect works if browser crashes
4. ~~Per-card render time drops to ~1.0-1.5s~~ **Actual: ~0.98s avg fresh render (from ~2.0s baseline) — target met**
---
### WP-03: FastAPI Lifespan Hooks
**Repo:** `database`
**Complexity:** S
**Dependencies:** WP-02
Wire `get_browser()` and `shutdown_browser()` into FastAPI's lifespan so the browser warms up on startup and cleans up on shutdown.
#### Current State
- `app/main.py` line 54: plain `FastAPI(...)` constructor with no lifespan
- Only middleware is the DB session handler (lines 97-105)
#### Implementation
1. Add `@asynccontextmanager` lifespan function that calls `get_browser()` on startup and `shutdown_browser()` on shutdown
2. Pass `lifespan=lifespan` to `FastAPI()` constructor
3. Verify existing middleware is unaffected
#### Files
- **Modify:** `database/app/main.py` — add lifespan hook, pass to FastAPI constructor
- **Modify:** `database/app/routers_v2/players.py` — export `get_browser`/`shutdown_browser` (if not already importable)
#### Tests
- [ ] Integration: browser is connected immediately after API startup (before any render request)
- [ ] Integration: browser is closed after API shutdown (no orphan processes)
- [ ] Integration: existing DB middleware still functions correctly
- [ ] Integration: API health endpoint still responds
#### Acceptance Criteria
1. Browser pre-warmed on startup — first render request has no cold-start penalty
2. Clean shutdown — no orphan Chromium processes after API stop
3. No regression in existing API behavior
---
### WP-04: Concurrent Upload Pipeline
**Repo:** `card-creation`
**Complexity:** M
**Dependencies:** WP-02 (persistent browser must be deployed for concurrent renders to work)
Replace the sequential upload loop with semaphore-bounded `asyncio.gather` for parallel card fetching, rendering, and S3 upload.
#### Current State
- `pd_cards/core/upload.py` `upload_cards_to_s3()` (lines 109-333): sequential `for x in all_players:` loop
- `fetch_card_image` timeout hardcoded to 6s (line 28)
- `upload_card_to_s3()` uses synchronous `boto3.put_object` — blocks the event loop
- Single `aiohttp.ClientSession` is reused (good)
#### Implementation
1. Wrap per-card processing in an `async def process_card(player)` coroutine
2. Add `asyncio.Semaphore(concurrency)` guard (default concurrency=8)
3. Replace sequential loop with `asyncio.gather(*[process_card(p) for p in all_players], return_exceptions=True)`
4. Offload synchronous `upload_card_to_s3()` to thread pool via `asyncio.get_event_loop().run_in_executor(None, upload_card_to_s3, ...)`
5. Increase `fetch_card_image` timeout from 6s to 10s
6. Add error handling: individual card failures logged but don't abort the batch
7. Add progress reporting: log completion count every N cards (not every start)
8. Add `--concurrency` CLI argument to `pd-cards upload` command
#### Files
- **Modify:** `pd_cards/core/upload.py` — concurrent pipeline, timeout increase
- **Modify:** `pd_cards/cli/upload.py` (or wherever CLI args are defined) — add `--concurrency` flag
#### Tests
- [ ] Unit: semaphore limits concurrent tasks to specified count
- [ ] Unit: individual card failure doesn't abort batch (return_exceptions=True)
- [ ] Unit: progress logging fires at correct intervals
- [ ] Integration: 20-card concurrent upload completes successfully
- [ ] Integration: S3 URLs are correct after concurrent upload
- [ ] Integration: `--concurrency 1` behaves like sequential (regression safety)
#### Acceptance Criteria
1. Default concurrency of 8 parallel card processes
2. Individual failures logged, don't abort batch
3. `fetch_card_image` timeout is 10s
4. 800-card upload estimated at ~3-4 minutes with 8x concurrency (with WP-01 + WP-02 deployed)
5. `--concurrency` flag available on CLI
---
### WP-05: Legacy Upload Script Update
**Repo:** `card-creation`
**Complexity:** S
**Dependencies:** WP-04
Apply the same concurrency pattern to `check_cards_and_upload.py` for users who still use the legacy script.
#### Current State
- `check_cards_and_upload.py` lines 150-293: identical sequential pattern to `pd_cards/core/upload.py`
- Module-level boto3 client (line 27)
#### Implementation
1. Refactor the sequential loop to use `asyncio.gather` + `Semaphore` (same pattern as WP-04)
2. Offload synchronous S3 calls to thread pool
3. Increase fetch timeout to 10s
4. Add progress reporting
#### Files
- **Modify:** `check_cards_and_upload.py`
#### Tests
- [ ] Integration: legacy script uploads 10 cards concurrently without errors
- [ ] Integration: S3 URLs match expected format
#### Acceptance Criteria
1. Same concurrency behavior as WP-04
2. No regression in existing functionality
---
## WP Summary
| WP | Title | Repo | Size | Dependencies | Tests |
|----|-------|------|------|-------------|-------|
| WP-00 | Baseline Benchmarks | both | XS | — | 1 |
| WP-01 | Self-Hosted Fonts | database | S | — | 4 |
| WP-02 | Persistent Browser Instance | database | M | — | 6 |
| WP-03 | FastAPI Lifespan Hooks | database | S | WP-02 | 4 |
| WP-04 | Concurrent Upload Pipeline | card-creation | M | WP-02 | 6 |
| WP-05 | Legacy Upload Script Update | card-creation | S | WP-04 | 2 |
**Total: 6 WPs, ~23 tests**
---
## Dependency Graph
```
WP-00 (benchmarks)
|
v
WP-01 (fonts) ──────┐
├──> WP-03 (lifespan) ──> Deploy to dev ──> WP-04 (concurrent upload)
WP-02 (browser) ────┘ |
v
WP-05 (legacy script)
|
v
Re-run benchmarks
```
**Parallelization:**
- WP-00, WP-01, WP-02 can all start immediately in parallel
- WP-03 needs WP-02
- WP-04 needs WP-02 deployed (persistent browser must be running server-side for concurrent fetches to work)
- WP-05 needs WP-04 (reuse the pattern)
---
## Risks
| Risk | Likelihood | Impact | Mitigation |
|------|-----------|--------|------------|
| Base64-embedded fonts bloat template HTML | Medium | Low | WOFF2 files are small (~20-40KB each). Total ~150KB base64 added to template. Acceptable since template is loaded once into Playwright, not transmitted to clients. |
| Persistent browser memory leak | Medium | Medium | Always close pages in `finally` block. Monitor RSS after sustained renders. Add `is_connected()` check for crash recovery. |
| Concurrent renders overload API server | Low | High | Semaphore bounds concurrency. Start at 8, tune based on server RAM (~100MB per page). 8 pages = ~800MB, well within 16GB. |
| Synchronous boto3 blocks event loop under concurrency | Medium | Medium | Use `run_in_executor` to offload to thread pool. Consider `aioboto3` if thread pool proves insufficient. |
| Visual regression from font change | Low | High | Visual diff test before/after. Render same card with both approaches and compare pixel output. |
---
## Open Questions
None — Phase 0 is straightforward infrastructure optimization with no design decisions pending.
---
## Follow-On: Local High-Concurrency Rendering (2026-03-14)
After Phase 0 was deployed, a follow-on improvement was implemented: **configurable API URL** for card rendering. This enables running the Paper Dynasty API server locally on the workstation and pointing upload scripts at `localhost` for dramatically higher concurrency.
### Changes
- `pd_cards/core/upload.py` — `upload_cards_to_s3()`, `refresh_card_images()`, `check_card_images()` accept `api_url` parameter (defaults to production)
- `pd_cards/commands/upload.py` — `--api-url` CLI option on `upload s3` command
- `check_cards_and_upload.py` — `PD_API_URL` env var override (legacy script)
### Expected Performance
| Scenario | Per-card | 800 cards |
|----------|----------|-----------|
| Remote server, 8x concurrency (current) | ~0.98s render + network | ~2-3 min |
| Local server, 32x concurrency | ~0.98s render, 32 parallel | ~30-45 sec |
### Usage
```bash
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
```
---
## Notes
- Phase 0 is a prerequisite for Phase 4 (Animated Cosmetics) which needs the persistent browser for efficient multi-frame APNG capture
- The persistent browser also benefits Phase 2/3 variant rendering
- GPU acceleration was evaluated and rejected — see PRD `02-architecture.md` § Optimization 4
- Consider `aioboto3` as a future enhancement if `run_in_executor` thread pool becomes a bottleneck

View File

@ -5,6 +5,7 @@ Commands for uploading card images to AWS S3.
"""
import asyncio
import sys
from pathlib import Path
from typing import Optional
@ -40,14 +41,27 @@ def s3(
dry_run: bool = typer.Option(
False, "--dry-run", "-n", help="Preview without uploading"
),
concurrency: int = typer.Option(
8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)"
),
api_url: str = typer.Option(
"https://pd.manticorum.com/api",
"--api-url",
help="API base URL for card rendering (use http://localhost:8000/api for local server)",
),
):
"""
Upload card images to AWS S3.
Fetches card images from Paper Dynasty API and uploads to S3 bucket.
Cards are processed concurrently; use --concurrency to tune parallelism.
For high-concurrency local rendering, start the API server locally and use:
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
Example:
pd-cards upload s3 --cardset "2005 Live" --limit 10
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
"""
console.print()
console.print("=" * 70)
@ -65,8 +79,10 @@ def s3(
console.print("Skipping: Batting cards")
if skip_pitchers:
console.print("Skipping: Pitching cards")
console.print(f"API URL: {api_url}")
console.print(f"Upload to S3: {upload and not dry_run}")
console.print(f"Update URLs: {update_urls and not dry_run}")
console.print(f"Concurrency: {concurrency} parallel tasks")
console.print()
if dry_run:
@ -76,39 +92,53 @@ def s3(
raise typer.Exit(0)
try:
import sys
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import check_cards_and_upload as ccu
from pd_cards.core.upload import upload_cards_to_s3
# Configure the module's globals
ccu.CARDSET_NAME = cardset
ccu.START_ID = start_id
ccu.TEST_COUNT = limit if limit else 9999
ccu.HTML_CARDS = html
ccu.SKIP_BATS = skip_batters
ccu.SKIP_ARMS = skip_pitchers
ccu.UPLOAD_TO_S3 = upload
ccu.UPDATE_PLAYER_URLS = update_urls
# Re-initialize S3 client if uploading
if upload:
import boto3
ccu.s3_client = boto3.client("s3", region_name=ccu.AWS_REGION)
else:
ccu.s3_client = None
def progress_callback(_count: int, label: str) -> None:
console.print(f" Progress: {label}")
console.print("[bold]Starting S3 upload...[/bold]")
console.print()
asyncio.run(ccu.main([]))
result = asyncio.run(
upload_cards_to_s3(
cardset_name=cardset,
start_id=start_id,
limit=limit,
html_cards=html,
skip_batters=skip_batters,
skip_pitchers=skip_pitchers,
upload=upload,
update_urls=update_urls,
on_progress=progress_callback,
concurrency=concurrency,
api_url=api_url,
)
)
success_count = len(result["successes"])
error_count = len(result["errors"])
upload_count = len(result["uploads"])
url_update_count = len(result["url_updates"])
console.print()
console.print("=" * 70)
console.print("[bold green]✓ S3 UPLOAD COMPLETE[/bold green]")
console.print("=" * 70)
console.print(f" Successes: {success_count}")
console.print(f" S3 uploads: {upload_count}")
console.print(f" URL updates: {url_update_count}")
if error_count:
console.print(f" [red]Errors: {error_count}[/red]")
for player, err in result["errors"][:10]:
console.print(
f" - player {player.get('player_id', '?')} "
f"({player.get('p_name', '?')}): {err}"
)
if error_count > 10:
console.print(f" ... and {error_count - 10} more (see logs)")
except ImportError as e:
console.print(f"[red]Error importing modules: {e}[/red]")

View File

@ -4,6 +4,7 @@ Card image upload and management core logic.
Business logic for uploading card images to AWS S3 and managing card URLs.
"""
import asyncio
import datetime
from typing import Optional
import urllib.parse
@ -25,7 +26,7 @@ def get_s3_base_url(
return f"https://{bucket}.s3.{region}.amazonaws.com"
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
"""
Fetch card image from URL and return raw bytes.
@ -106,6 +107,9 @@ def upload_card_to_s3(
raise
DEFAULT_PD_API_URL = "https://pd.manticorum.com/api"
async def upload_cards_to_s3(
cardset_name: str,
start_id: Optional[int] = None,
@ -118,9 +122,18 @@ async def upload_cards_to_s3(
bucket: str = DEFAULT_AWS_BUCKET,
region: str = DEFAULT_AWS_REGION,
on_progress: callable = None,
concurrency: int = 8,
api_url: str = DEFAULT_PD_API_URL,
) -> dict:
"""
Upload card images to S3 for a cardset.
Upload card images to S3 for a cardset using concurrent async tasks.
Cards are fetched and uploaded in parallel, bounded by ``concurrency``
semaphore slots. boto3 S3 calls (synchronous) are offloaded to a thread
pool via ``loop.run_in_executor`` so they do not block the event loop.
Individual card failures are collected and do NOT abort the batch;
a summary is logged once all tasks complete.
Args:
cardset_name: Name of the cardset to process
@ -134,6 +147,7 @@ async def upload_cards_to_s3(
bucket: S3 bucket name
region: AWS region
on_progress: Callback function for progress updates
concurrency: Number of parallel card-processing tasks (default 8)
Returns:
Dict with counts of errors, successes, uploads, url_updates
@ -165,163 +179,225 @@ async def upload_cards_to_s3(
timestamp = int(now.timestamp())
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
# PD API base URL for card generation
PD_API_URL = "https://pd.manticorum.com/api"
# PD API base URL for card generation (configurable for local rendering)
PD_API_URL = api_url
logger.info(f"Using API URL: {PD_API_URL}")
# Initialize S3 client if uploading
# Initialize S3 client if uploading (boto3 client is thread-safe for reads;
# we will call it from a thread pool so we create it once here)
s3_client = boto3.client("s3", region_name=region) if upload else None
# Build the filtered list of players to process, respecting start_id / limit
max_count = limit or 9999
filtered_players = []
for x in all_players:
if len(filtered_players) >= max_count:
break
if "pitching" in x["image"] and skip_pitchers:
continue
if "batting" in x["image"] and skip_batters:
continue
if start_id is not None and start_id > x["player_id"]:
continue
filtered_players.append(x)
total = len(filtered_players)
logger.info(f"Processing {total} cards with concurrency={concurrency}")
# Shared mutable state protected by a lock
errors = []
successes = []
uploads = []
url_updates = []
cxn_error = False
count = 0
max_count = limit or 9999
completed = 0
progress_lock = asyncio.Lock()
results_lock = asyncio.Lock()
async with aiohttp.ClientSession() as session:
for x in all_players:
# Apply filters
if "pitching" in x["image"] and skip_pitchers:
continue
if "batting" in x["image"] and skip_batters:
continue
if start_id is not None and start_id > x["player_id"]:
continue
loop = asyncio.get_running_loop()
semaphore = asyncio.Semaphore(concurrency)
async def report_progress():
"""Increment the completed counter and log every 20 completions."""
nonlocal completed
async with progress_lock:
completed += 1
if completed % 20 == 0 or completed == total:
logger.info(f"Progress: {completed}/{total} cards processed")
if on_progress:
on_progress(completed, f"{completed}/{total}")
async def process_single_card(x: dict) -> None:
"""
Process one player entry: fetch card image(s), upload to S3, and
optionally patch the player record with the new S3 URL.
Both the primary card (image) and the secondary card for two-way
players (image2) are handled here. Errors are appended to the
shared ``errors`` list rather than re-raised so the batch continues.
"""
async with semaphore:
player_id = x["player_id"]
# --- primary card ---
if "sombaseball" in x["image"]:
errors.append((x, f"Bad card url: {x['image']}"))
continue
if count >= max_count:
break
async with results_lock:
errors.append((x, f"Bad card url: {x['image']}"))
await report_progress()
return
count += 1
if on_progress and count % 20 == 0:
on_progress(count, x["p_name"])
# Determine card type from existing image URL
card_type = "pitching" if "pitching" in x["image"] else "batting"
# Generate card URL from PD API (forces fresh generation from database)
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
pd_card_url = (
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
)
if html_cards:
card_url = f"{pd_card_url}&html=true"
timeout = 2
else:
card_url = pd_card_url
timeout = 6
timeout = 10
primary_ok = False
try:
if upload and not html_cards:
# Fetch card image bytes directly
image_bytes = await fetch_card_image(
session, card_url, timeout=timeout
)
s3_url = upload_card_to_s3(
# boto3 is synchronous — offload to thread pool
s3_url = await loop.run_in_executor(
None,
upload_card_to_s3,
s3_client,
image_bytes,
x["player_id"],
player_id,
card_type,
release_date,
cardset["id"],
bucket,
region,
)
uploads.append((x["player_id"], card_type, s3_url))
async with results_lock:
uploads.append((player_id, card_type, s3_url))
# Update player record with new S3 URL
if update_urls:
await db_patch(
"players",
object_id=x["player_id"],
object_id=player_id,
params=[("image", s3_url)],
)
url_updates.append((x["player_id"], card_type, s3_url))
logger.info(f"Updated player {x['player_id']} image URL to S3")
async with results_lock:
url_updates.append((player_id, card_type, s3_url))
logger.info(f"Updated player {player_id} image URL to S3")
else:
# Just validate card exists
logger.info(f"Validating card URL: {card_url}")
await url_get(card_url, timeout=timeout)
primary_ok = True
except ConnectionError as e:
if cxn_error:
raise e
cxn_error = True
errors.append((x, e))
logger.error(f"Connection error for player {player_id}: {e}")
async with results_lock:
errors.append((x, e))
except ValueError as e:
errors.append((x, e))
async with results_lock:
errors.append((x, e))
except Exception as e:
logger.error(
f"S3 upload/update failed for player {x['player_id']}: {e}"
)
errors.append((x, f"S3 error: {e}"))
continue
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
async with results_lock:
errors.append((x, f"S3 error: {e}"))
# Handle image2 (dual-position players)
if not primary_ok:
await report_progress()
return
# --- secondary card (two-way players) ---
if x["image2"] is not None:
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
if html_cards:
card_url2 = f"{pd_card_url2}&html=true"
else:
card_url2 = pd_card_url2
if "sombaseball" in x["image2"]:
errors.append((x, f"Bad card url: {x['image2']}"))
else:
try:
if upload and not html_cards:
image_bytes2 = await fetch_card_image(
session, card_url2, timeout=6
)
s3_url2 = upload_card_to_s3(
s3_client,
image_bytes2,
x["player_id"],
card_type2,
release_date,
cardset["id"],
bucket,
region,
)
uploads.append((x["player_id"], card_type2, s3_url2))
async with results_lock:
errors.append((x, f"Bad card url: {x['image2']}"))
await report_progress()
return
if update_urls:
await db_patch(
"players",
object_id=x["player_id"],
params=[("image2", s3_url2)],
)
url_updates.append(
(x["player_id"], card_type2, s3_url2)
)
logger.info(
f"Updated player {x['player_id']} image2 URL to S3"
)
else:
await url_get(card_url2, timeout=6)
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
card_url2 = f"{pd_card_url2}&html=true" if html_cards else pd_card_url2
try:
if upload and not html_cards:
image_bytes2 = await fetch_card_image(
session, card_url2, timeout=10
)
s3_url2 = await loop.run_in_executor(
None,
upload_card_to_s3,
s3_client,
image_bytes2,
player_id,
card_type2,
release_date,
cardset["id"],
bucket,
region,
)
async with results_lock:
uploads.append((player_id, card_type2, s3_url2))
if update_urls:
await db_patch(
"players",
object_id=player_id,
params=[("image2", s3_url2)],
)
async with results_lock:
url_updates.append((player_id, card_type2, s3_url2))
logger.info(f"Updated player {player_id} image2 URL to S3")
else:
await url_get(card_url2, timeout=10)
async with results_lock:
successes.append(x)
except ConnectionError as e:
if cxn_error:
raise e
cxn_error = True
except ConnectionError as e:
logger.error(f"Connection error for player {player_id} image2: {e}")
async with results_lock:
errors.append((x, e))
except ValueError as e:
except ValueError as e:
async with results_lock:
errors.append((x, e))
except Exception as e:
logger.error(
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
)
except Exception as e:
logger.error(
f"S3 upload/update failed for player {player_id} image2: {e}"
)
async with results_lock:
errors.append((x, f"S3 error (image2): {e}"))
else:
successes.append(x)
async with results_lock:
successes.append(x)
await report_progress()
async with aiohttp.ClientSession() as session:
tasks = [process_single_card(x) for x in filtered_players]
await asyncio.gather(*tasks, return_exceptions=True)
# Log final summary
success_count = len(successes)
error_count = len(errors)
logger.info(
f"Upload complete: {success_count} succeeded, {error_count} failed "
f"out of {total} cards"
)
if error_count:
for player, err in errors:
logger.warning(
f" Failed: player {player.get('player_id', '?')} "
f"({player.get('p_name', '?')}): {err}"
)
return {
"errors": errors,
@ -338,6 +414,7 @@ async def refresh_card_images(
limit: Optional[int] = None,
html_cards: bool = False,
on_progress: callable = None,
api_url: str = DEFAULT_PD_API_URL,
) -> dict:
"""
Refresh card images for a cardset by triggering regeneration.
@ -357,7 +434,7 @@ async def refresh_card_images(
raise ValueError(f'Cardset "{cardset_name}" not found')
cardset = c_query["cardsets"][0]
CARD_BASE_URL = "https://pd.manticorum.com/api/v2/players"
CARD_BASE_URL = f"{api_url}/v2/players"
# Get all players
p_query = await db_get(
@ -470,7 +547,10 @@ async def refresh_card_images(
async def check_card_images(
cardset_name: str, limit: Optional[int] = None, on_progress: callable = None
cardset_name: str,
limit: Optional[int] = None,
on_progress: callable = None,
api_url: str = DEFAULT_PD_API_URL,
) -> dict:
"""
Check and validate card images without uploading.
@ -506,7 +586,7 @@ async def check_card_images(
now = datetime.datetime.now()
timestamp = int(now.timestamp())
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
PD_API_URL = "https://pd.manticorum.com/api"
PD_API_URL = api_url
errors = []
successes = []

290
scripts/benchmark_render.sh Executable file
View File

@ -0,0 +1,290 @@
#!/bin/bash
# =============================================================================
# WP-00: Paper Dynasty Card Render & Upload Pipeline Benchmark
# Phase 0 - Render Pipeline Optimization
#
# Usage:
# ./scripts/benchmark_render.sh # Run full benchmark (dev API)
# ./scripts/benchmark_render.sh --prod # Run against production API
# ./scripts/benchmark_render.sh --quick # Connectivity check only
#
# Requirements: curl, bc
# =============================================================================
# --- Configuration -----------------------------------------------------------

# API endpoints; dev is the default target, --prod switches to production.
DEV_API="https://pddev.manticorum.com/api"
PROD_API="https://pd.manticorum.com/api"
API_URL="$DEV_API"

# Sample player IDs from the 2005 Live cardset (12000-13000 range),
# mixing batters and pitchers across different teams.
PLAYER_IDS=(12785 12790 12800 12810 12820 12830 12840 12850 12860 12870)

# Per-run bookkeeping: results file next to this script, a human-readable
# timestamp, and a unique cache-busting label derived from the epoch.
RESULTS_FILE="$(dirname "$0")/benchmark_results.txt"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
RUN_LABEL="benchmark-$(date +%s)"

# --- Argument parsing ---------------------------------------------------------

# Flags default to off; unrecognized arguments are deliberately ignored.
QUICK_MODE=false
for arg; do
    case "$arg" in
        --help|-h)
            echo "Usage: $0 [--prod] [--quick]"
            echo " --prod Use production API instead of dev"
            echo " --quick Connectivity check only (1 request)"
            exit 0
            ;;
        --prod)
            API_URL="$PROD_API"
            ;;
        --quick)
            QUICK_MODE=true
            ;;
    esac
done
# --- Helpers -----------------------------------------------------------------

# hr: print a 72-character dashed horizontal rule (with trailing newline).
hr() { printf '%*s\n' 72 '' | tr ' ' '-'; }

# bc-backed float arithmetic on decimal strings.
# fadd A B: A + B
fadd() { printf '%s + %s\n' "$1" "$2" | bc -l; }
# fdiv A B: A / B at 6 decimal places
fdiv() { printf 'scale=6; %s / %s\n' "$1" "$2" | bc -l; }
# flt A B: prints 1 when A < B, else 0
flt() { printf '%s < %s\n' "$1" "$2" | bc -l; }

# fmt3 X: format X to exactly 3 decimal places
fmt3() { printf '%.3f' "$1"; }

# log ...: write the arguments to stdout and append them to the results file
log() { tee -a "$RESULTS_FILE" <<< "$*"; }
# Single card render with timing; sets LAST_HTTP, LAST_TIME, LAST_CONN,
# LAST_TTFB, LAST_SIZE and LAST_URL as "return values" for the caller.
#
# Usage: measure_card PLAYER_ID [CARD_TYPE]
#   PLAYER_ID  numeric player id
#   CARD_TYPE  "batting" (default) or "pitching"
measure_card() {
    local player_id="$1"
    local card_type="${2:-batting}"
    # Unique cache-bust per run+player forces a fresh server-side render.
    local cache_bust="${RUN_LABEL}-${player_id}"
    local url="${API_URL}/v2/players/${player_id}/${card_type}card?d=${cache_bust}"

    # -s silent, -o discard body, -w write timing vars separated by |
    local result
    result=$(curl -s -o /dev/null \
        -w "%{http_code}|%{time_total}|%{time_connect}|%{time_starttransfer}|%{size_download}" \
        --max-time 30 \
        "$url" 2>&1)

    # Parse all five |-separated fields with a single read instead of five
    # echo|cut subshells.  Because stderr is merged into $result (2>&1), any
    # curl error text precedes the -w output; cut would then take fields from
    # every line and corrupt LAST_HTTP.  The -w line is always last, so parse
    # only the final line.
    IFS='|' read -r LAST_HTTP LAST_TIME LAST_CONN LAST_TTFB LAST_SIZE \
        <<< "$(printf '%s\n' "$result" | tail -n 1)"
    LAST_URL="$url"
}
# =============================================================================
# START
# =============================================================================
# Truncate results file for this run and write header
# (unquoted EOF delimiter: $TIMESTAMP / $API_URL / $RUN_LABEL expand inside)
cat > "$RESULTS_FILE" << EOF
Paper Dynasty Card Render Benchmark
Run timestamp : $TIMESTAMP
API target : $API_URL
Cache-bust tag: $RUN_LABEL
EOF
# Blank separator line in the results file, then one on stdout.
echo "" >> "$RESULTS_FILE"
echo ""
# Banner goes through log() so it lands both on screen and in the results file.
log "=============================================================="
log " Paper Dynasty Card Render Benchmark - WP-00 / Phase 0"
log " $(date '+%Y-%m-%d %H:%M:%S')"
log " API: $API_URL"
log "=============================================================="
echo ""
# =============================================================================
# SECTION 1: Connectivity Check
# =============================================================================
log "--- Section 1: Connectivity Check ---"
log ""
log "Sending single request to verify API is reachable..."
log " Player : 12785 (batting card)"
log " URL : ${API_URL}/v2/players/12785/battingcard?d=${RUN_LABEL}-probe"
echo ""
# measure_card populates LAST_HTTP / LAST_TIME / LAST_CONN / LAST_TTFB / LAST_SIZE.
measure_card 12785 batting
if [ "$LAST_HTTP" = "200" ]; then
log " HTTP : $LAST_HTTP OK"
log " Total : $(fmt3 $LAST_TIME)s"
log " Connect: $(fmt3 $LAST_CONN)s"
log " TTFB : $(fmt3 $LAST_TTFB)s"
log " Size : ${LAST_SIZE} bytes ($(echo "scale=1; $LAST_SIZE/1024" | bc)KB)"
log ""
log " Connectivity: PASS"
# curl reports "000" (or nothing) when no HTTP response arrived at all
# (DNS failure, connection refused, or --max-time exceeded) — hard abort.
elif [ -z "$LAST_HTTP" ] || [ "$LAST_HTTP" = "000" ]; then
log " ERROR: Could not reach $API_URL (no response / timeout)"
log " Aborting benchmark."
echo ""
exit 1
else
# Any other status (404/500/...) is suspicious but non-fatal.
log " HTTP : $LAST_HTTP"
log " WARNING: Unexpected status code. Continuing anyway."
fi
echo ""
# --quick mode stops after the connectivity probe (no benchmark sections).
if [ "$QUICK_MODE" = true ]; then
log "Quick mode: exiting after connectivity check."
echo ""
exit 0
fi
# =============================================================================
# SECTION 2: Sequential Card Render Benchmark (10 cards)
# =============================================================================
log ""
hr
log "--- Section 2: Sequential Card Render Benchmark ---"
log ""
log "Rendering ${#PLAYER_IDS[@]} cards sequentially with fresh cache busts."
log "Each request forces a full server-side render (bypasses nginx cache)."
log ""
# Fixed-width table header so rows line up in the results file.
log "$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' 'Player' 'HTTP' 'Total(s)' 'TTFB(s)' 'Connect(s)' 'Size(KB)')"
log "$(printf '%0.s-' {1..62})"
# Accumulators
# (times are bc decimal strings; fadd/fdiv/flt do the float arithmetic)
total_time="0"
min_time=""
max_time=""
success_count=0
fail_count=0
all_times=()
for pid in "${PLAYER_IDS[@]}"; do
# Each call refreshes the LAST_* globals for this player's request.
measure_card "$pid" batting
size_kb=$(echo "scale=1; $LAST_SIZE/1024" | bc)
row=$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' \
"$pid" \
"$LAST_HTTP" \
"$(fmt3 $LAST_TIME)" \
"$(fmt3 $LAST_TTFB)" \
"$(fmt3 $LAST_CONN)" \
"$size_kb")
if [ "$LAST_HTTP" = "200" ]; then
log "$row"
total_time=$(fadd "$total_time" "$LAST_TIME")
all_times+=("$LAST_TIME")
success_count=$((success_count + 1))
# Track min
# (flt prints "1" when first arg < second; empty min_time means first sample)
if [ -z "$min_time" ] || [ "$(flt $LAST_TIME $min_time)" = "1" ]; then
min_time="$LAST_TIME"
fi
# Track max
if [ -z "$max_time" ] || [ "$(flt $max_time $LAST_TIME)" = "1" ]; then
max_time="$LAST_TIME"
fi
else
# Non-200: count the failure but keep benchmarking the remaining players.
log "$row << FAILED"
fail_count=$((fail_count + 1))
fi
done
echo ""
log ""
log "--- Section 2: Results Summary ---"
log ""
# Summary stats only make sense with at least one success
# (also avoids a bc division by zero in fdiv below).
if [ "$success_count" -gt 0 ]; then
avg_time=$(fdiv "$total_time" "$success_count")
log " Cards requested : ${#PLAYER_IDS[@]}"
log " Successful : $success_count"
log " Failed : $fail_count"
log " Total wall time : $(fmt3 $total_time)s"
log " Average per card : $(fmt3 $avg_time)s"
log " Minimum : $(fmt3 $min_time)s"
log " Maximum : $(fmt3 $max_time)s"
log ""
# Rough throughput estimate (sequential)
cards_per_min=$(echo "scale=1; 60 / $avg_time" | bc)
log " Sequential throughput: ~${cards_per_min} cards/min"
# Estimate full cardset at ~500 players * 2 cards each = 1000 renders
# ("/ 1" with scale=0 truncates the product to a whole number of seconds)
est_1000=$(echo "scale=0; (1000 * $avg_time) / 1" | bc)
log " Est. full cardset (1000 renders, sequential): ~${est_1000}s (~$(echo "scale=1; $est_1000/60" | bc) min)"
else
log " No successful renders to summarize."
fi
# =============================================================================
# SECTION 3: Upload Pipeline Reference
# =============================================================================
echo ""
log ""
hr
log "--- Section 3: Upload Pipeline Benchmark Commands ---"
log ""
# NOTE(review): the text below looks stale — this commit changes upload.py to
# a 10s fetch timeout and concurrent (asyncio) card processing rather than a
# 6s sequential loop; confirm against pd_cards/core/upload.py and update.
log "The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards"
log "and uploads them to S3. It uses a persistent aiohttp session with a 6s"
log "timeout per card."
log ""
log "To time a dry-run batch of 20 cards:"
log ""
log " cd /mnt/NV2/Development/paper-dynasty/card-creation"
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20 --dry-run"
log ""
log "To time a real upload batch of 20 cards (writes to S3, updates DB URLs):"
log ""
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20"
log ""
log "Notes:"
log " - dry-run validates card URLs exist without uploading"
log " - Remove --limit for full cardset run"
log " - Pipeline is currently sequential (one card at a time per session)"
log " - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card"
log " - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio"
log ""
# =============================================================================
# SECTION 4: Before/After Comparison Template
# =============================================================================
echo ""
hr
log "--- Section 4: Before/After Comparison Template ---"
log ""
log "Fill in after optimization work is complete."
log ""
log " Metric Before After Delta"
log " $(printf '%0.s-' {1..64})"
# Section 2 results feed the "Before" column when available.
if [ "$success_count" -gt 0 ]; then
log " Avg render time (s) $(fmt3 $avg_time) ___._____ ___._____"
log " Min render time (s) $(fmt3 $min_time) ___._____ ___._____"
log " Max render time (s) $(fmt3 $max_time) ___._____ ___._____"
log " Sequential cards/min ${cards_per_min} ___.___ ___.___"
else
log " Avg render time (s) (no data) ___._____ ___._____"
fi
log " Upload batch (20 cards) ___._____s ___._____s ___._____s"
log " Upload cards/min ___.___ ___.___ ___.___"
log " Full cardset time (est) ___._____min ___._____min ___ min saved"
log ""
# =============================================================================
# DONE
# =============================================================================
echo ""
hr
log "Benchmark complete."
log "Results saved to: $RESULTS_FILE"
log ""
# Voice notify
# Best-effort local TTS notification; "|| true" keeps a missing notifier from
# failing the script, and ${avg_time:-0} guards the all-renders-failed case.
curl -s -X POST http://localhost:8888/notify \
-H 'Content-Type: application/json' \
-d "{\"message\":\"Benchmark complete. Average render time $(fmt3 ${avg_time:-0}) seconds per card\"}" \
> /dev/null 2>&1 || true

View File

@ -0,0 +1,93 @@
Paper Dynasty Card Render Benchmark
Run timestamp : 2026-03-12 23:40:54
API target : https://pddev.manticorum.com/api
Cache-bust tag: benchmark-1773376854
==============================================================
Paper Dynasty Card Render Benchmark - WP-00 / Phase 0
2026-03-12 23:40:54
API: https://pddev.manticorum.com/api
==============================================================
--- Section 1: Connectivity Check ---
Sending single request to verify API is reachable...
Player : 12785 (batting card)
URL : https://pddev.manticorum.com/api/v2/players/12785/battingcard?d=benchmark-1773376854-probe
HTTP : 200 OK
Total : 1.944s
Connect: 0.010s
TTFB : 1.933s
Size : 192175 bytes (187.6KB)
Connectivity: PASS
--- Section 2: Sequential Card Render Benchmark ---
Rendering 10 cards sequentially with fresh cache busts.
Each request forces a full server-side render (bypasses nginx cache).
Player HTTP Total(s) TTFB(s) Connect(s) Size(KB)
--------------------------------------------------------------
12785 200 0.056 0.046 0.008 187.6
12790 200 1.829 1.815 0.008 202.3
12800 200 2.106 2.096 0.008 192.4
12810 200 1.755 1.745 0.009 189.8
12820 200 2.041 2.030 0.009 193.1
12830 200 2.433 2.423 0.009 180.3
12840 200 2.518 2.507 0.009 202.3
12850 200 2.191 2.174 0.009 187.6
12860 200 2.478 2.469 0.009 190.4
12870 200 2.913 2.901 0.009 192.8
--- Section 2: Results Summary ---
Cards requested : 10
Successful : 10
Failed : 0
Total wall time : 20.321s
Average per card : 2.032s
Minimum : 0.056s
Maximum : 2.913s
Sequential throughput: ~29.5 cards/min
Est. full cardset (1000 renders, sequential): ~2032s (~33.8 min)
--- Section 3: Upload Pipeline Benchmark Commands ---
The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards
and uploads them to S3. It uses a persistent aiohttp session with a 6s
timeout per card.
To time a dry-run batch of 20 cards:
cd /mnt/NV2/Development/paper-dynasty/card-creation
time pd-cards upload s3 --cardset "2005 Live" --limit 20 --dry-run
To time a real upload batch of 20 cards (writes to S3, updates DB URLs):
time pd-cards upload s3 --cardset "2005 Live" --limit 20
Notes:
- dry-run validates card URLs exist without uploading
- Remove --limit for full cardset run
- Pipeline is currently sequential (one card at a time per session)
- Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card
- Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio
--- Section 4: Before/After Comparison Template ---
Fill in after optimization work is complete.
Metric Before After Delta
----------------------------------------------------------------
Avg render time (s) 2.032 ___._____ ___._____
Min render time (s) 0.056 ___._____ ___._____
Max render time (s) 2.913 ___._____ ___._____
Sequential cards/min 29.5 ___.___ ___.___
Upload batch (20 cards) ___._____s ___._____s ___._____s
Upload cards/min ___.___ ___.___ ___.___
Full cardset time (est) ___._____min ___._____min ___ min saved
Benchmark complete.
Results saved to: scripts/benchmark_results.txt