Merge branch 'main' into fix/2-3-security-hardcoded-secrets
This commit is contained in:
commit
6c20f93901
@ -118,6 +118,9 @@ pd-cards scouting all && pd-cards scouting upload
|
||||
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||
|
||||
# High-concurrency local rendering (start API server locally first)
|
||||
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||
|
||||
# Check cards without uploading
|
||||
pd-cards upload check --cardset "2005 Live" --limit 10
|
||||
|
||||
@ -263,6 +266,7 @@ Before running retrosheet_data.py, verify these configuration settings:
|
||||
- `UPDATE_PLAYER_URLS`: Enable/disable updating player records with S3 URLs (careful - modifies database)
|
||||
- `AWS_BUCKET_NAME`: S3 bucket name (default: 'paper-dynasty')
|
||||
- `AWS_REGION`: AWS region (default: 'us-east-1')
|
||||
- `PD_API_URL` (env var): Override the API base URL for card rendering (default: `https://pd.manticorum.com/api`). Set to `http://localhost:8000/api` for local rendering.
|
||||
|
||||
**S3 URL Structure**: `cards/cardset-{cardset_id:03d}/player-{player_id}/{batting|pitching}card.png?d={release_date}`
|
||||
- Uses zero-padded 3-digit cardset ID for consistent sorting
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import functools
|
||||
import os
|
||||
import sys
|
||||
import boto3
|
||||
|
||||
@ -14,6 +16,9 @@ HTML_CARDS = False # boolean to only check and not generate cards
|
||||
SKIP_ARMS = False
|
||||
SKIP_BATS = False
|
||||
|
||||
# Concurrency
|
||||
CONCURRENCY = 8 # number of parallel card-processing tasks
|
||||
|
||||
# AWS Configuration
|
||||
AWS_BUCKET_NAME = "paper-dynasty" # Change to your bucket name
|
||||
AWS_REGION = "us-east-1" # Change to your region
|
||||
@ -23,11 +28,11 @@ UPLOAD_TO_S3 = (
|
||||
)
|
||||
UPDATE_PLAYER_URLS = True # Set to False to skip player URL updates (testing) - STEP 6: Update player URLs
|
||||
|
||||
# Initialize S3 client
|
||||
# Initialize S3 client (module-level; boto3 client is thread-safe for concurrent reads)
|
||||
s3_client = boto3.client("s3", region_name=AWS_REGION) if UPLOAD_TO_S3 else None
|
||||
|
||||
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
|
||||
"""
|
||||
Fetch card image from URL and return raw bytes.
|
||||
|
||||
@ -131,168 +136,220 @@ async def main(args):
|
||||
timestamp = int(now.timestamp())
|
||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||
|
||||
# PD API base URL for card generation
|
||||
PD_API_URL = "https://pd.manticorum.com/api"
|
||||
# PD API base URL for card generation (override with PD_API_URL env var for local rendering)
|
||||
PD_API_URL = os.environ.get("PD_API_URL", "https://pd.manticorum.com/api")
|
||||
|
||||
print(f"\nRelease date for cards: {release_date}")
|
||||
print(f"API URL: {PD_API_URL}")
|
||||
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
||||
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}")
|
||||
print(f"Concurrency: {CONCURRENCY} parallel tasks\n")
|
||||
|
||||
# Build filtered list respecting SKIP_ARMS, SKIP_BATS, START_ID, TEST_COUNT
|
||||
max_count = TEST_COUNT if TEST_COUNT is not None else 9999
|
||||
filtered_players = []
|
||||
for x in all_players:
|
||||
if len(filtered_players) >= max_count:
|
||||
break
|
||||
if "pitching" in x["image"] and SKIP_ARMS:
|
||||
continue
|
||||
if "batting" in x["image"] and SKIP_BATS:
|
||||
continue
|
||||
if START_ID is not None and START_ID > x["player_id"]:
|
||||
continue
|
||||
filtered_players.append(x)
|
||||
|
||||
total = len(filtered_players)
|
||||
logger.info(f"Processing {total} cards with concurrency={CONCURRENCY}")
|
||||
|
||||
# Shared mutable state protected by locks
|
||||
errors = []
|
||||
successes = []
|
||||
uploads = []
|
||||
url_updates = []
|
||||
cxn_error = False
|
||||
count = -1
|
||||
completed = 0
|
||||
progress_lock = asyncio.Lock()
|
||||
results_lock = asyncio.Lock()
|
||||
|
||||
start_time = datetime.datetime.now()
|
||||
loop = asyncio.get_running_loop()
|
||||
semaphore = asyncio.Semaphore(CONCURRENCY)
|
||||
|
||||
print(f"\nRelease date for cards: {release_date}")
|
||||
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
||||
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}\n")
|
||||
async def report_progress():
|
||||
"""Increment the completed counter and log/print every 20 completions."""
|
||||
nonlocal completed
|
||||
async with progress_lock:
|
||||
completed += 1
|
||||
if completed % 20 == 0 or completed == total:
|
||||
print(f"Progress: {completed}/{total} cards processed")
|
||||
logger.info(f"Progress: {completed}/{total} cards processed")
|
||||
|
||||
# Create persistent aiohttp session for all card fetches
|
||||
async with aiohttp.ClientSession() as session:
|
||||
for x in all_players:
|
||||
if "pitching" in x["image"] and SKIP_ARMS:
|
||||
pass
|
||||
elif "batting" in x["image"] and SKIP_BATS:
|
||||
pass
|
||||
elif START_ID is not None and START_ID > x["player_id"]:
|
||||
pass
|
||||
elif "sombaseball" in x["image"]:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
async def process_single_card(x: dict) -> None:
|
||||
"""
|
||||
Process one player entry under the semaphore: fetch card image(s), upload
|
||||
to S3 (offloading the synchronous boto3 call to a thread pool), and
|
||||
optionally patch the player record with the new S3 URL.
|
||||
|
||||
Both the primary card (image) and the secondary card for two-way players
|
||||
(image2) are handled. Failures are appended to the shared errors list
|
||||
rather than re-raised so the overall batch continues.
|
||||
"""
|
||||
async with semaphore:
|
||||
player_id = x["player_id"]
|
||||
|
||||
# --- primary card ---
|
||||
if "sombaseball" in x["image"]:
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
pd_card_url = (
|
||||
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
|
||||
)
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
count += 1
|
||||
if count % 20 == 0:
|
||||
print(f"Card #{count + 1} being pulled is {x['p_name']}...")
|
||||
elif TEST_COUNT is not None and TEST_COUNT < count:
|
||||
print("Done test run")
|
||||
break
|
||||
card_url = pd_card_url
|
||||
timeout = 10
|
||||
|
||||
# Determine card type from existing image URL
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
card_url = pd_card_url
|
||||
timeout = 6
|
||||
|
||||
try:
|
||||
# Upload to S3 if enabled
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
# Fetch card image bytes directly
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
s3_url = upload_card_to_s3(
|
||||
primary_ok = False
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
# boto3 is synchronous — offload to thread pool so the event
|
||||
# loop is not blocked during the S3 PUT
|
||||
s3_url = await loop.run_in_executor(
|
||||
None,
|
||||
functools.partial(
|
||||
upload_card_to_s3,
|
||||
image_bytes,
|
||||
x["player_id"],
|
||||
player_id,
|
||||
card_type,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
)
|
||||
uploads.append((x["player_id"], card_type, s3_url))
|
||||
),
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type, s3_url))
|
||||
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=player_id,
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type, s3_url))
|
||||
logger.info(f"Updated player {player_id} image URL to S3")
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
logger.info("calling the card url")
|
||||
await url_get(card_url, timeout=timeout)
|
||||
|
||||
primary_ok = True
|
||||
|
||||
except ConnectionError as e:
|
||||
logger.error(f"Connection error for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
|
||||
if not primary_ok:
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
# --- secondary card (two-way players) ---
|
||||
if x["image2"] is not None:
|
||||
if "sombaseball" in x["image2"]:
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=10
|
||||
)
|
||||
s3_url2 = await loop.run_in_executor(
|
||||
None,
|
||||
functools.partial(
|
||||
upload_card_to_s3,
|
||||
image_bytes2,
|
||||
player_id,
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
),
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type2, s3_url2))
|
||||
|
||||
# Update player record with new S3 URL
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
url_updates.append((x["player_id"], card_type, s3_url))
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image URL to S3"
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type2, s3_url2))
|
||||
logger.info(f"Updated player {player_id} image2 URL to S3")
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
logger.info("calling the card url")
|
||||
resp = await url_get(card_url, timeout=timeout)
|
||||
await url_get(card_url2, timeout=10)
|
||||
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
logger.error(f"Connection error for player {player_id} image2: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']}: {e}"
|
||||
f"S3 upload/update failed for player {player_id} image2: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
continue
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
|
||||
# Handle image2 (dual-position players)
|
||||
if x["image2"] is not None:
|
||||
# Determine second card type
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
if "sombaseball" in x["image2"]:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
else:
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
# Fetch second card image bytes directly from PD API
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=6
|
||||
)
|
||||
s3_url2 = upload_card_to_s3(
|
||||
image_bytes2,
|
||||
x["player_id"],
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
)
|
||||
uploads.append((x["player_id"], card_type2, s3_url2))
|
||||
|
||||
# Update player record with new S3 URL for image2
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
url_updates.append(
|
||||
(x["player_id"], card_type2, s3_url2)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image2 URL to S3"
|
||||
)
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
resp = await url_get(card_url2, timeout=6)
|
||||
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
else:
|
||||
else:
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
await report_progress()
|
||||
|
||||
# Create persistent aiohttp session shared across all concurrent tasks
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = [process_single_card(x) for x in filtered_players]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Print summary
|
||||
print(f"\n{'=' * 60}")
|
||||
print("SUMMARY")
|
||||
|
||||
@ -404,17 +404,35 @@ pd-cards upload s3 --cardset <name> [OPTIONS]
|
||||
| `--upload/--no-upload` | | `True` | Upload to S3 |
|
||||
| `--update-urls/--no-update-urls` | | `True` | Update player URLs in database |
|
||||
| `--dry-run` | `-n` | `False` | Preview without uploading |
|
||||
| `--concurrency` | `-j` | `8` | Number of parallel uploads |
|
||||
| `--api-url` | | `https://pd.manticorum.com/api` | API base URL for card rendering |
|
||||
|
||||
**Prerequisites:** AWS CLI configured with credentials (`~/.aws/credentials`)
|
||||
|
||||
**S3 URL Structure:** `cards/cardset-{id:03d}/player-{player_id}/{batting|pitching}card.png?d={date}`
|
||||
|
||||
**Local Rendering:** For high-concurrency local rendering, start the Paper Dynasty API server locally and point uploads at it:
|
||||
|
||||
```bash
|
||||
# Terminal 1: Start local API server (from database repo)
|
||||
cd /mnt/NV2/Development/paper-dynasty/database
|
||||
DATABASE_TYPE=postgresql POSTGRES_HOST=10.10.0.42 POSTGRES_DB=paperdynasty_dev \
|
||||
POSTGRES_USER=sba_admin POSTGRES_PASSWORD=<pw> POSTGRES_PORT=5432 \
|
||||
API_TOKEN=<api-token> \
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||
|
||||
# Terminal 2: Upload with local rendering
|
||||
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||
```
|
||||
|
||||
**Examples:**
|
||||
```bash
|
||||
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||
pd-cards upload s3 --cardset "2005 Live" --start-id 5000
|
||||
pd-cards upload s3 --cardset "2005 Live" --skip-pitchers
|
||||
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
||||
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
356
docs/prd-evolution/PHASE0_PROJECT_PLAN.md
Normal file
356
docs/prd-evolution/PHASE0_PROJECT_PLAN.md
Normal file
@ -0,0 +1,356 @@
|
||||
# Phase 0 — Render Pipeline Optimization: Project Plan
|
||||
|
||||
**Version:** 1.1
|
||||
**Date:** 2026-03-13
|
||||
**PRD Reference:** `docs/prd-evolution/02-architecture.md` § Card Render Pipeline Optimization, `13-implementation.md` § Phase 0
|
||||
**Status:** Complete — deployed to dev (PR #94), client-side concurrent uploads merged via PR #28 (card-creation repo)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Phase 0 is independent of Card Evolution and benefits all existing card workflows immediately. The goal is to reduce per-card render time and full cardset uploads significantly by eliminating browser spawn overhead, CDN dependencies, and sequential processing.
|
||||
|
||||
**Bottlenecks addressed:**
|
||||
1. New Chromium process spawned per render request (~1.0-1.5s overhead)
|
||||
2. Google Fonts CDN fetched over network on every render (~0.3-0.5s) — no persistent cache since browser is destroyed after each render
|
||||
3. Upload pipeline is fully sequential — one card at a time, blocking S3 upload via synchronous boto3
|
||||
|
||||
**Results:**
|
||||
|
||||
| Metric | Before | Target | Actual |
|
||||
|--------|--------|--------|--------|
|
||||
| Per-card render (fresh) | ~2.0s (benchmark avg) | <1.0s | **~0.98s avg** (range 0.63-1.44s, **~51% reduction**) |
|
||||
| Per-card render (cached) | N/A | — | **~0.1s** |
|
||||
| External dependencies during render | Google Fonts CDN | None | **None** |
|
||||
| Chromium processes per 800-card run | 800 | 1 | **1** |
|
||||
| 800-card upload (sequential, estimated) | ~27 min | ~8-13 min | ~13 min (estimated at 0.98s/card) |
|
||||
| 800-card upload (concurrent 8x, estimated) | N/A | ~2-4 min | ~2-3 min (estimated) |
|
||||
|
||||
**Benchmark details (7 fresh renders on dev, 2026-03-13):**
|
||||
|
||||
| Player | Type | Time |
|
||||
|--------|------|------|
|
||||
| Michael Young (12726) | Batting | 0.96s |
|
||||
| Darin Erstad (12729) | Batting | 0.78s |
|
||||
| Wilson Valdez (12746) | Batting | 1.44s |
|
||||
| Player 12750 | Batting | 0.76s |
|
||||
| Jarrod Washburn (12880) | Pitching | 0.63s |
|
||||
| Ryan Drese (12879) | Pitching | 1.25s |
|
||||
| Player 12890 | Pitching | 1.07s |
|
||||
|
||||
**Average: 0.98s** — meets the <1s target. Occasional spikes to ~1.4s from Chromium GC pressure. Pitching cards tend to render slightly faster due to less template data.
|
||||
|
||||
**Optimization breakdown:**
|
||||
- Persistent browser (WP-02): eliminated ~1.0s spawn overhead
|
||||
- Variable font deduplication (WP-01 fix): eliminated ~163KB redundant base64 parsing, saved ~0.4s
|
||||
- Remaining ~0.98s is Playwright page creation, HTML parsing, and PNG screenshot — not reducible without GPU acceleration or a different rendering approach
|
||||
|
||||
---
|
||||
|
||||
## Work Packages (6 WPs)
|
||||
|
||||
### WP-00: Baseline Benchmarks
|
||||
|
||||
**Repo:** `database` + `card-creation`
|
||||
**Complexity:** XS
|
||||
**Dependencies:** None
|
||||
|
||||
Capture before-metrics so we can measure improvement.
|
||||
|
||||
#### Tasks
|
||||
1. Time 10 sequential card renders via the API (curl with timing)
|
||||
2. Time a small batch S3 upload (e.g., 20 cards) via `pd-cards upload`
|
||||
3. Record results in a benchmark log
|
||||
|
||||
#### Tests
|
||||
- [ ] Benchmark script or documented curl commands exist and are repeatable
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. Baseline numbers recorded for per-card render time
|
||||
2. Baseline numbers recorded for batch upload time
|
||||
3. Methodology is repeatable for post-optimization comparison
|
||||
|
||||
---
|
||||
|
||||
### WP-01: Self-Hosted Fonts
|
||||
|
||||
**Repo:** `database`
|
||||
**Complexity:** S
|
||||
**Dependencies:** None (can run in parallel with WP-02)
|
||||
|
||||
Replace Google Fonts CDN with locally embedded WOFF2 fonts. Eliminates ~0.3-0.5s network round-trip per render and removes external dependency.
|
||||
|
||||
#### Current State
|
||||
- `storage/templates/player_card.html` lines 5-7: `<link>` tags to `fonts.googleapis.com`
|
||||
- `storage/templates/style.html`: References `"Open Sans"` and `"Source Sans 3"` font-families
|
||||
- Two fonts used: Open Sans (300, 400, 700) and Source Sans 3 (400, 700)
|
||||
|
||||
#### Implementation
|
||||
1. Download WOFF2 files for both fonts (5 files total: Open Sans 300/400/700, Source Sans 3 400/700)
|
||||
2. Base64-encode each WOFF2 file
|
||||
3. Add `@font-face` declarations with base64 data URIs to `style.html`
|
||||
4. Remove the three `<link>` tags from `player_card.html`
|
||||
5. Visual diff: render the same card before/after and verify identical output
|
||||
|
||||
#### Files
|
||||
- **Create:** `database/storage/fonts/` directory with raw WOFF2 files (source archive, not deployed)
|
||||
- **Modify:** `database/storage/templates/style.html` — add `@font-face` declarations
|
||||
- **Modify:** `database/storage/templates/player_card.html` — remove `<link>` tags (lines 5-7)
|
||||
|
||||
#### Tests
|
||||
- [ ] Unit: `style.html` contains no `fonts.googleapis.com` references
|
||||
- [ ] Unit: `player_card.html` contains no `<link>` to external font CDNs
|
||||
- [ ] Unit: `@font-face` declarations present for all 5 font variants
|
||||
- [ ] Visual: rendered card is pixel-identical to pre-change output (manual check)
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. No external network requests during card render
|
||||
2. All 5 font weights render correctly
|
||||
3. Card appearance unchanged
|
||||
|
||||
---
|
||||
|
||||
### WP-02: Persistent Browser Instance
|
||||
|
||||
**Repo:** `database`
|
||||
**Complexity:** M
|
||||
**Dependencies:** None (can run in parallel with WP-01)
|
||||
|
||||
Replace per-request Chromium launch/teardown with a persistent browser that lives for the lifetime of the API process. Eliminates ~1.0-1.5s spawn overhead per render.
|
||||
|
||||
#### Current State
|
||||
- `app/routers_v2/players.py` lines 801-826: `async with async_playwright() as p:` block creates and destroys a browser per request
|
||||
- No browser reuse, no connection pooling
|
||||
|
||||
#### Implementation
|
||||
1. Add module-level `_browser` and `_playwright` globals to `players.py`
|
||||
2. Implement `get_browser()` — lazy-init with `is_connected()` auto-reconnect
|
||||
3. Implement `shutdown_browser()` — clean teardown for API shutdown
|
||||
4. Replace the `async with async_playwright()` block with page-per-request pattern:
|
||||
```python
|
||||
browser = await get_browser()
|
||||
page = await browser.new_page(viewport={"width": 1280, "height": 720})
|
||||
try:
|
||||
await page.set_content(html_string)
|
||||
await page.screenshot(path=file_path, type="png", clip={...})
|
||||
finally:
|
||||
await page.close()
|
||||
```
|
||||
5. Ensure page is always closed in `finally` block to prevent memory leaks
|
||||
|
||||
#### Files
|
||||
- **Modify:** `database/app/routers_v2/players.py` — persistent browser, page-per-request
|
||||
|
||||
#### Tests
|
||||
- [ ] Unit: `get_browser()` returns a connected browser
|
||||
- [ ] Unit: `get_browser()` returns same instance on second call
|
||||
- [ ] Unit: `get_browser()` relaunches if browser disconnected
|
||||
- [ ] Integration: render 10 cards sequentially, no browser leaks (page count returns to 0 between renders)
|
||||
- [ ] Integration: concurrent renders (4 simultaneous requests) complete without errors
|
||||
- [ ] Integration: `shutdown_browser()` cleanly closes browser and playwright
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. Only 1 Chromium process running regardless of render count
|
||||
2. Page count returns to 0 between renders (no leaks)
|
||||
3. Auto-reconnect works if browser crashes
|
||||
4. ~~Per-card render time drops to ~1.0-1.5s~~ **Actual: ~0.98s avg fresh render (from ~2.0s baseline) — target met**
|
||||
|
||||
---
|
||||
|
||||
### WP-03: FastAPI Lifespan Hooks
|
||||
|
||||
**Repo:** `database`
|
||||
**Complexity:** S
|
||||
**Dependencies:** WP-02
|
||||
|
||||
Wire `get_browser()` and `shutdown_browser()` into FastAPI's lifespan so the browser warms up on startup and cleans up on shutdown.
|
||||
|
||||
#### Current State
|
||||
- `app/main.py` line 54: plain `FastAPI(...)` constructor with no lifespan
|
||||
- Only middleware is the DB session handler (lines 97-105)
|
||||
|
||||
#### Implementation
|
||||
1. Add `@asynccontextmanager` lifespan function that calls `get_browser()` on startup and `shutdown_browser()` on shutdown
|
||||
2. Pass `lifespan=lifespan` to `FastAPI()` constructor
|
||||
3. Verify existing middleware is unaffected
|
||||
|
||||
#### Files
|
||||
- **Modify:** `database/app/main.py` — add lifespan hook, pass to FastAPI constructor
|
||||
- **Modify:** `database/app/routers_v2/players.py` — export `get_browser`/`shutdown_browser` (if not already importable)
|
||||
|
||||
#### Tests
|
||||
- [ ] Integration: browser is connected immediately after API startup (before any render request)
|
||||
- [ ] Integration: browser is closed after API shutdown (no orphan processes)
|
||||
- [ ] Integration: existing DB middleware still functions correctly
|
||||
- [ ] Integration: API health endpoint still responds
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. Browser pre-warmed on startup — first render request has no cold-start penalty
|
||||
2. Clean shutdown — no orphan Chromium processes after API stop
|
||||
3. No regression in existing API behavior
|
||||
|
||||
---
|
||||
|
||||
### WP-04: Concurrent Upload Pipeline
|
||||
|
||||
**Repo:** `card-creation`
|
||||
**Complexity:** M
|
||||
**Dependencies:** WP-02 (persistent browser must be deployed for concurrent renders to work)
|
||||
|
||||
Replace the sequential upload loop with semaphore-bounded `asyncio.gather` for parallel card fetching, rendering, and S3 upload.
|
||||
|
||||
#### Current State
|
||||
- `pd_cards/core/upload.py` `upload_cards_to_s3()` (lines 109-333): sequential `for x in all_players:` loop
|
||||
- `fetch_card_image` timeout hardcoded to 6s (line 28)
|
||||
- `upload_card_to_s3()` uses synchronous `boto3.put_object` — blocks the event loop
|
||||
- Single `aiohttp.ClientSession` is reused (good)
|
||||
|
||||
#### Implementation
|
||||
1. Wrap per-card processing in an `async def process_card(player)` coroutine
|
||||
2. Add `asyncio.Semaphore(concurrency)` guard (default concurrency=8)
|
||||
3. Replace sequential loop with `asyncio.gather(*[process_card(p) for p in all_players], return_exceptions=True)`
|
||||
4. Offload synchronous `upload_card_to_s3()` to thread pool via `asyncio.get_event_loop().run_in_executor(None, upload_card_to_s3, ...)`
|
||||
5. Increase `fetch_card_image` timeout from 6s to 10s
|
||||
6. Add error handling: individual card failures logged but don't abort the batch
|
||||
7. Add progress reporting: log completion count every N cards (not every start)
|
||||
8. Add `--concurrency` CLI argument to `pd-cards upload` command
|
||||
|
||||
#### Files
|
||||
- **Modify:** `pd_cards/core/upload.py` — concurrent pipeline, timeout increase
|
||||
- **Modify:** `pd_cards/cli/upload.py` (or wherever CLI args are defined) — add `--concurrency` flag
|
||||
|
||||
#### Tests
|
||||
- [ ] Unit: semaphore limits concurrent tasks to specified count
|
||||
- [ ] Unit: individual card failure doesn't abort batch (return_exceptions=True)
|
||||
- [ ] Unit: progress logging fires at correct intervals
|
||||
- [ ] Integration: 20-card concurrent upload completes successfully
|
||||
- [ ] Integration: S3 URLs are correct after concurrent upload
|
||||
- [ ] Integration: `--concurrency 1` behaves like sequential (regression safety)
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. Default concurrency of 8 parallel card processes
|
||||
2. Individual failures logged, don't abort batch
|
||||
3. `fetch_card_image` timeout is 10s
|
||||
4. 800-card upload estimated at ~3-4 minutes with 8x concurrency (with WP-01 + WP-02 deployed)
|
||||
5. `--concurrency` flag available on CLI
|
||||
|
||||
---
|
||||
|
||||
### WP-05: Legacy Upload Script Update
|
||||
|
||||
**Repo:** `card-creation`
|
||||
**Complexity:** S
|
||||
**Dependencies:** WP-04
|
||||
|
||||
Apply the same concurrency pattern to `check_cards_and_upload.py` for users who still use the legacy script.
|
||||
|
||||
#### Current State
|
||||
- `check_cards_and_upload.py` lines 150-293: identical sequential pattern to `pd_cards/core/upload.py`
|
||||
- Module-level boto3 client (line 27)
|
||||
|
||||
#### Implementation
|
||||
1. Refactor the sequential loop to use `asyncio.gather` + `Semaphore` (same pattern as WP-04)
|
||||
2. Offload synchronous S3 calls to thread pool
|
||||
3. Increase fetch timeout to 10s
|
||||
4. Add progress reporting
|
||||
|
||||
#### Files
|
||||
- **Modify:** `check_cards_and_upload.py`
|
||||
|
||||
#### Tests
|
||||
- [ ] Integration: legacy script uploads 10 cards concurrently without errors
|
||||
- [ ] Integration: S3 URLs match expected format
|
||||
|
||||
#### Acceptance Criteria
|
||||
1. Same concurrency behavior as WP-04
|
||||
2. No regression in existing functionality
|
||||
|
||||
---
|
||||
|
||||
## WP Summary
|
||||
|
||||
| WP | Title | Repo | Size | Dependencies | Tests |
|
||||
|----|-------|------|------|-------------|-------|
|
||||
| WP-00 | Baseline Benchmarks | both | XS | — | 1 |
|
||||
| WP-01 | Self-Hosted Fonts | database | S | — | 4 |
|
||||
| WP-02 | Persistent Browser Instance | database | M | — | 6 |
|
||||
| WP-03 | FastAPI Lifespan Hooks | database | S | WP-02 | 4 |
|
||||
| WP-04 | Concurrent Upload Pipeline | card-creation | M | WP-02 | 6 |
|
||||
| WP-05 | Legacy Upload Script Update | card-creation | S | WP-04 | 2 |
|
||||
|
||||
**Total: 6 WPs, ~23 tests**
|
||||
|
||||
---
|
||||
|
||||
## Dependency Graph
|
||||
|
||||
```
|
||||
WP-00 (benchmarks)
|
||||
|
|
||||
v
|
||||
WP-01 (fonts) ──────┐
|
||||
├──> WP-03 (lifespan) ──> Deploy to dev ──> WP-04 (concurrent upload)
|
||||
WP-02 (browser) ────┘ |
|
||||
v
|
||||
WP-05 (legacy script)
|
||||
|
|
||||
v
|
||||
Re-run benchmarks
|
||||
```
|
||||
|
||||
**Parallelization:**
|
||||
- WP-00, WP-01, WP-02 can all start immediately in parallel
|
||||
- WP-03 needs WP-02
|
||||
- WP-04 needs WP-02 deployed (persistent browser must be running server-side for concurrent fetches to work)
|
||||
- WP-05 needs WP-04 (reuse the pattern)
|
||||
|
||||
---
|
||||
|
||||
## Risks
|
||||
|
||||
| Risk | Likelihood | Impact | Mitigation |
|
||||
|------|-----------|--------|------------|
|
||||
| Base64-embedded fonts bloat template HTML | Medium | Low | WOFF2 files are small (~20-40KB each). Total ~150KB base64 added to template. Acceptable since template is loaded once into Playwright, not transmitted to clients. |
|
||||
| Persistent browser memory leak | Medium | Medium | Always close pages in `finally` block. Monitor RSS after sustained renders. Add `is_connected()` check for crash recovery. |
|
||||
| Concurrent renders overload API server | Low | High | Semaphore bounds concurrency. Start at 8, tune based on server RAM (~100MB per page). 8 pages = ~800MB, well within 16GB. |
|
||||
| Synchronous boto3 blocks event loop under concurrency | Medium | Medium | Use `run_in_executor` to offload to thread pool. Consider `aioboto3` if thread pool proves insufficient. |
|
||||
| Visual regression from font change | Low | High | Visual diff test before/after. Render same card with both approaches and compare pixel output. |
|
||||
|
||||
---
|
||||
|
||||
## Open Questions
|
||||
|
||||
None — Phase 0 is straightforward infrastructure optimization with no design decisions pending.
|
||||
|
||||
---
|
||||
|
||||
## Follow-On: Local High-Concurrency Rendering (2026-03-14)
|
||||
|
||||
After Phase 0 was deployed, a follow-on improvement was implemented: **configurable API URL** for card rendering. This enables running the Paper Dynasty API server locally on the workstation and pointing upload scripts at `localhost` for dramatically higher concurrency.
|
||||
|
||||
### Changes
|
||||
- `pd_cards/core/upload.py` — `upload_cards_to_s3()`, `refresh_card_images()`, `check_card_images()` accept `api_url` parameter (defaults to production)
|
||||
- `pd_cards/commands/upload.py` — `--api-url` CLI option on `upload s3` command
|
||||
- `check_cards_and_upload.py` — `PD_API_URL` env var override (legacy script)
|
||||
|
||||
### Expected Performance
|
||||
|
||||
| Scenario | Per-card | 800 cards |
|
||||
|----------|----------|-----------|
|
||||
| Remote server, 8x concurrency (current) | ~0.98s render + network | ~2-3 min |
|
||||
| Local server, 32x concurrency | ~0.98s render, 32 parallel | ~30-45 sec |
|
||||
|
||||
### Usage
|
||||
```bash
|
||||
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- Phase 0 is a prerequisite for Phase 4 (Animated Cosmetics) which needs the persistent browser for efficient multi-frame APNG capture
|
||||
- The persistent browser also benefits Phase 2/3 variant rendering
|
||||
- GPU acceleration was evaluated and rejected — see PRD `02-architecture.md` § Optimization 4
|
||||
- Consider `aioboto3` as a future enhancement if `run_in_executor` thread pool becomes a bottleneck
|
||||
@ -5,6 +5,7 @@ Commands for uploading card images to AWS S3.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@ -40,14 +41,27 @@ def s3(
|
||||
dry_run: bool = typer.Option(
|
||||
False, "--dry-run", "-n", help="Preview without uploading"
|
||||
),
|
||||
concurrency: int = typer.Option(
|
||||
8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)"
|
||||
),
|
||||
api_url: str = typer.Option(
|
||||
"https://pd.manticorum.com/api",
|
||||
"--api-url",
|
||||
help="API base URL for card rendering (use http://localhost:8000/api for local server)",
|
||||
),
|
||||
):
|
||||
"""
|
||||
Upload card images to AWS S3.
|
||||
|
||||
Fetches card images from Paper Dynasty API and uploads to S3 bucket.
|
||||
Cards are processed concurrently; use --concurrency to tune parallelism.
|
||||
|
||||
For high-concurrency local rendering, start the API server locally and use:
|
||||
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||
|
||||
Example:
|
||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
||||
"""
|
||||
console.print()
|
||||
console.print("=" * 70)
|
||||
@ -65,8 +79,10 @@ def s3(
|
||||
console.print("Skipping: Batting cards")
|
||||
if skip_pitchers:
|
||||
console.print("Skipping: Pitching cards")
|
||||
console.print(f"API URL: {api_url}")
|
||||
console.print(f"Upload to S3: {upload and not dry_run}")
|
||||
console.print(f"Update URLs: {update_urls and not dry_run}")
|
||||
console.print(f"Concurrency: {concurrency} parallel tasks")
|
||||
console.print()
|
||||
|
||||
if dry_run:
|
||||
@ -76,39 +92,53 @@ def s3(
|
||||
raise typer.Exit(0)
|
||||
|
||||
try:
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
import check_cards_and_upload as ccu
|
||||
from pd_cards.core.upload import upload_cards_to_s3
|
||||
|
||||
# Configure the module's globals
|
||||
ccu.CARDSET_NAME = cardset
|
||||
ccu.START_ID = start_id
|
||||
ccu.TEST_COUNT = limit if limit else 9999
|
||||
ccu.HTML_CARDS = html
|
||||
ccu.SKIP_BATS = skip_batters
|
||||
ccu.SKIP_ARMS = skip_pitchers
|
||||
ccu.UPLOAD_TO_S3 = upload
|
||||
ccu.UPDATE_PLAYER_URLS = update_urls
|
||||
|
||||
# Re-initialize S3 client if uploading
|
||||
if upload:
|
||||
import boto3
|
||||
|
||||
ccu.s3_client = boto3.client("s3", region_name=ccu.AWS_REGION)
|
||||
else:
|
||||
ccu.s3_client = None
|
||||
def progress_callback(_count: int, label: str) -> None:
|
||||
console.print(f" Progress: {label}")
|
||||
|
||||
console.print("[bold]Starting S3 upload...[/bold]")
|
||||
console.print()
|
||||
|
||||
asyncio.run(ccu.main([]))
|
||||
result = asyncio.run(
|
||||
upload_cards_to_s3(
|
||||
cardset_name=cardset,
|
||||
start_id=start_id,
|
||||
limit=limit,
|
||||
html_cards=html,
|
||||
skip_batters=skip_batters,
|
||||
skip_pitchers=skip_pitchers,
|
||||
upload=upload,
|
||||
update_urls=update_urls,
|
||||
on_progress=progress_callback,
|
||||
concurrency=concurrency,
|
||||
api_url=api_url,
|
||||
)
|
||||
)
|
||||
|
||||
success_count = len(result["successes"])
|
||||
error_count = len(result["errors"])
|
||||
upload_count = len(result["uploads"])
|
||||
url_update_count = len(result["url_updates"])
|
||||
|
||||
console.print()
|
||||
console.print("=" * 70)
|
||||
console.print("[bold green]✓ S3 UPLOAD COMPLETE[/bold green]")
|
||||
console.print("=" * 70)
|
||||
console.print(f" Successes: {success_count}")
|
||||
console.print(f" S3 uploads: {upload_count}")
|
||||
console.print(f" URL updates: {url_update_count}")
|
||||
if error_count:
|
||||
console.print(f" [red]Errors: {error_count}[/red]")
|
||||
for player, err in result["errors"][:10]:
|
||||
console.print(
|
||||
f" - player {player.get('player_id', '?')} "
|
||||
f"({player.get('p_name', '?')}): {err}"
|
||||
)
|
||||
if error_count > 10:
|
||||
console.print(f" ... and {error_count - 10} more (see logs)")
|
||||
|
||||
except ImportError as e:
|
||||
console.print(f"[red]Error importing modules: {e}[/red]")
|
||||
|
||||
@ -4,6 +4,7 @@ Card image upload and management core logic.
|
||||
Business logic for uploading card images to AWS S3 and managing card URLs.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from typing import Optional
|
||||
import urllib.parse
|
||||
@ -25,7 +26,7 @@ def get_s3_base_url(
|
||||
return f"https://{bucket}.s3.{region}.amazonaws.com"
|
||||
|
||||
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
|
||||
"""
|
||||
Fetch card image from URL and return raw bytes.
|
||||
|
||||
@ -106,6 +107,9 @@ def upload_card_to_s3(
|
||||
raise
|
||||
|
||||
|
||||
DEFAULT_PD_API_URL = "https://pd.manticorum.com/api"
|
||||
|
||||
|
||||
async def upload_cards_to_s3(
|
||||
cardset_name: str,
|
||||
start_id: Optional[int] = None,
|
||||
@ -118,9 +122,18 @@ async def upload_cards_to_s3(
|
||||
bucket: str = DEFAULT_AWS_BUCKET,
|
||||
region: str = DEFAULT_AWS_REGION,
|
||||
on_progress: callable = None,
|
||||
concurrency: int = 8,
|
||||
api_url: str = DEFAULT_PD_API_URL,
|
||||
) -> dict:
|
||||
"""
|
||||
Upload card images to S3 for a cardset.
|
||||
Upload card images to S3 for a cardset using concurrent async tasks.
|
||||
|
||||
Cards are fetched and uploaded in parallel, bounded by ``concurrency``
|
||||
semaphore slots. boto3 S3 calls (synchronous) are offloaded to a thread
|
||||
pool via ``loop.run_in_executor`` so they do not block the event loop.
|
||||
|
||||
Individual card failures are collected and do NOT abort the batch;
|
||||
a summary is logged once all tasks complete.
|
||||
|
||||
Args:
|
||||
cardset_name: Name of the cardset to process
|
||||
@ -134,6 +147,7 @@ async def upload_cards_to_s3(
|
||||
bucket: S3 bucket name
|
||||
region: AWS region
|
||||
on_progress: Callback function for progress updates
|
||||
concurrency: Number of parallel card-processing tasks (default 8)
|
||||
|
||||
Returns:
|
||||
Dict with counts of errors, successes, uploads, url_updates
|
||||
@ -165,163 +179,225 @@ async def upload_cards_to_s3(
|
||||
timestamp = int(now.timestamp())
|
||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||
|
||||
# PD API base URL for card generation
|
||||
PD_API_URL = "https://pd.manticorum.com/api"
|
||||
# PD API base URL for card generation (configurable for local rendering)
|
||||
PD_API_URL = api_url
|
||||
logger.info(f"Using API URL: {PD_API_URL}")
|
||||
|
||||
# Initialize S3 client if uploading
|
||||
# Initialize S3 client if uploading (boto3 client is thread-safe for reads;
|
||||
# we will call it from a thread pool so we create it once here)
|
||||
s3_client = boto3.client("s3", region_name=region) if upload else None
|
||||
|
||||
# Build the filtered list of players to process, respecting start_id / limit
|
||||
max_count = limit or 9999
|
||||
filtered_players = []
|
||||
for x in all_players:
|
||||
if len(filtered_players) >= max_count:
|
||||
break
|
||||
if "pitching" in x["image"] and skip_pitchers:
|
||||
continue
|
||||
if "batting" in x["image"] and skip_batters:
|
||||
continue
|
||||
if start_id is not None and start_id > x["player_id"]:
|
||||
continue
|
||||
filtered_players.append(x)
|
||||
|
||||
total = len(filtered_players)
|
||||
logger.info(f"Processing {total} cards with concurrency={concurrency}")
|
||||
|
||||
# Shared mutable state protected by a lock
|
||||
errors = []
|
||||
successes = []
|
||||
uploads = []
|
||||
url_updates = []
|
||||
cxn_error = False
|
||||
count = 0
|
||||
max_count = limit or 9999
|
||||
completed = 0
|
||||
progress_lock = asyncio.Lock()
|
||||
results_lock = asyncio.Lock()
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
for x in all_players:
|
||||
# Apply filters
|
||||
if "pitching" in x["image"] and skip_pitchers:
|
||||
continue
|
||||
if "batting" in x["image"] and skip_batters:
|
||||
continue
|
||||
if start_id is not None and start_id > x["player_id"]:
|
||||
continue
|
||||
loop = asyncio.get_running_loop()
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
async def report_progress():
|
||||
"""Increment the completed counter and log every 20 completions."""
|
||||
nonlocal completed
|
||||
async with progress_lock:
|
||||
completed += 1
|
||||
if completed % 20 == 0 or completed == total:
|
||||
logger.info(f"Progress: {completed}/{total} cards processed")
|
||||
if on_progress:
|
||||
on_progress(completed, f"{completed}/{total}")
|
||||
|
||||
async def process_single_card(x: dict) -> None:
|
||||
"""
|
||||
Process one player entry: fetch card image(s), upload to S3, and
|
||||
optionally patch the player record with the new S3 URL.
|
||||
|
||||
Both the primary card (image) and the secondary card for two-way
|
||||
players (image2) are handled here. Errors are appended to the
|
||||
shared ``errors`` list rather than re-raised so the batch continues.
|
||||
"""
|
||||
async with semaphore:
|
||||
player_id = x["player_id"]
|
||||
|
||||
# --- primary card ---
|
||||
if "sombaseball" in x["image"]:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
continue
|
||||
if count >= max_count:
|
||||
break
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
count += 1
|
||||
if on_progress and count % 20 == 0:
|
||||
on_progress(count, x["p_name"])
|
||||
|
||||
# Determine card type from existing image URL
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
|
||||
pd_card_url = (
|
||||
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
|
||||
)
|
||||
|
||||
if html_cards:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
card_url = pd_card_url
|
||||
timeout = 6
|
||||
timeout = 10
|
||||
|
||||
primary_ok = False
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
# Fetch card image bytes directly
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
s3_url = upload_card_to_s3(
|
||||
# boto3 is synchronous — offload to thread pool
|
||||
s3_url = await loop.run_in_executor(
|
||||
None,
|
||||
upload_card_to_s3,
|
||||
s3_client,
|
||||
image_bytes,
|
||||
x["player_id"],
|
||||
player_id,
|
||||
card_type,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
uploads.append((x["player_id"], card_type, s3_url))
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type, s3_url))
|
||||
|
||||
# Update player record with new S3 URL
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
object_id=player_id,
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
url_updates.append((x["player_id"], card_type, s3_url))
|
||||
logger.info(f"Updated player {x['player_id']} image URL to S3")
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type, s3_url))
|
||||
logger.info(f"Updated player {player_id} image URL to S3")
|
||||
else:
|
||||
# Just validate card exists
|
||||
logger.info(f"Validating card URL: {card_url}")
|
||||
await url_get(card_url, timeout=timeout)
|
||||
|
||||
primary_ok = True
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
logger.error(f"Connection error for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']}: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
continue
|
||||
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
|
||||
# Handle image2 (dual-position players)
|
||||
if not primary_ok:
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
# --- secondary card (two-way players) ---
|
||||
if x["image2"] is not None:
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
|
||||
|
||||
if html_cards:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
if "sombaseball" in x["image2"]:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
else:
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=6
|
||||
)
|
||||
s3_url2 = upload_card_to_s3(
|
||||
s3_client,
|
||||
image_bytes2,
|
||||
x["player_id"],
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
uploads.append((x["player_id"], card_type2, s3_url2))
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
url_updates.append(
|
||||
(x["player_id"], card_type2, s3_url2)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image2 URL to S3"
|
||||
)
|
||||
else:
|
||||
await url_get(card_url2, timeout=6)
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
|
||||
card_url2 = f"{pd_card_url2}&html=true" if html_cards else pd_card_url2
|
||||
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=10
|
||||
)
|
||||
s3_url2 = await loop.run_in_executor(
|
||||
None,
|
||||
upload_card_to_s3,
|
||||
s3_client,
|
||||
image_bytes2,
|
||||
player_id,
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type2, s3_url2))
|
||||
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=player_id,
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type2, s3_url2))
|
||||
logger.info(f"Updated player {player_id} image2 URL to S3")
|
||||
else:
|
||||
await url_get(card_url2, timeout=10)
|
||||
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
except ConnectionError as e:
|
||||
logger.error(f"Connection error for player {player_id} image2: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
except ValueError as e:
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {player_id} image2: {e}"
|
||||
)
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
|
||||
else:
|
||||
successes.append(x)
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
await report_progress()
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = [process_single_card(x) for x in filtered_players]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Log final summary
|
||||
success_count = len(successes)
|
||||
error_count = len(errors)
|
||||
logger.info(
|
||||
f"Upload complete: {success_count} succeeded, {error_count} failed "
|
||||
f"out of {total} cards"
|
||||
)
|
||||
if error_count:
|
||||
for player, err in errors:
|
||||
logger.warning(
|
||||
f" Failed: player {player.get('player_id', '?')} "
|
||||
f"({player.get('p_name', '?')}): {err}"
|
||||
)
|
||||
|
||||
return {
|
||||
"errors": errors,
|
||||
@ -338,6 +414,7 @@ async def refresh_card_images(
|
||||
limit: Optional[int] = None,
|
||||
html_cards: bool = False,
|
||||
on_progress: callable = None,
|
||||
api_url: str = DEFAULT_PD_API_URL,
|
||||
) -> dict:
|
||||
"""
|
||||
Refresh card images for a cardset by triggering regeneration.
|
||||
@ -357,7 +434,7 @@ async def refresh_card_images(
|
||||
raise ValueError(f'Cardset "{cardset_name}" not found')
|
||||
cardset = c_query["cardsets"][0]
|
||||
|
||||
CARD_BASE_URL = "https://pd.manticorum.com/api/v2/players"
|
||||
CARD_BASE_URL = f"{api_url}/v2/players"
|
||||
|
||||
# Get all players
|
||||
p_query = await db_get(
|
||||
@ -470,7 +547,10 @@ async def refresh_card_images(
|
||||
|
||||
|
||||
async def check_card_images(
|
||||
cardset_name: str, limit: Optional[int] = None, on_progress: callable = None
|
||||
cardset_name: str,
|
||||
limit: Optional[int] = None,
|
||||
on_progress: callable = None,
|
||||
api_url: str = DEFAULT_PD_API_URL,
|
||||
) -> dict:
|
||||
"""
|
||||
Check and validate card images without uploading.
|
||||
@ -506,7 +586,7 @@ async def check_card_images(
|
||||
now = datetime.datetime.now()
|
||||
timestamp = int(now.timestamp())
|
||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||
PD_API_URL = "https://pd.manticorum.com/api"
|
||||
PD_API_URL = api_url
|
||||
|
||||
errors = []
|
||||
successes = []
|
||||
|
||||
290
scripts/benchmark_render.sh
Executable file
290
scripts/benchmark_render.sh
Executable file
@ -0,0 +1,290 @@
|
||||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# WP-00: Paper Dynasty Card Render & Upload Pipeline Benchmark
|
||||
# Phase 0 - Render Pipeline Optimization
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/benchmark_render.sh # Run full benchmark (dev API)
|
||||
# ./scripts/benchmark_render.sh --prod # Run against production API
|
||||
# ./scripts/benchmark_render.sh --quick # Connectivity check only
|
||||
#
|
||||
# Requirements: curl, bc
|
||||
# =============================================================================
|
||||
|
||||
# --- Configuration -----------------------------------------------------------
|
||||
|
||||
DEV_API="https://pddev.manticorum.com/api"
|
||||
PROD_API="https://pd.manticorum.com/api"
|
||||
API_URL="$DEV_API"
|
||||
|
||||
# Player IDs in the 12000-13000 range (2005 Live cardset)
|
||||
# Mix of batters and pitchers across different teams
|
||||
PLAYER_IDS=(12785 12790 12800 12810 12820 12830 12840 12850 12860 12870)
|
||||
|
||||
RESULTS_FILE="$(dirname "$0")/benchmark_results.txt"
|
||||
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
|
||||
RUN_LABEL="benchmark-$(date +%s)"
|
||||
|
||||
# --- Argument parsing ---------------------------------------------------------
|
||||
|
||||
QUICK_MODE=false
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--prod) API_URL="$PROD_API" ;;
|
||||
--quick) QUICK_MODE=true ;;
|
||||
--help|-h)
|
||||
echo "Usage: $0 [--prod] [--quick]"
|
||||
echo " --prod Use production API instead of dev"
|
||||
echo " --quick Connectivity check only (1 request)"
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Helpers -----------------------------------------------------------------
|
||||
|
||||
hr() { printf '%0.s-' {1..72}; echo; }
|
||||
|
||||
# bc-based float arithmetic
|
||||
fadd() { echo "$1 + $2" | bc -l; }
|
||||
fdiv() { echo "scale=6; $1 / $2" | bc -l; }
|
||||
flt() { echo "$1 < $2" | bc -l; } # returns 1 if true
|
||||
fmt3() { printf "%.3f" "$1"; } # format to 3 decimal places
|
||||
|
||||
# Print and simultaneously append to results file
|
||||
log() { echo "$@" | tee -a "$RESULTS_FILE"; }
|
||||
|
||||
# Single card render with timing; sets LAST_HTTP, LAST_TIME, LAST_SIZE
|
||||
measure_card() {
|
||||
local player_id="$1"
|
||||
local card_type="${2:-batting}"
|
||||
local cache_bust="${RUN_LABEL}-${player_id}"
|
||||
local url="${API_URL}/v2/players/${player_id}/${card_type}card?d=${cache_bust}"
|
||||
|
||||
# -s silent, -o discard body, -w write timing vars separated by |
|
||||
local result
|
||||
result=$(curl -s -o /dev/null \
|
||||
-w "%{http_code}|%{time_total}|%{time_connect}|%{time_starttransfer}|%{size_download}" \
|
||||
--max-time 30 \
|
||||
"$url" 2>&1)
|
||||
|
||||
LAST_HTTP=$(echo "$result" | cut -d'|' -f1)
|
||||
LAST_TIME=$(echo "$result" | cut -d'|' -f2)
|
||||
LAST_CONN=$(echo "$result" | cut -d'|' -f3)
|
||||
LAST_TTFB=$(echo "$result" | cut -d'|' -f4)
|
||||
LAST_SIZE=$(echo "$result" | cut -d'|' -f5)
|
||||
LAST_URL="$url"
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
# START
|
||||
# =============================================================================
|
||||
|
||||
# Truncate results file for this run and write header
|
||||
cat > "$RESULTS_FILE" << EOF
|
||||
Paper Dynasty Card Render Benchmark
|
||||
Run timestamp : $TIMESTAMP
|
||||
API target : $API_URL
|
||||
Cache-bust tag: $RUN_LABEL
|
||||
EOF
|
||||
echo "" >> "$RESULTS_FILE"
|
||||
|
||||
echo ""
|
||||
log "=============================================================="
|
||||
log " Paper Dynasty Card Render Benchmark - WP-00 / Phase 0"
|
||||
log " $(date '+%Y-%m-%d %H:%M:%S')"
|
||||
log " API: $API_URL"
|
||||
log "=============================================================="
|
||||
echo ""
|
||||
|
||||
# =============================================================================
|
||||
# SECTION 1: Connectivity Check
|
||||
# =============================================================================
|
||||
|
||||
log "--- Section 1: Connectivity Check ---"
|
||||
log ""
|
||||
log "Sending single request to verify API is reachable..."
|
||||
log " Player : 12785 (batting card)"
|
||||
log " URL : ${API_URL}/v2/players/12785/battingcard?d=${RUN_LABEL}-probe"
|
||||
echo ""
|
||||
|
||||
measure_card 12785 batting
|
||||
|
||||
if [ "$LAST_HTTP" = "200" ]; then
|
||||
log " HTTP : $LAST_HTTP OK"
|
||||
log " Total : $(fmt3 $LAST_TIME)s"
|
||||
log " Connect: $(fmt3 $LAST_CONN)s"
|
||||
log " TTFB : $(fmt3 $LAST_TTFB)s"
|
||||
log " Size : ${LAST_SIZE} bytes ($(echo "scale=1; $LAST_SIZE/1024" | bc)KB)"
|
||||
log ""
|
||||
log " Connectivity: PASS"
|
||||
elif [ -z "$LAST_HTTP" ] || [ "$LAST_HTTP" = "000" ]; then
|
||||
log " ERROR: Could not reach $API_URL (no response / timeout)"
|
||||
log " Aborting benchmark."
|
||||
echo ""
|
||||
exit 1
|
||||
else
|
||||
log " HTTP : $LAST_HTTP"
|
||||
log " WARNING: Unexpected status code. Continuing anyway."
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
if [ "$QUICK_MODE" = true ]; then
|
||||
log "Quick mode: exiting after connectivity check."
|
||||
echo ""
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# SECTION 2: Sequential Card Render Benchmark (10 cards)
|
||||
# =============================================================================
|
||||
|
||||
log ""
|
||||
hr
|
||||
log "--- Section 2: Sequential Card Render Benchmark ---"
|
||||
log ""
|
||||
log "Rendering ${#PLAYER_IDS[@]} cards sequentially with fresh cache busts."
|
||||
log "Each request forces a full server-side render (bypasses nginx cache)."
|
||||
log ""
|
||||
log "$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' 'Player' 'HTTP' 'Total(s)' 'TTFB(s)' 'Connect(s)' 'Size(KB)')"
|
||||
log "$(printf '%0.s-' {1..62})"
|
||||
|
||||
# Accumulators
|
||||
total_time="0"
|
||||
min_time=""
|
||||
max_time=""
|
||||
success_count=0
|
||||
fail_count=0
|
||||
all_times=()
|
||||
|
||||
for pid in "${PLAYER_IDS[@]}"; do
|
||||
measure_card "$pid" batting
|
||||
|
||||
size_kb=$(echo "scale=1; $LAST_SIZE/1024" | bc)
|
||||
row=$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' \
|
||||
"$pid" \
|
||||
"$LAST_HTTP" \
|
||||
"$(fmt3 $LAST_TIME)" \
|
||||
"$(fmt3 $LAST_TTFB)" \
|
||||
"$(fmt3 $LAST_CONN)" \
|
||||
"$size_kb")
|
||||
|
||||
if [ "$LAST_HTTP" = "200" ]; then
|
||||
log "$row"
|
||||
total_time=$(fadd "$total_time" "$LAST_TIME")
|
||||
all_times+=("$LAST_TIME")
|
||||
success_count=$((success_count + 1))
|
||||
|
||||
# Track min
|
||||
if [ -z "$min_time" ] || [ "$(flt $LAST_TIME $min_time)" = "1" ]; then
|
||||
min_time="$LAST_TIME"
|
||||
fi
|
||||
# Track max
|
||||
if [ -z "$max_time" ] || [ "$(flt $max_time $LAST_TIME)" = "1" ]; then
|
||||
max_time="$LAST_TIME"
|
||||
fi
|
||||
else
|
||||
log "$row << FAILED"
|
||||
fail_count=$((fail_count + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
log ""
|
||||
log "--- Section 2: Results Summary ---"
|
||||
log ""
|
||||
|
||||
if [ "$success_count" -gt 0 ]; then
|
||||
avg_time=$(fdiv "$total_time" "$success_count")
|
||||
log " Cards requested : ${#PLAYER_IDS[@]}"
|
||||
log " Successful : $success_count"
|
||||
log " Failed : $fail_count"
|
||||
log " Total wall time : $(fmt3 $total_time)s"
|
||||
log " Average per card : $(fmt3 $avg_time)s"
|
||||
log " Minimum : $(fmt3 $min_time)s"
|
||||
log " Maximum : $(fmt3 $max_time)s"
|
||||
log ""
|
||||
|
||||
# Rough throughput estimate (sequential)
|
||||
cards_per_min=$(echo "scale=1; 60 / $avg_time" | bc)
|
||||
log " Sequential throughput: ~${cards_per_min} cards/min"
|
||||
|
||||
# Estimate full cardset at ~500 players * 2 cards each = 1000 renders
|
||||
est_1000=$(echo "scale=0; (1000 * $avg_time) / 1" | bc)
|
||||
log " Est. full cardset (1000 renders, sequential): ~${est_1000}s (~$(echo "scale=1; $est_1000/60" | bc) min)"
|
||||
else
|
||||
log " No successful renders to summarize."
|
||||
fi
|
||||
|
||||
# =============================================================================
|
||||
# SECTION 3: Upload Pipeline Reference
|
||||
# =============================================================================
|
||||
|
||||
echo ""
|
||||
log ""
|
||||
hr
|
||||
log "--- Section 3: Upload Pipeline Benchmark Commands ---"
|
||||
log ""
|
||||
log "The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards"
|
||||
log "and uploads them to S3. It uses a persistent aiohttp session with a 6s"
|
||||
log "timeout per card."
|
||||
log ""
|
||||
log "To time a dry-run batch of 20 cards:"
|
||||
log ""
|
||||
log " cd /mnt/NV2/Development/paper-dynasty/card-creation"
|
||||
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20 --dry-run"
|
||||
log ""
|
||||
log "To time a real upload batch of 20 cards (writes to S3, updates DB URLs):"
|
||||
log ""
|
||||
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20"
|
||||
log ""
|
||||
log "Notes:"
|
||||
log " - dry-run validates card URLs exist without uploading"
|
||||
log " - Remove --limit for full cardset run"
|
||||
log " - Pipeline is currently sequential (one card at a time per session)"
|
||||
log " - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card"
|
||||
log " - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio"
|
||||
log ""
|
||||
|
||||
# =============================================================================
|
||||
# SECTION 4: Before/After Comparison Template
|
||||
# =============================================================================
|
||||
|
||||
echo ""
|
||||
hr
|
||||
log "--- Section 4: Before/After Comparison Template ---"
|
||||
log ""
|
||||
log "Fill in after optimization work is complete."
|
||||
log ""
|
||||
log " Metric Before After Delta"
|
||||
log " $(printf '%0.s-' {1..64})"
|
||||
|
||||
if [ "$success_count" -gt 0 ]; then
|
||||
log " Avg render time (s) $(fmt3 $avg_time) ___._____ ___._____"
|
||||
log " Min render time (s) $(fmt3 $min_time) ___._____ ___._____"
|
||||
log " Max render time (s) $(fmt3 $max_time) ___._____ ___._____"
|
||||
log " Sequential cards/min ${cards_per_min} ___.___ ___.___"
|
||||
else
|
||||
log " Avg render time (s) (no data) ___._____ ___._____"
|
||||
fi
|
||||
log " Upload batch (20 cards) ___._____s ___._____s ___._____s"
|
||||
log " Upload cards/min ___.___ ___.___ ___.___"
|
||||
log " Full cardset time (est) ___._____min ___._____min ___ min saved"
|
||||
log ""
|
||||
|
||||
# =============================================================================
|
||||
# DONE
|
||||
# =============================================================================
|
||||
|
||||
echo ""
|
||||
hr
|
||||
log "Benchmark complete."
|
||||
log "Results saved to: $RESULTS_FILE"
|
||||
log ""
|
||||
|
||||
# Voice notify
|
||||
curl -s -X POST http://localhost:8888/notify \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d "{\"message\":\"Benchmark complete. Average render time $(fmt3 ${avg_time:-0}) seconds per card\"}" \
|
||||
> /dev/null 2>&1 || true
|
||||
93
scripts/benchmark_results.txt
Normal file
93
scripts/benchmark_results.txt
Normal file
@ -0,0 +1,93 @@
Paper Dynasty Card Render Benchmark
Run timestamp : 2026-03-12 23:40:54
API target    : https://pddev.manticorum.com/api
Cache-bust tag: benchmark-1773376854

==============================================================
Paper Dynasty Card Render Benchmark - WP-00 / Phase 0
2026-03-12 23:40:54
API: https://pddev.manticorum.com/api
==============================================================
--- Section 1: Connectivity Check ---

Sending single request to verify API is reachable...
Player : 12785 (batting card)
URL    : https://pddev.manticorum.com/api/v2/players/12785/battingcard?d=benchmark-1773376854-probe
HTTP   : 200 OK
Total  : 1.944s
Connect: 0.010s
TTFB   : 1.933s
Size   : 192175 bytes (187.6KB)

Connectivity: PASS
--- Section 2: Sequential Card Render Benchmark ---

Rendering 10 cards sequentially with fresh cache busts.
Each request forces a full server-side render (bypasses nginx cache).

Player   HTTP   Total(s)   TTFB(s)   Connect(s)   Size(KB)
--------------------------------------------------------------
12785    200    0.056      0.046     0.008        187.6
12790    200    1.829      1.815     0.008        202.3
12800    200    2.106      2.096     0.008        192.4
12810    200    1.755      1.745     0.009        189.8
12820    200    2.041      2.030     0.009        193.1
12830    200    2.433      2.423     0.009        180.3
12840    200    2.518      2.507     0.009        202.3
12850    200    2.191      2.174     0.009        187.6
12860    200    2.478      2.469     0.009        190.4
12870    200    2.913      2.901     0.009        192.8

--- Section 2: Results Summary ---

Cards requested  : 10
Successful       : 10
Failed           : 0
Total wall time  : 20.321s
Average per card : 2.032s
Minimum          : 0.056s
Maximum          : 2.913s

Sequential throughput: ~29.5 cards/min
Est. full cardset (1000 renders, sequential): ~2032s (~33.8 min)
--- Section 3: Upload Pipeline Benchmark Commands ---

The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards
and uploads them to S3. It uses a persistent aiohttp session with a 6s
timeout per card.

To time a dry-run batch of 20 cards:

  cd /mnt/NV2/Development/paper-dynasty/card-creation
  time pd-cards upload s3 --cardset "2005 Live" --limit 20 --dry-run

To time a real upload batch of 20 cards (writes to S3, updates DB URLs):

  time pd-cards upload s3 --cardset "2005 Live" --limit 20

Notes:
  - dry-run validates card URLs exist without uploading
  - Remove --limit for full cardset run
  - Pipeline is currently sequential (one card at a time per session)
  - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card
  - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio
--- Section 4: Before/After Comparison Template ---

Fill in after optimization work is complete.

  Metric                     Before        After         Delta
  ----------------------------------------------------------------
  Avg render time (s)        2.032         ___._____     ___._____
  Min render time (s)        0.056         ___._____     ___._____
  Max render time (s)        2.913         ___._____     ___._____
  Sequential cards/min       29.5          ___.___       ___.___
  Upload batch (20 cards)    ___._____s    ___._____s    ___._____s
  Upload cards/min           ___.___       ___.___       ___.___
  Full cardset time (est)    ___._____min  ___._____min  ___ min saved

Benchmark complete.
Results saved to: scripts/benchmark_results.txt
Loading…
Reference in New Issue
Block a user