feat: configurable API URL for local high-concurrency card rendering
Allow upload scripts to target a local API server instead of the remote production server, enabling 32x+ concurrency for dramatically faster full-cardset uploads (~30-45s vs ~2-3min for 800 cards). - pd_cards/core/upload.py: add api_url param to upload_cards_to_s3(), refresh_card_images(), and check_card_images() - pd_cards/commands/upload.py: add --api-url CLI option to upload s3 - check_cards_and_upload.py: read PD_API_URL env var with prod fallback - Update CLAUDE.md, CLI reference, and Phase 0 project plan docs Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ed1daa20b0
commit
8bddf31bf6
@ -118,6 +118,9 @@ pd-cards scouting all && pd-cards scouting upload
|
|||||||
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
||||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||||
|
|
||||||
|
# High-concurrency local rendering (start API server locally first)
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||||
|
|
||||||
# Check cards without uploading
|
# Check cards without uploading
|
||||||
pd-cards upload check --cardset "2005 Live" --limit 10
|
pd-cards upload check --cardset "2005 Live" --limit 10
|
||||||
|
|
||||||
@ -263,6 +266,7 @@ Before running retrosheet_data.py, verify these configuration settings:
|
|||||||
- `UPDATE_PLAYER_URLS`: Enable/disable updating player records with S3 URLs (careful - modifies database)
|
- `UPDATE_PLAYER_URLS`: Enable/disable updating player records with S3 URLs (careful - modifies database)
|
||||||
- `AWS_BUCKET_NAME`: S3 bucket name (default: 'paper-dynasty')
|
- `AWS_BUCKET_NAME`: S3 bucket name (default: 'paper-dynasty')
|
||||||
- `AWS_REGION`: AWS region (default: 'us-east-1')
|
- `AWS_REGION`: AWS region (default: 'us-east-1')
|
||||||
|
- `PD_API_URL` (env var): Override the API base URL for card rendering (default: `https://pd.manticorum.com/api`). Set to `http://localhost:8000/api` for local rendering.
|
||||||
|
|
||||||
**S3 URL Structure**: `cards/cardset-{cardset_id:03d}/player-{player_id}/{batting|pitching}card.png?d={release_date}`
|
**S3 URL Structure**: `cards/cardset-{cardset_id:03d}/player-{player_id}/{batting|pitching}card.png?d={release_date}`
|
||||||
- Uses zero-padded 3-digit cardset ID for consistent sorting
|
- Uses zero-padded 3-digit cardset ID for consistent sorting
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import functools
|
import functools
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import boto3
|
import boto3
|
||||||
|
|
||||||
@ -135,10 +136,11 @@ async def main(args):
|
|||||||
timestamp = int(now.timestamp())
|
timestamp = int(now.timestamp())
|
||||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||||
|
|
||||||
# PD API base URL for card generation
|
# PD API base URL for card generation (override with PD_API_URL env var for local rendering)
|
||||||
PD_API_URL = "https://pd.manticorum.com/api"
|
PD_API_URL = os.environ.get("PD_API_URL", "https://pd.manticorum.com/api")
|
||||||
|
|
||||||
print(f"\nRelease date for cards: {release_date}")
|
print(f"\nRelease date for cards: {release_date}")
|
||||||
|
print(f"API URL: {PD_API_URL}")
|
||||||
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
||||||
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}")
|
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}")
|
||||||
print(f"Concurrency: {CONCURRENCY} parallel tasks\n")
|
print(f"Concurrency: {CONCURRENCY} parallel tasks\n")
|
||||||
|
|||||||
@ -404,17 +404,35 @@ pd-cards upload s3 --cardset <name> [OPTIONS]
|
|||||||
| `--upload/--no-upload` | | `True` | Upload to S3 |
|
| `--upload/--no-upload` | | `True` | Upload to S3 |
|
||||||
| `--update-urls/--no-update-urls` | | `True` | Update player URLs in database |
|
| `--update-urls/--no-update-urls` | | `True` | Update player URLs in database |
|
||||||
| `--dry-run` | `-n` | `False` | Preview without uploading |
|
| `--dry-run` | `-n` | `False` | Preview without uploading |
|
||||||
|
| `--concurrency` | `-j` | `8` | Number of parallel uploads |
|
||||||
|
| `--api-url` | | `https://pd.manticorum.com/api` | API base URL for card rendering |
|
||||||
|
|
||||||
**Prerequisites:** AWS CLI configured with credentials (`~/.aws/credentials`)
|
**Prerequisites:** AWS CLI configured with credentials (`~/.aws/credentials`)
|
||||||
|
|
||||||
**S3 URL Structure:** `cards/cardset-{id:03d}/player-{player_id}/{batting|pitching}card.png?d={date}`
|
**S3 URL Structure:** `cards/cardset-{id:03d}/player-{player_id}/{batting|pitching}card.png?d={date}`
|
||||||
|
|
||||||
|
**Local Rendering:** For high-concurrency local rendering, start the Paper Dynasty API server locally and point uploads at it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Terminal 1: Start local API server (from database repo)
|
||||||
|
cd /mnt/NV2/Development/paper-dynasty/database
|
||||||
|
DATABASE_TYPE=postgresql POSTGRES_HOST=10.10.0.42 POSTGRES_DB=paperdynasty_dev \
|
||||||
|
POSTGRES_USER=sba_admin POSTGRES_PASSWORD=<pw> POSTGRES_PORT=5432 \
|
||||||
|
API_TOKEN=Tp3aO3jhYve5NJF1IqOmJTmk \
|
||||||
|
uvicorn app.main:app --host 0.0.0.0 --port 8000
|
||||||
|
|
||||||
|
# Terminal 2: Upload with local rendering
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||||
|
```
|
||||||
|
|
||||||
**Examples:**
|
**Examples:**
|
||||||
```bash
|
```bash
|
||||||
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
pd-cards upload s3 --cardset "2005 Live" --dry-run
|
||||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||||
pd-cards upload s3 --cardset "2005 Live" --start-id 5000
|
pd-cards upload s3 --cardset "2005 Live" --start-id 5000
|
||||||
pd-cards upload s3 --cardset "2005 Live" --skip-pitchers
|
pd-cards upload s3 --cardset "2005 Live" --skip-pitchers
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
356
docs/prd-evolution/PHASE0_PROJECT_PLAN.md
Normal file
356
docs/prd-evolution/PHASE0_PROJECT_PLAN.md
Normal file
@ -0,0 +1,356 @@
|
|||||||
|
# Phase 0 — Render Pipeline Optimization: Project Plan
|
||||||
|
|
||||||
|
**Version:** 1.1
|
||||||
|
**Date:** 2026-03-13
|
||||||
|
**PRD Reference:** `docs/prd-evolution/02-architecture.md` § Card Render Pipeline Optimization, `13-implementation.md` § Phase 0
|
||||||
|
**Status:** Implemented — deployed to dev, PR #94 open for production
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Phase 0 is independent of Card Evolution and benefits all existing card workflows immediately. The goal is to reduce per-card render time and full cardset uploads significantly by eliminating browser spawn overhead, CDN dependencies, and sequential processing.
|
||||||
|
|
||||||
|
**Bottlenecks addressed:**
|
||||||
|
1. New Chromium process spawned per render request (~1.0-1.5s overhead)
|
||||||
|
2. Google Fonts CDN fetched over network on every render (~0.3-0.5s) — no persistent cache since browser is destroyed after each render
|
||||||
|
3. Upload pipeline is fully sequential — one card at a time, blocking S3 upload via synchronous boto3
|
||||||
|
|
||||||
|
**Results:**
|
||||||
|
|
||||||
|
| Metric | Before | Target | Actual |
|
||||||
|
|--------|--------|--------|--------|
|
||||||
|
| Per-card render (fresh) | ~2.0s (benchmark avg) | <1.0s | **~0.98s avg** (range 0.63-1.44s, **~51% reduction**) |
|
||||||
|
| Per-card render (cached) | N/A | — | **~0.1s** |
|
||||||
|
| External dependencies during render | Google Fonts CDN | None | **None** |
|
||||||
|
| Chromium processes per 800-card run | 800 | 1 | **1** |
|
||||||
|
| 800-card upload (sequential, estimated) | ~27 min | ~8-13 min | ~13 min (estimated at 0.98s/card) |
|
||||||
|
| 800-card upload (concurrent 8x, estimated) | N/A | ~2-4 min | ~2-3 min (estimated) |
|
||||||
|
|
||||||
|
**Benchmark details (7 fresh renders on dev, 2026-03-13):**
|
||||||
|
|
||||||
|
| Player | Type | Time |
|
||||||
|
|--------|------|------|
|
||||||
|
| Michael Young (12726) | Batting | 0.96s |
|
||||||
|
| Darin Erstad (12729) | Batting | 0.78s |
|
||||||
|
| Wilson Valdez (12746) | Batting | 1.44s |
|
||||||
|
| Player 12750 | Batting | 0.76s |
|
||||||
|
| Jarrod Washburn (12880) | Pitching | 0.63s |
|
||||||
|
| Ryan Drese (12879) | Pitching | 1.25s |
|
||||||
|
| Player 12890 | Pitching | 1.07s |
|
||||||
|
|
||||||
|
**Average: 0.98s** — meets the <1s target. Occasional spikes to ~1.4s from Chromium GC pressure. Pitching cards tend to render slightly faster due to less template data.
|
||||||
|
|
||||||
|
**Optimization breakdown:**
|
||||||
|
- Persistent browser (WP-02): eliminated ~1.0s spawn overhead
|
||||||
|
- Variable font deduplication (WP-01 fix): eliminated ~163KB redundant base64 parsing, saved ~0.4s
|
||||||
|
- Remaining ~0.98s is Playwright page creation, HTML parsing, and PNG screenshot — not reducible without GPU acceleration or a different rendering approach
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Work Packages (6 WPs)
|
||||||
|
|
||||||
|
### WP-00: Baseline Benchmarks
|
||||||
|
|
||||||
|
**Repo:** `database` + `card-creation`
|
||||||
|
**Complexity:** XS
|
||||||
|
**Dependencies:** None
|
||||||
|
|
||||||
|
Capture before-metrics so we can measure improvement.
|
||||||
|
|
||||||
|
#### Tasks
|
||||||
|
1. Time 10 sequential card renders via the API (curl with timing)
|
||||||
|
2. Time a small batch S3 upload (e.g., 20 cards) via `pd-cards upload`
|
||||||
|
3. Record results in a benchmark log
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Benchmark script or documented curl commands exist and are repeatable
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. Baseline numbers recorded for per-card render time
|
||||||
|
2. Baseline numbers recorded for batch upload time
|
||||||
|
3. Methodology is repeatable for post-optimization comparison
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### WP-01: Self-Hosted Fonts
|
||||||
|
|
||||||
|
**Repo:** `database`
|
||||||
|
**Complexity:** S
|
||||||
|
**Dependencies:** None (can run in parallel with WP-02)
|
||||||
|
|
||||||
|
Replace Google Fonts CDN with locally embedded WOFF2 fonts. Eliminates ~0.3-0.5s network round-trip per render and removes external dependency.
|
||||||
|
|
||||||
|
#### Current State
|
||||||
|
- `storage/templates/player_card.html` lines 5-7: `<link>` tags to `fonts.googleapis.com`
|
||||||
|
- `storage/templates/style.html`: References `"Open Sans"` and `"Source Sans 3"` font-families
|
||||||
|
- Two fonts used: Open Sans (300, 400, 700) and Source Sans 3 (400, 700)
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
1. Download WOFF2 files for both fonts (5 files total: Open Sans 300/400/700, Source Sans 3 400/700)
|
||||||
|
2. Base64-encode each WOFF2 file
|
||||||
|
3. Add `@font-face` declarations with base64 data URIs to `style.html`
|
||||||
|
4. Remove the three `<link>` tags from `player_card.html`
|
||||||
|
5. Visual diff: render the same card before/after and verify identical output
|
||||||
|
|
||||||
|
#### Files
|
||||||
|
- **Create:** `database/storage/fonts/` directory with raw WOFF2 files (source archive, not deployed)
|
||||||
|
- **Modify:** `database/storage/templates/style.html` — add `@font-face` declarations
|
||||||
|
- **Modify:** `database/storage/templates/player_card.html` — remove `<link>` tags (lines 5-7)
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Unit: `style.html` contains no `fonts.googleapis.com` references
|
||||||
|
- [ ] Unit: `player_card.html` contains no `<link>` to external font CDNs
|
||||||
|
- [ ] Unit: `@font-face` declarations present for all 5 font variants
|
||||||
|
- [ ] Visual: rendered card is pixel-identical to pre-change output (manual check)
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. No external network requests during card render
|
||||||
|
2. All 5 font weights render correctly
|
||||||
|
3. Card appearance unchanged
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### WP-02: Persistent Browser Instance
|
||||||
|
|
||||||
|
**Repo:** `database`
|
||||||
|
**Complexity:** M
|
||||||
|
**Dependencies:** None (can run in parallel with WP-01)
|
||||||
|
|
||||||
|
Replace per-request Chromium launch/teardown with a persistent browser that lives for the lifetime of the API process. Eliminates ~1.0-1.5s spawn overhead per render.
|
||||||
|
|
||||||
|
#### Current State
|
||||||
|
- `app/routers_v2/players.py` lines 801-826: `async with async_playwright() as p:` block creates and destroys a browser per request
|
||||||
|
- No browser reuse, no connection pooling
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
1. Add module-level `_browser` and `_playwright` globals to `players.py`
|
||||||
|
2. Implement `get_browser()` — lazy-init with `is_connected()` auto-reconnect
|
||||||
|
3. Implement `shutdown_browser()` — clean teardown for API shutdown
|
||||||
|
4. Replace the `async with async_playwright()` block with page-per-request pattern:
|
||||||
|
```python
|
||||||
|
browser = await get_browser()
|
||||||
|
page = await browser.new_page(viewport={"width": 1280, "height": 720})
|
||||||
|
try:
|
||||||
|
await page.set_content(html_string)
|
||||||
|
await page.screenshot(path=file_path, type="png", clip={...})
|
||||||
|
finally:
|
||||||
|
await page.close()
|
||||||
|
```
|
||||||
|
5. Ensure page is always closed in `finally` block to prevent memory leaks
|
||||||
|
|
||||||
|
#### Files
|
||||||
|
- **Modify:** `database/app/routers_v2/players.py` — persistent browser, page-per-request
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Unit: `get_browser()` returns a connected browser
|
||||||
|
- [ ] Unit: `get_browser()` returns same instance on second call
|
||||||
|
- [ ] Unit: `get_browser()` relaunches if browser disconnected
|
||||||
|
- [ ] Integration: render 10 cards sequentially, no browser leaks (page count returns to 0 between renders)
|
||||||
|
- [ ] Integration: concurrent renders (4 simultaneous requests) complete without errors
|
||||||
|
- [ ] Integration: `shutdown_browser()` cleanly closes browser and playwright
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. Only 1 Chromium process running regardless of render count
|
||||||
|
2. Page count returns to 0 between renders (no leaks)
|
||||||
|
3. Auto-reconnect works if browser crashes
|
||||||
|
4. ~~Per-card render time drops to ~1.0-1.5s~~ **Actual: ~0.98s avg fresh render (from ~2.0s baseline) — target met**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### WP-03: FastAPI Lifespan Hooks
|
||||||
|
|
||||||
|
**Repo:** `database`
|
||||||
|
**Complexity:** S
|
||||||
|
**Dependencies:** WP-02
|
||||||
|
|
||||||
|
Wire `get_browser()` and `shutdown_browser()` into FastAPI's lifespan so the browser warms up on startup and cleans up on shutdown.
|
||||||
|
|
||||||
|
#### Current State
|
||||||
|
- `app/main.py` line 54: plain `FastAPI(...)` constructor with no lifespan
|
||||||
|
- Only middleware is the DB session handler (lines 97-105)
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
1. Add `@asynccontextmanager` lifespan function that calls `get_browser()` on startup and `shutdown_browser()` on shutdown
|
||||||
|
2. Pass `lifespan=lifespan` to `FastAPI()` constructor
|
||||||
|
3. Verify existing middleware is unaffected
|
||||||
|
|
||||||
|
#### Files
|
||||||
|
- **Modify:** `database/app/main.py` — add lifespan hook, pass to FastAPI constructor
|
||||||
|
- **Modify:** `database/app/routers_v2/players.py` — export `get_browser`/`shutdown_browser` (if not already importable)
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Integration: browser is connected immediately after API startup (before any render request)
|
||||||
|
- [ ] Integration: browser is closed after API shutdown (no orphan processes)
|
||||||
|
- [ ] Integration: existing DB middleware still functions correctly
|
||||||
|
- [ ] Integration: API health endpoint still responds
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. Browser pre-warmed on startup — first render request has no cold-start penalty
|
||||||
|
2. Clean shutdown — no orphan Chromium processes after API stop
|
||||||
|
3. No regression in existing API behavior
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### WP-04: Concurrent Upload Pipeline
|
||||||
|
|
||||||
|
**Repo:** `card-creation`
|
||||||
|
**Complexity:** M
|
||||||
|
**Dependencies:** WP-02 (persistent browser must be deployed for concurrent renders to work)
|
||||||
|
|
||||||
|
Replace the sequential upload loop with semaphore-bounded `asyncio.gather` for parallel card fetching, rendering, and S3 upload.
|
||||||
|
|
||||||
|
#### Current State
|
||||||
|
- `pd_cards/core/upload.py` `upload_cards_to_s3()` (lines 109-333): sequential `for x in all_players:` loop
|
||||||
|
- `fetch_card_image` timeout hardcoded to 6s (line 28)
|
||||||
|
- `upload_card_to_s3()` uses synchronous `boto3.put_object` — blocks the event loop
|
||||||
|
- Single `aiohttp.ClientSession` is reused (good)
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
1. Wrap per-card processing in an `async def process_card(player)` coroutine
|
||||||
|
2. Add `asyncio.Semaphore(concurrency)` guard (default concurrency=8)
|
||||||
|
3. Replace sequential loop with `asyncio.gather(*[process_card(p) for p in all_players], return_exceptions=True)`
|
||||||
|
4. Offload synchronous `upload_card_to_s3()` to thread pool via `asyncio.get_event_loop().run_in_executor(None, upload_card_to_s3, ...)`
|
||||||
|
5. Increase `fetch_card_image` timeout from 6s to 10s
|
||||||
|
6. Add error handling: individual card failures logged but don't abort the batch
|
||||||
|
7. Add progress reporting: log completion count every N cards (not every start)
|
||||||
|
8. Add `--concurrency` CLI argument to `pd-cards upload` command
|
||||||
|
|
||||||
|
#### Files
|
||||||
|
- **Modify:** `pd_cards/core/upload.py` — concurrent pipeline, timeout increase
|
||||||
|
- **Modify:** `pd_cards/cli/upload.py` (or wherever CLI args are defined) — add `--concurrency` flag
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Unit: semaphore limits concurrent tasks to specified count
|
||||||
|
- [ ] Unit: individual card failure doesn't abort batch (return_exceptions=True)
|
||||||
|
- [ ] Unit: progress logging fires at correct intervals
|
||||||
|
- [ ] Integration: 20-card concurrent upload completes successfully
|
||||||
|
- [ ] Integration: S3 URLs are correct after concurrent upload
|
||||||
|
- [ ] Integration: `--concurrency 1` behaves like sequential (regression safety)
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. Default concurrency of 8 parallel card processes
|
||||||
|
2. Individual failures logged, don't abort batch
|
||||||
|
3. `fetch_card_image` timeout is 10s
|
||||||
|
4. 800-card upload estimated at ~3-4 minutes with 8x concurrency (with WP-01 + WP-02 deployed)
|
||||||
|
5. `--concurrency` flag available on CLI
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### WP-05: Legacy Upload Script Update
|
||||||
|
|
||||||
|
**Repo:** `card-creation`
|
||||||
|
**Complexity:** S
|
||||||
|
**Dependencies:** WP-04
|
||||||
|
|
||||||
|
Apply the same concurrency pattern to `check_cards_and_upload.py` for users who still use the legacy script.
|
||||||
|
|
||||||
|
#### Current State
|
||||||
|
- `check_cards_and_upload.py` lines 150-293: identical sequential pattern to `pd_cards/core/upload.py`
|
||||||
|
- Module-level boto3 client (line 27)
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
1. Refactor the sequential loop to use `asyncio.gather` + `Semaphore` (same pattern as WP-04)
|
||||||
|
2. Offload synchronous S3 calls to thread pool
|
||||||
|
3. Increase fetch timeout to 10s
|
||||||
|
4. Add progress reporting
|
||||||
|
|
||||||
|
#### Files
|
||||||
|
- **Modify:** `check_cards_and_upload.py`
|
||||||
|
|
||||||
|
#### Tests
|
||||||
|
- [ ] Integration: legacy script uploads 10 cards concurrently without errors
|
||||||
|
- [ ] Integration: S3 URLs match expected format
|
||||||
|
|
||||||
|
#### Acceptance Criteria
|
||||||
|
1. Same concurrency behavior as WP-04
|
||||||
|
2. No regression in existing functionality
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## WP Summary
|
||||||
|
|
||||||
|
| WP | Title | Repo | Size | Dependencies | Tests |
|
||||||
|
|----|-------|------|------|-------------|-------|
|
||||||
|
| WP-00 | Baseline Benchmarks | both | XS | — | 1 |
|
||||||
|
| WP-01 | Self-Hosted Fonts | database | S | — | 4 |
|
||||||
|
| WP-02 | Persistent Browser Instance | database | M | — | 6 |
|
||||||
|
| WP-03 | FastAPI Lifespan Hooks | database | S | WP-02 | 4 |
|
||||||
|
| WP-04 | Concurrent Upload Pipeline | card-creation | M | WP-02 | 6 |
|
||||||
|
| WP-05 | Legacy Upload Script Update | card-creation | S | WP-04 | 2 |
|
||||||
|
|
||||||
|
**Total: 6 WPs, ~23 tests**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Dependency Graph
|
||||||
|
|
||||||
|
```
|
||||||
|
WP-00 (benchmarks)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
WP-01 (fonts) ──────┐
|
||||||
|
├──> WP-03 (lifespan) ──> Deploy to dev ──> WP-04 (concurrent upload)
|
||||||
|
WP-02 (browser) ────┘ |
|
||||||
|
v
|
||||||
|
WP-05 (legacy script)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Re-run benchmarks
|
||||||
|
```
|
||||||
|
|
||||||
|
**Parallelization:**
|
||||||
|
- WP-00, WP-01, WP-02 can all start immediately in parallel
|
||||||
|
- WP-03 needs WP-02
|
||||||
|
- WP-04 needs WP-02 deployed (persistent browser must be running server-side for concurrent fetches to work)
|
||||||
|
- WP-05 needs WP-04 (reuse the pattern)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Risks
|
||||||
|
|
||||||
|
| Risk | Likelihood | Impact | Mitigation |
|
||||||
|
|------|-----------|--------|------------|
|
||||||
|
| Base64-embedded fonts bloat template HTML | Medium | Low | WOFF2 files are small (~20-40KB each). Total ~150KB base64 added to template. Acceptable since template is loaded once into Playwright, not transmitted to clients. |
|
||||||
|
| Persistent browser memory leak | Medium | Medium | Always close pages in `finally` block. Monitor RSS after sustained renders. Add `is_connected()` check for crash recovery. |
|
||||||
|
| Concurrent renders overload API server | Low | High | Semaphore bounds concurrency. Start at 8, tune based on server RAM (~100MB per page). 8 pages = ~800MB, well within 16GB. |
|
||||||
|
| Synchronous boto3 blocks event loop under concurrency | Medium | Medium | Use `run_in_executor` to offload to thread pool. Consider `aioboto3` if thread pool proves insufficient. |
|
||||||
|
| Visual regression from font change | Low | High | Visual diff test before/after. Render same card with both approaches and compare pixel output. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
None — Phase 0 is straightforward infrastructure optimization with no design decisions pending.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Follow-On: Local High-Concurrency Rendering (2026-03-14)
|
||||||
|
|
||||||
|
After Phase 0 was deployed, a follow-on improvement was implemented: **configurable API URL** for card rendering. This enables running the Paper Dynasty API server locally on the workstation and pointing upload scripts at `localhost` for dramatically higher concurrency.
|
||||||
|
|
||||||
|
### Changes
|
||||||
|
- `pd_cards/core/upload.py` — `upload_cards_to_s3()`, `refresh_card_images()`, `check_card_images()` accept `api_url` parameter (defaults to production)
|
||||||
|
- `pd_cards/commands/upload.py` — `--api-url` CLI option on `upload s3` command
|
||||||
|
- `check_cards_and_upload.py` — `PD_API_URL` env var override (legacy script)
|
||||||
|
|
||||||
|
### Expected Performance
|
||||||
|
|
||||||
|
| Scenario | Per-card | 800 cards |
|
||||||
|
|----------|----------|-----------|
|
||||||
|
| Remote server, 8x concurrency (current) | ~0.98s render + network | ~2-3 min |
|
||||||
|
| Local server, 32x concurrency | ~0.98s render, 32 parallel | ~30-45 sec |
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
```bash
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- Phase 0 is a prerequisite for Phase 4 (Animated Cosmetics) which needs the persistent browser for efficient multi-frame APNG capture
|
||||||
|
- The persistent browser also benefits Phase 2/3 variant rendering
|
||||||
|
- GPU acceleration was evaluated and rejected — see PRD `02-architecture.md` § Optimization 4
|
||||||
|
- Consider `aioboto3` as a future enhancement if `run_in_executor` thread pool becomes a bottleneck
|
||||||
@ -44,6 +44,11 @@ def s3(
|
|||||||
concurrency: int = typer.Option(
|
concurrency: int = typer.Option(
|
||||||
8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)"
|
8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)"
|
||||||
),
|
),
|
||||||
|
api_url: str = typer.Option(
|
||||||
|
"https://pd.manticorum.com/api",
|
||||||
|
"--api-url",
|
||||||
|
help="API base URL for card rendering (use http://localhost:8000/api for local server)",
|
||||||
|
),
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Upload card images to AWS S3.
|
Upload card images to AWS S3.
|
||||||
@ -51,6 +56,9 @@ def s3(
|
|||||||
Fetches card images from Paper Dynasty API and uploads to S3 bucket.
|
Fetches card images from Paper Dynasty API and uploads to S3 bucket.
|
||||||
Cards are processed concurrently; use --concurrency to tune parallelism.
|
Cards are processed concurrently; use --concurrency to tune parallelism.
|
||||||
|
|
||||||
|
For high-concurrency local rendering, start the API server locally and use:
|
||||||
|
pd-cards upload s3 --cardset "2005 Live" --api-url http://localhost:8000/api --concurrency 32
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||||
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
||||||
@ -71,6 +79,7 @@ def s3(
|
|||||||
console.print("Skipping: Batting cards")
|
console.print("Skipping: Batting cards")
|
||||||
if skip_pitchers:
|
if skip_pitchers:
|
||||||
console.print("Skipping: Pitching cards")
|
console.print("Skipping: Pitching cards")
|
||||||
|
console.print(f"API URL: {api_url}")
|
||||||
console.print(f"Upload to S3: {upload and not dry_run}")
|
console.print(f"Upload to S3: {upload and not dry_run}")
|
||||||
console.print(f"Update URLs: {update_urls and not dry_run}")
|
console.print(f"Update URLs: {update_urls and not dry_run}")
|
||||||
console.print(f"Concurrency: {concurrency} parallel tasks")
|
console.print(f"Concurrency: {concurrency} parallel tasks")
|
||||||
@ -105,6 +114,7 @@ def s3(
|
|||||||
update_urls=update_urls,
|
update_urls=update_urls,
|
||||||
on_progress=progress_callback,
|
on_progress=progress_callback,
|
||||||
concurrency=concurrency,
|
concurrency=concurrency,
|
||||||
|
api_url=api_url,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -107,6 +107,9 @@ def upload_card_to_s3(
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_PD_API_URL = "https://pd.manticorum.com/api"
|
||||||
|
|
||||||
|
|
||||||
async def upload_cards_to_s3(
|
async def upload_cards_to_s3(
|
||||||
cardset_name: str,
|
cardset_name: str,
|
||||||
start_id: Optional[int] = None,
|
start_id: Optional[int] = None,
|
||||||
@ -120,6 +123,7 @@ async def upload_cards_to_s3(
|
|||||||
region: str = DEFAULT_AWS_REGION,
|
region: str = DEFAULT_AWS_REGION,
|
||||||
on_progress: callable = None,
|
on_progress: callable = None,
|
||||||
concurrency: int = 8,
|
concurrency: int = 8,
|
||||||
|
api_url: str = DEFAULT_PD_API_URL,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Upload card images to S3 for a cardset using concurrent async tasks.
|
Upload card images to S3 for a cardset using concurrent async tasks.
|
||||||
@ -175,8 +179,9 @@ async def upload_cards_to_s3(
|
|||||||
timestamp = int(now.timestamp())
|
timestamp = int(now.timestamp())
|
||||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||||
|
|
||||||
# PD API base URL for card generation
|
# PD API base URL for card generation (configurable for local rendering)
|
||||||
PD_API_URL = "https://pd.manticorum.com/api"
|
PD_API_URL = api_url
|
||||||
|
logger.info(f"Using API URL: {PD_API_URL}")
|
||||||
|
|
||||||
# Initialize S3 client if uploading (boto3 client is thread-safe for reads;
|
# Initialize S3 client if uploading (boto3 client is thread-safe for reads;
|
||||||
# we will call it from a thread pool so we create it once here)
|
# we will call it from a thread pool so we create it once here)
|
||||||
@ -409,6 +414,7 @@ async def refresh_card_images(
|
|||||||
limit: Optional[int] = None,
|
limit: Optional[int] = None,
|
||||||
html_cards: bool = False,
|
html_cards: bool = False,
|
||||||
on_progress: callable = None,
|
on_progress: callable = None,
|
||||||
|
api_url: str = DEFAULT_PD_API_URL,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Refresh card images for a cardset by triggering regeneration.
|
Refresh card images for a cardset by triggering regeneration.
|
||||||
@ -428,7 +434,7 @@ async def refresh_card_images(
|
|||||||
raise ValueError(f'Cardset "{cardset_name}" not found')
|
raise ValueError(f'Cardset "{cardset_name}" not found')
|
||||||
cardset = c_query["cardsets"][0]
|
cardset = c_query["cardsets"][0]
|
||||||
|
|
||||||
CARD_BASE_URL = "https://pd.manticorum.com/api/v2/players"
|
CARD_BASE_URL = f"{api_url}/v2/players"
|
||||||
|
|
||||||
# Get all players
|
# Get all players
|
||||||
p_query = await db_get(
|
p_query = await db_get(
|
||||||
@ -541,7 +547,10 @@ async def refresh_card_images(
|
|||||||
|
|
||||||
|
|
||||||
async def check_card_images(
|
async def check_card_images(
|
||||||
cardset_name: str, limit: Optional[int] = None, on_progress: callable = None
|
cardset_name: str,
|
||||||
|
limit: Optional[int] = None,
|
||||||
|
on_progress: callable = None,
|
||||||
|
api_url: str = DEFAULT_PD_API_URL,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Check and validate card images without uploading.
|
Check and validate card images without uploading.
|
||||||
@ -577,7 +586,7 @@ async def check_card_images(
|
|||||||
now = datetime.datetime.now()
|
now = datetime.datetime.now()
|
||||||
timestamp = int(now.timestamp())
|
timestamp = int(now.timestamp())
|
||||||
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
release_date = f"{now.year}-{now.month}-{now.day}-{timestamp}"
|
||||||
PD_API_URL = "https://pd.manticorum.com/api"
|
PD_API_URL = api_url
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
successes = []
|
successes = []
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user