feat: concurrent upload pipeline and benchmarks (Phase 0)
- Replace sequential upload loop with asyncio.gather + Semaphore(8) (WP-04)
- Offload synchronous boto3 S3 calls to thread pool executor
- Increase fetch_card_image timeout from 6s to 10s
- Add --concurrency/-j CLI flag to pd-cards upload
- Add progress reporting every 20 completions
- Individual card failures no longer abort batch
- Apply same concurrency pattern to legacy check_cards_and_upload.py (WP-05)
- Add benchmark script for render pipeline measurements (WP-00)

Target: 800-card upload from ~40 min to <5 min (with server-side persistent browser deployed).

Refs: #87, #91, #92

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
336014b689
commit
979f3080d5
@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import functools
|
||||
import sys
|
||||
import boto3
|
||||
|
||||
@ -14,6 +15,9 @@ HTML_CARDS = False # boolean to only check and not generate cards
|
||||
SKIP_ARMS = False
|
||||
SKIP_BATS = False
|
||||
|
||||
# Concurrency
|
||||
CONCURRENCY = 8 # number of parallel card-processing tasks
|
||||
|
||||
# AWS Configuration
|
||||
AWS_BUCKET_NAME = "paper-dynasty" # Change to your bucket name
|
||||
AWS_REGION = "us-east-1" # Change to your region
|
||||
@ -23,11 +27,11 @@ UPLOAD_TO_S3 = (
|
||||
)
|
||||
UPDATE_PLAYER_URLS = True # Set to False to skip player URL updates (testing) - STEP 6: Update player URLs
|
||||
|
||||
# Initialize S3 client
|
||||
# Initialize S3 client (module-level; boto3 low-level clients are thread-safe, so one client is shared by the thread-pool uploads)
|
||||
s3_client = boto3.client("s3", region_name=AWS_REGION) if UPLOAD_TO_S3 else None
|
||||
|
||||
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
|
||||
"""
|
||||
Fetch card image from URL and return raw bytes.
|
||||
|
||||
@ -134,165 +138,216 @@ async def main(args):
|
||||
# PD API base URL for card generation
|
||||
PD_API_URL = "https://pd.manticorum.com/api"
|
||||
|
||||
print(f"\nRelease date for cards: {release_date}")
|
||||
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
||||
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}")
|
||||
print(f"Concurrency: {CONCURRENCY} parallel tasks\n")
|
||||
|
||||
# Build filtered list respecting SKIP_ARMS, SKIP_BATS, START_ID, TEST_COUNT
|
||||
max_count = TEST_COUNT if TEST_COUNT is not None else 9999
|
||||
filtered_players = []
|
||||
for x in all_players:
|
||||
if len(filtered_players) >= max_count:
|
||||
break
|
||||
if "pitching" in x["image"] and SKIP_ARMS:
|
||||
continue
|
||||
if "batting" in x["image"] and SKIP_BATS:
|
||||
continue
|
||||
if START_ID is not None and START_ID > x["player_id"]:
|
||||
continue
|
||||
filtered_players.append(x)
|
||||
|
||||
total = len(filtered_players)
|
||||
logger.info(f"Processing {total} cards with concurrency={CONCURRENCY}")
|
||||
|
||||
# Shared mutable state protected by locks
|
||||
errors = []
|
||||
successes = []
|
||||
uploads = []
|
||||
url_updates = []
|
||||
cxn_error = False
|
||||
count = -1
|
||||
completed = 0
|
||||
progress_lock = asyncio.Lock()
|
||||
results_lock = asyncio.Lock()
|
||||
|
||||
start_time = datetime.datetime.now()
|
||||
loop = asyncio.get_event_loop()
|
||||
semaphore = asyncio.Semaphore(CONCURRENCY)
|
||||
|
||||
print(f"\nRelease date for cards: {release_date}")
|
||||
print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}")
|
||||
print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}\n")
|
||||
async def report_progress():
|
||||
"""Increment the completed counter and log/print every 20 completions and when the last card finishes."""
|
||||
nonlocal completed
|
||||
async with progress_lock:
|
||||
completed += 1
|
||||
if completed % 20 == 0 or completed == total:
|
||||
print(f"Progress: {completed}/{total} cards processed")
|
||||
logger.info(f"Progress: {completed}/{total} cards processed")
|
||||
|
||||
# Create persistent aiohttp session for all card fetches
|
||||
async with aiohttp.ClientSession() as session:
|
||||
for x in all_players:
|
||||
if "pitching" in x["image"] and SKIP_ARMS:
|
||||
pass
|
||||
elif "batting" in x["image"] and SKIP_BATS:
|
||||
pass
|
||||
elif START_ID is not None and START_ID > x["player_id"]:
|
||||
pass
|
||||
elif "sombaseball" in x["image"]:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
async def process_single_card(x: dict) -> None:
|
||||
"""
|
||||
Process one player entry under the semaphore: fetch card image(s), upload
|
||||
to S3 (offloading the synchronous boto3 call to a thread pool), and
|
||||
optionally patch the player record with the new S3 URL.
|
||||
|
||||
Both the primary card (image) and the secondary card for two-way players
|
||||
(image2) are handled. Failures are appended to the shared errors list
|
||||
rather than re-raised so the overall batch continues.
|
||||
"""
|
||||
async with semaphore:
|
||||
player_id = x["player_id"]
|
||||
|
||||
# --- primary card ---
|
||||
if "sombaseball" in x["image"]:
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
pd_card_url = (
|
||||
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
|
||||
)
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
count += 1
|
||||
if count % 20 == 0:
|
||||
print(f"Card #{count + 1} being pulled is {x['p_name']}...")
|
||||
elif TEST_COUNT is not None and TEST_COUNT < count:
|
||||
print("Done test run")
|
||||
break
|
||||
card_url = pd_card_url
|
||||
timeout = 10
|
||||
|
||||
# Determine card type from existing image URL
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
card_url = pd_card_url
|
||||
timeout = 6
|
||||
|
||||
try:
|
||||
# Upload to S3 if enabled
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
# Fetch card image bytes directly
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
s3_url = upload_card_to_s3(
|
||||
primary_ok = False
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
# boto3 is synchronous — offload to thread pool so the event
|
||||
# loop is not blocked during the S3 PUT
|
||||
s3_url = await loop.run_in_executor(
|
||||
None,
|
||||
functools.partial(
|
||||
upload_card_to_s3,
|
||||
image_bytes,
|
||||
x["player_id"],
|
||||
player_id,
|
||||
card_type,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
)
|
||||
uploads.append((x["player_id"], card_type, s3_url))
|
||||
),
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type, s3_url))
|
||||
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=player_id,
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type, s3_url))
|
||||
logger.info(f"Updated player {player_id} image URL to S3")
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
logger.info("calling the card url")
|
||||
await url_get(card_url, timeout=timeout)
|
||||
|
||||
primary_ok = True
|
||||
|
||||
except ConnectionError as e:
|
||||
logger.error(f"Connection error for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
|
||||
if not primary_ok:
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
# --- secondary card (two-way players) ---
|
||||
if x["image2"] is not None:
|
||||
if "sombaseball" in x["image2"]:
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=10
|
||||
)
|
||||
s3_url2 = await loop.run_in_executor(
|
||||
None,
|
||||
functools.partial(
|
||||
upload_card_to_s3,
|
||||
image_bytes2,
|
||||
player_id,
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
),
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type2, s3_url2))
|
||||
|
||||
# Update player record with new S3 URL
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
url_updates.append((x["player_id"], card_type, s3_url))
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image URL to S3"
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type2, s3_url2))
|
||||
logger.info(f"Updated player {player_id} image2 URL to S3")
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
logger.info("calling the card url")
|
||||
resp = await url_get(card_url, timeout=timeout)
|
||||
await url_get(card_url2, timeout=10)
|
||||
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
logger.error(f"Connection error for player {player_id} image2: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']}: {e}"
|
||||
f"S3 upload/update failed for player {player_id} image2: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
continue
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
|
||||
# Handle image2 (dual-position players)
|
||||
if x["image2"] is not None:
|
||||
# Determine second card type
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
|
||||
|
||||
if HTML_CARDS:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
if "sombaseball" in x["image2"]:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
else:
|
||||
try:
|
||||
if UPLOAD_TO_S3 and not HTML_CARDS:
|
||||
# Fetch second card image bytes directly from PD API
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=6
|
||||
)
|
||||
s3_url2 = upload_card_to_s3(
|
||||
image_bytes2,
|
||||
x["player_id"],
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
)
|
||||
uploads.append((x["player_id"], card_type2, s3_url2))
|
||||
|
||||
# Update player record with new S3 URL for image2
|
||||
if UPDATE_PLAYER_URLS:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
url_updates.append(
|
||||
(x["player_id"], card_type2, s3_url2)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image2 URL to S3"
|
||||
)
|
||||
else:
|
||||
# Just validate card exists (old behavior)
|
||||
resp = await url_get(card_url2, timeout=6)
|
||||
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
else:
|
||||
else:
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
await report_progress()
|
||||
|
||||
# Create persistent aiohttp session shared across all concurrent tasks
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = [process_single_card(x) for x in filtered_players]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Print summary
|
||||
print(f"\n{'=' * 60}")
|
||||
print("SUMMARY")
|
||||
|
||||
@ -5,6 +5,7 @@ Commands for uploading card images to AWS S3.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@ -40,14 +41,19 @@ def s3(
|
||||
dry_run: bool = typer.Option(
|
||||
False, "--dry-run", "-n", help="Preview without uploading"
|
||||
),
|
||||
concurrency: int = typer.Option(
|
||||
8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)"
|
||||
),
|
||||
):
|
||||
"""
|
||||
Upload card images to AWS S3.
|
||||
|
||||
Fetches card images from Paper Dynasty API and uploads to S3 bucket.
|
||||
Cards are processed concurrently; use --concurrency to tune parallelism.
|
||||
|
||||
Example:
|
||||
pd-cards upload s3 --cardset "2005 Live" --limit 10
|
||||
pd-cards upload s3 --cardset "2005 Live" --concurrency 16
|
||||
"""
|
||||
console.print()
|
||||
console.print("=" * 70)
|
||||
@ -67,6 +73,7 @@ def s3(
|
||||
console.print("Skipping: Pitching cards")
|
||||
console.print(f"Upload to S3: {upload and not dry_run}")
|
||||
console.print(f"Update URLs: {update_urls and not dry_run}")
|
||||
console.print(f"Concurrency: {concurrency} parallel tasks")
|
||||
console.print()
|
||||
|
||||
if dry_run:
|
||||
@ -76,39 +83,52 @@ def s3(
|
||||
raise typer.Exit(0)
|
||||
|
||||
try:
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
import check_cards_and_upload as ccu
|
||||
from pd_cards.core.upload import upload_cards_to_s3
|
||||
|
||||
# Configure the module's globals
|
||||
ccu.CARDSET_NAME = cardset
|
||||
ccu.START_ID = start_id
|
||||
ccu.TEST_COUNT = limit if limit else 9999
|
||||
ccu.HTML_CARDS = html
|
||||
ccu.SKIP_BATS = skip_batters
|
||||
ccu.SKIP_ARMS = skip_pitchers
|
||||
ccu.UPLOAD_TO_S3 = upload
|
||||
ccu.UPDATE_PLAYER_URLS = update_urls
|
||||
|
||||
# Re-initialize S3 client if uploading
|
||||
if upload:
|
||||
import boto3
|
||||
|
||||
ccu.s3_client = boto3.client("s3", region_name=ccu.AWS_REGION)
|
||||
else:
|
||||
ccu.s3_client = None
|
||||
def progress_callback(_count: int, label: str) -> None:
|
||||
console.print(f" Progress: {label}")
|
||||
|
||||
console.print("[bold]Starting S3 upload...[/bold]")
|
||||
console.print()
|
||||
|
||||
asyncio.run(ccu.main([]))
|
||||
result = asyncio.run(
|
||||
upload_cards_to_s3(
|
||||
cardset_name=cardset,
|
||||
start_id=start_id,
|
||||
limit=limit,
|
||||
html_cards=html,
|
||||
skip_batters=skip_batters,
|
||||
skip_pitchers=skip_pitchers,
|
||||
upload=upload,
|
||||
update_urls=update_urls,
|
||||
on_progress=progress_callback,
|
||||
concurrency=concurrency,
|
||||
)
|
||||
)
|
||||
|
||||
success_count = len(result["successes"])
|
||||
error_count = len(result["errors"])
|
||||
upload_count = len(result["uploads"])
|
||||
url_update_count = len(result["url_updates"])
|
||||
|
||||
console.print()
|
||||
console.print("=" * 70)
|
||||
console.print("[bold green]✓ S3 UPLOAD COMPLETE[/bold green]")
|
||||
console.print("=" * 70)
|
||||
console.print(f" Successes: {success_count}")
|
||||
console.print(f" S3 uploads: {upload_count}")
|
||||
console.print(f" URL updates: {url_update_count}")
|
||||
if error_count:
|
||||
console.print(f" [red]Errors: {error_count}[/red]")
|
||||
for player, err in result["errors"][:10]:
|
||||
console.print(
|
||||
f" - player {player.get('player_id', '?')} "
|
||||
f"({player.get('p_name', '?')}): {err}"
|
||||
)
|
||||
if error_count > 10:
|
||||
console.print(f" ... and {error_count - 10} more (see logs)")
|
||||
|
||||
except ImportError as e:
|
||||
console.print(f"[red]Error importing modules: {e}[/red]")
|
||||
|
||||
@ -4,6 +4,7 @@ Card image upload and management core logic.
|
||||
Business logic for uploading card images to AWS S3 and managing card URLs.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
from typing import Optional
|
||||
import urllib.parse
|
||||
@ -25,7 +26,7 @@ def get_s3_base_url(
|
||||
return f"https://{bucket}.s3.{region}.amazonaws.com"
|
||||
|
||||
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
|
||||
async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes:
|
||||
"""
|
||||
Fetch card image from URL and return raw bytes.
|
||||
|
||||
@ -118,9 +119,17 @@ async def upload_cards_to_s3(
|
||||
bucket: str = DEFAULT_AWS_BUCKET,
|
||||
region: str = DEFAULT_AWS_REGION,
|
||||
on_progress: callable = None,
|
||||
concurrency: int = 8,
|
||||
) -> dict:
|
||||
"""
|
||||
Upload card images to S3 for a cardset.
|
||||
Upload card images to S3 for a cardset using concurrent async tasks.
|
||||
|
||||
Cards are fetched and uploaded in parallel, bounded by ``concurrency``
|
||||
semaphore slots. boto3 S3 calls (synchronous) are offloaded to a thread
|
||||
pool via ``loop.run_in_executor`` so they do not block the event loop.
|
||||
|
||||
Individual card failures are collected and do NOT abort the batch;
|
||||
a summary is logged once all tasks complete.
|
||||
|
||||
Args:
|
||||
cardset_name: Name of the cardset to process
|
||||
@ -134,6 +143,7 @@ async def upload_cards_to_s3(
|
||||
bucket: S3 bucket name
|
||||
region: AWS region
|
||||
on_progress: Callback function for progress updates
|
||||
concurrency: Number of parallel card-processing tasks (default 8)
|
||||
|
||||
Returns:
|
||||
Dict with lists of errors, successes, uploads, url_updates
|
||||
@ -168,160 +178,221 @@ async def upload_cards_to_s3(
|
||||
# PD API base URL for card generation
|
||||
PD_API_URL = "https://pd.manticorum.com/api"
|
||||
|
||||
# Initialize S3 client if uploading
|
||||
# Initialize S3 client if uploading (boto3 low-level clients are thread-safe;
|
||||
# we will call it from a thread pool so we create it once here)
|
||||
s3_client = boto3.client("s3", region_name=region) if upload else None
|
||||
|
||||
# Build the filtered list of players to process, respecting start_id / limit
|
||||
max_count = limit or 9999
|
||||
filtered_players = []
|
||||
for x in all_players:
|
||||
if len(filtered_players) >= max_count:
|
||||
break
|
||||
if "pitching" in x["image"] and skip_pitchers:
|
||||
continue
|
||||
if "batting" in x["image"] and skip_batters:
|
||||
continue
|
||||
if start_id is not None and start_id > x["player_id"]:
|
||||
continue
|
||||
filtered_players.append(x)
|
||||
|
||||
total = len(filtered_players)
|
||||
logger.info(f"Processing {total} cards with concurrency={concurrency}")
|
||||
|
||||
# Shared mutable state protected by locks
|
||||
errors = []
|
||||
successes = []
|
||||
uploads = []
|
||||
url_updates = []
|
||||
cxn_error = False
|
||||
count = 0
|
||||
max_count = limit or 9999
|
||||
completed = 0
|
||||
progress_lock = asyncio.Lock()
|
||||
results_lock = asyncio.Lock()
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
for x in all_players:
|
||||
# Apply filters
|
||||
if "pitching" in x["image"] and skip_pitchers:
|
||||
continue
|
||||
if "batting" in x["image"] and skip_batters:
|
||||
continue
|
||||
if start_id is not None and start_id > x["player_id"]:
|
||||
continue
|
||||
loop = asyncio.get_event_loop()
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
async def report_progress():
|
||||
"""Increment the completed counter and log every 20 completions and when the last card finishes."""
|
||||
nonlocal completed
|
||||
async with progress_lock:
|
||||
completed += 1
|
||||
if completed % 20 == 0 or completed == total:
|
||||
logger.info(f"Progress: {completed}/{total} cards processed")
|
||||
if on_progress:
|
||||
on_progress(completed, f"{completed}/{total}")
|
||||
|
||||
async def process_single_card(x: dict) -> None:
|
||||
"""
|
||||
Process one player entry: fetch card image(s), upload to S3, and
|
||||
optionally patch the player record with the new S3 URL.
|
||||
|
||||
Both the primary card (image) and the secondary card for two-way
|
||||
players (image2) are handled here. Errors are appended to the
|
||||
shared ``errors`` list rather than re-raised so the batch continues.
|
||||
"""
|
||||
async with semaphore:
|
||||
player_id = x["player_id"]
|
||||
|
||||
# --- primary card ---
|
||||
if "sombaseball" in x["image"]:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
continue
|
||||
if count >= max_count:
|
||||
break
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
count += 1
|
||||
if on_progress and count % 20 == 0:
|
||||
on_progress(count, x["p_name"])
|
||||
|
||||
# Determine card type from existing image URL
|
||||
card_type = "pitching" if "pitching" in x["image"] else "batting"
|
||||
|
||||
# Generate card URL from PD API (forces fresh generation from database)
|
||||
pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}"
|
||||
pd_card_url = (
|
||||
f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}"
|
||||
)
|
||||
|
||||
if html_cards:
|
||||
card_url = f"{pd_card_url}&html=true"
|
||||
timeout = 2
|
||||
else:
|
||||
card_url = pd_card_url
|
||||
timeout = 6
|
||||
timeout = 10
|
||||
|
||||
primary_ok = False
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
# Fetch card image bytes directly
|
||||
image_bytes = await fetch_card_image(
|
||||
session, card_url, timeout=timeout
|
||||
)
|
||||
s3_url = upload_card_to_s3(
|
||||
# boto3 is synchronous — offload to thread pool
|
||||
s3_url = await loop.run_in_executor(
|
||||
None,
|
||||
upload_card_to_s3,
|
||||
s3_client,
|
||||
image_bytes,
|
||||
x["player_id"],
|
||||
player_id,
|
||||
card_type,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
uploads.append((x["player_id"], card_type, s3_url))
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type, s3_url))
|
||||
|
||||
# Update player record with new S3 URL
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
object_id=player_id,
|
||||
params=[("image", s3_url)],
|
||||
)
|
||||
url_updates.append((x["player_id"], card_type, s3_url))
|
||||
logger.info(f"Updated player {x['player_id']} image URL to S3")
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type, s3_url))
|
||||
logger.info(f"Updated player {player_id} image URL to S3")
|
||||
else:
|
||||
# Just validate card exists
|
||||
logger.info(f"Validating card URL: {card_url}")
|
||||
await url_get(card_url, timeout=timeout)
|
||||
|
||||
primary_ok = True
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
errors.append((x, e))
|
||||
logger.error(f"Connection error for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
errors.append((x, e))
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']}: {e}"
|
||||
)
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
continue
|
||||
logger.error(f"S3 upload/update failed for player {player_id}: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error: {e}"))
|
||||
|
||||
# Handle image2 (dual-position players)
|
||||
if not primary_ok:
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
# --- secondary card (two-way players) ---
|
||||
if x["image2"] is not None:
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}"
|
||||
|
||||
if html_cards:
|
||||
card_url2 = f"{pd_card_url2}&html=true"
|
||||
else:
|
||||
card_url2 = pd_card_url2
|
||||
|
||||
if "sombaseball" in x["image2"]:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
else:
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=6
|
||||
)
|
||||
s3_url2 = upload_card_to_s3(
|
||||
s3_client,
|
||||
image_bytes2,
|
||||
x["player_id"],
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
uploads.append((x["player_id"], card_type2, s3_url2))
|
||||
async with results_lock:
|
||||
errors.append((x, f"Bad card url: {x['image2']}"))
|
||||
await report_progress()
|
||||
return
|
||||
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=x["player_id"],
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
url_updates.append(
|
||||
(x["player_id"], card_type2, s3_url2)
|
||||
)
|
||||
logger.info(
|
||||
f"Updated player {x['player_id']} image2 URL to S3"
|
||||
)
|
||||
else:
|
||||
await url_get(card_url2, timeout=6)
|
||||
card_type2 = "pitching" if "pitching" in x["image2"] else "batting"
|
||||
pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}"
|
||||
card_url2 = f"{pd_card_url2}&html=true" if html_cards else pd_card_url2
|
||||
|
||||
try:
|
||||
if upload and not html_cards:
|
||||
image_bytes2 = await fetch_card_image(
|
||||
session, card_url2, timeout=10
|
||||
)
|
||||
s3_url2 = await loop.run_in_executor(
|
||||
None,
|
||||
upload_card_to_s3,
|
||||
s3_client,
|
||||
image_bytes2,
|
||||
player_id,
|
||||
card_type2,
|
||||
release_date,
|
||||
cardset["id"],
|
||||
bucket,
|
||||
region,
|
||||
)
|
||||
async with results_lock:
|
||||
uploads.append((player_id, card_type2, s3_url2))
|
||||
|
||||
if update_urls:
|
||||
await db_patch(
|
||||
"players",
|
||||
object_id=player_id,
|
||||
params=[("image2", s3_url2)],
|
||||
)
|
||||
async with results_lock:
|
||||
url_updates.append((player_id, card_type2, s3_url2))
|
||||
logger.info(f"Updated player {player_id} image2 URL to S3")
|
||||
else:
|
||||
await url_get(card_url2, timeout=10)
|
||||
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
except ConnectionError as e:
|
||||
if cxn_error:
|
||||
raise e
|
||||
cxn_error = True
|
||||
except ConnectionError as e:
|
||||
logger.error(f"Connection error for player {player_id} image2: {e}")
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except ValueError as e:
|
||||
except ValueError as e:
|
||||
async with results_lock:
|
||||
errors.append((x, e))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {x['player_id']} image2: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"S3 upload/update failed for player {player_id} image2: {e}"
|
||||
)
|
||||
async with results_lock:
|
||||
errors.append((x, f"S3 error (image2): {e}"))
|
||||
|
||||
else:
|
||||
successes.append(x)
|
||||
async with results_lock:
|
||||
successes.append(x)
|
||||
|
||||
await report_progress()
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tasks = [process_single_card(x) for x in filtered_players]
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
# Log final summary
|
||||
success_count = len(successes)
|
||||
error_count = len(errors)
|
||||
logger.info(
|
||||
f"Upload complete: {success_count} succeeded, {error_count} failed "
|
||||
f"out of {total} cards"
|
||||
)
|
||||
if error_count:
|
||||
for player, err in errors:
|
||||
logger.warning(
|
||||
f" Failed: player {player.get('player_id', '?')} "
|
||||
f"({player.get('p_name', '?')}): {err}"
|
||||
)
|
||||
|
||||
return {
|
||||
"errors": errors,
|
||||
|
||||
290
scripts/benchmark_render.sh
Executable file
290
scripts/benchmark_render.sh
Executable file
@ -0,0 +1,290 @@
|
||||
#!/bin/bash
# =============================================================================
# WP-00: Paper Dynasty Card Render & Upload Pipeline Benchmark
# Phase 0 - Render Pipeline Optimization
#
# Usage:
#   ./scripts/benchmark_render.sh           # Run full benchmark (dev API)
#   ./scripts/benchmark_render.sh --prod    # Run against production API
#   ./scripts/benchmark_render.sh --quick   # Connectivity check only
#
# Requirements: curl, bc
# =============================================================================

# --- Configuration -----------------------------------------------------------

DEV_API="https://pddev.manticorum.com/api"
PROD_API="https://pd.manticorum.com/api"
API_URL="$DEV_API"   # default target; --prod switches it to PROD_API

# Player IDs in the 12000-13000 range (2005 Live cardset)
# Mix of batters and pitchers across different teams
PLAYER_IDS=(12785 12790 12800 12810 12820 12830 12840 12850 12860 12870)

# Results file lives next to this script.
RESULTS_FILE="$(dirname "$0")/benchmark_results.txt"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
# Changes every second; used as the cache-busting query value for each request.
RUN_LABEL="benchmark-$(date +%s)"

# --- Argument parsing ---------------------------------------------------------

QUICK_MODE=false
for arg in "$@"; do
  case "$arg" in
    --prod)  API_URL="$PROD_API" ;;
    --quick) QUICK_MODE=true ;;
    --help|-h)
      echo "Usage: $0 [--prod] [--quick]"
      echo " --prod Use production API instead of dev"
      echo " --quick Connectivity check only (1 request)"
      exit 0
      ;;
    *)
      # Fail loudly: a mistyped flag (e.g. --prd) must not silently fall back
      # to benchmarking the default dev API.
      echo "Unknown option: $arg" >&2
      echo "Usage: $0 [--prod] [--quick]" >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
# --- Helpers -----------------------------------------------------------------

# Print a 72-character rule of dashes to stdout only (not the results file).
hr() { printf '%0.s-' {1..72}; echo; }

# bc-based float arithmetic
fadd() { echo "$1 + $2" | bc -l; }            # prints $1 + $2
fdiv() { echo "scale=6; $1 / $2" | bc -l; }   # prints $1 / $2 with scale 6
flt()  { echo "$1 < $2" | bc -l; }            # prints 1 if $1 < $2, else 0

# Format a number to 3 decimal places.
# Forced to the C locale because curl and bc emit '.' as the decimal
# separator, which printf rejects under comma-decimal locales. An empty or
# missing argument is formatted as 0.
fmt3() { LC_ALL=C printf '%.3f' "${1:-0}"; }

# Print and simultaneously append to results file
log() { echo "$@" | tee -a "$RESULTS_FILE"; }
|
||||
|
||||
# Request one rendered card and record curl's timing metrics.
#
# Arguments:
#   $1  player id
#   $2  card type, inserted as "<type>card" in the URL path (default: batting)
#
# Sets globals:
#   LAST_HTTP  HTTP status code
#   LAST_TIME  total request time in seconds
#   LAST_CONN  connect time in seconds
#   LAST_TTFB  time to first byte in seconds
#   LAST_SIZE  downloaded size in bytes
#   LAST_URL   the URL that was requested
measure_card() {
  local player_id="$1"
  local card_type="${2:-batting}"
  # The cache-bust value depends only on RUN_LABEL and the player id, so two
  # calls for the same player under the same RUN_LABEL request the same URL.
  local cache_bust="${RUN_LABEL}-${player_id}"
  local url="${API_URL}/v2/players/${player_id}/${card_type}card?d=${cache_bust}"

  # -s silent, -o discard body, -w write timing vars separated by |
  # stderr is discarded so shell/curl diagnostics can never end up being
  # parsed as metric fields.
  local result
  result=$(curl -s -o /dev/null \
    -w "%{http_code}|%{time_total}|%{time_connect}|%{time_starttransfer}|%{size_download}" \
    --max-time 30 \
    "$url" 2>/dev/null)

  # Split the five |-separated fields in a single pass; all stay empty when
  # curl produced no output at all.
  IFS='|' read -r LAST_HTTP LAST_TIME LAST_CONN LAST_TTFB LAST_SIZE <<< "$result"
  LAST_URL="$url"
}
|
||||
|
||||
# =============================================================================
# START
# =============================================================================

# Start a fresh results file: four header lines followed by one blank line.
{
  printf '%s\n' \
    "Paper Dynasty Card Render Benchmark" \
    "Run timestamp : $TIMESTAMP" \
    "API target : $API_URL" \
    "Cache-bust tag: $RUN_LABEL" \
    ""
} > "$RESULTS_FILE"

# Banner goes to both the terminal and the results file; the surrounding
# blank lines are terminal-only.
banner_rule="=============================================================="
echo
log "$banner_rule"
log " Paper Dynasty Card Render Benchmark - WP-00 / Phase 0"
log " $(date '+%Y-%m-%d %H:%M:%S')"
log " API: $API_URL"
log "$banner_rule"
echo
|
||||
|
||||
# =============================================================================
# SECTION 1: Connectivity Check
# =============================================================================

log "--- Section 1: Connectivity Check ---"
log ""
log "Sending single request to verify API is reachable..."
log " Player : 12785 (batting card)"

# Probe under a distinct cache-bust label. Section 2 requests player 12785
# again, and measure_card builds the cache-bust value from RUN_LABEL plus the
# player id; without a separate label both requests would use the same URL and
# the first Section 2 sample could be answered from cache instead of rendered.
run_label_saved="$RUN_LABEL"
RUN_LABEL="${RUN_LABEL}-probe"
measure_card 12785 batting
RUN_LABEL="$run_label_saved"

# Report the URL that was actually requested rather than a hand-built copy.
log " URL : $LAST_URL"
echo ""

if [ "$LAST_HTTP" = "200" ]; then
  log " HTTP : $LAST_HTTP OK"
  log " Total : $(fmt3 "$LAST_TIME")s"
  log " Connect: $(fmt3 "$LAST_CONN")s"
  log " TTFB : $(fmt3 "$LAST_TTFB")s"
  log " Size : ${LAST_SIZE} bytes ($(echo "scale=1; ${LAST_SIZE:-0}/1024" | bc)KB)"
  log ""
  log " Connectivity: PASS"
elif [ -z "$LAST_HTTP" ] || [ "$LAST_HTTP" = "000" ]; then
  # Empty or 000 status: curl never received an HTTP response.
  log " ERROR: Could not reach $API_URL (no response / timeout)"
  log " Aborting benchmark."
  echo ""
  exit 1
else
  log " HTTP : $LAST_HTTP"
  log " WARNING: Unexpected status code. Continuing anyway."
fi

echo ""

if [ "$QUICK_MODE" = true ]; then
  log "Quick mode: exiting after connectivity check."
  echo ""
  exit 0
fi
|
||||
|
||||
# =============================================================================
# SECTION 2: Sequential Card Render Benchmark (10 cards)
# =============================================================================

log ""
hr
log "--- Section 2: Sequential Card Render Benchmark ---"
log ""
log "Rendering ${#PLAYER_IDS[@]} cards sequentially with fresh cache busts."
log "Each request forces a full server-side render (bypasses nginx cache)."
log ""
log "$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' 'Player' 'HTTP' 'Total(s)' 'TTFB(s)' 'Connect(s)' 'Size(KB)')"
log "$(printf '%0.s-' {1..62})"

# Accumulators; only HTTP 200 responses contribute to the timing statistics.
total_time="0"
min_time=""
max_time=""
success_count=0
fail_count=0

for pid in "${PLAYER_IDS[@]}"; do
  measure_card "$pid" batting

  # Default to 0 so bc never sees an empty operand.
  size_kb=$(echo "scale=1; ${LAST_SIZE:-0}/1024" | bc)
  row=$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' \
    "$pid" \
    "$LAST_HTTP" \
    "$(fmt3 "$LAST_TIME")" \
    "$(fmt3 "$LAST_TTFB")" \
    "$(fmt3 "$LAST_CONN")" \
    "$size_kb")

  if [ "$LAST_HTTP" = "200" ]; then
    log "$row"
    total_time=$(fadd "$total_time" "$LAST_TIME")
    success_count=$((success_count + 1))

    # Track min
    if [ -z "$min_time" ] || [ "$(flt "$LAST_TIME" "$min_time")" = "1" ]; then
      min_time="$LAST_TIME"
    fi
    # Track max
    if [ -z "$max_time" ] || [ "$(flt "$max_time" "$LAST_TIME")" = "1" ]; then
      max_time="$LAST_TIME"
    fi
  else
    log "$row << FAILED"
    fail_count=$((fail_count + 1))
  fi
done

echo ""
log ""
log "--- Section 2: Results Summary ---"
log ""

if [ "$success_count" -gt 0 ]; then
  avg_time=$(fdiv "$total_time" "$success_count")
  log " Cards requested : ${#PLAYER_IDS[@]}"
  log " Successful : $success_count"
  log " Failed : $fail_count"
  # NOTE: this figure is the sum of the successful requests' curl times, not
  # elapsed wall-clock time; failed requests are not included.
  log " Total wall time : $(fmt3 "$total_time")s"
  log " Average per card : $(fmt3 "$avg_time")s"
  log " Minimum : $(fmt3 "$min_time")s"
  log " Maximum : $(fmt3 "$max_time")s"
  log ""

  # Rough throughput estimate (sequential)
  cards_per_min=$(echo "scale=1; 60 / $avg_time" | bc)
  log " Sequential throughput: ~${cards_per_min} cards/min"

  # Estimate full cardset at ~500 players * 2 cards each = 1000 renders
  est_1000=$(echo "scale=0; (1000 * $avg_time) / 1" | bc)
  log " Est. full cardset (1000 renders, sequential): ~${est_1000}s (~$(echo "scale=1; $est_1000/60" | bc) min)"
else
  log " No successful renders to summarize."
fi
|
||||
|
||||
# =============================================================================
# SECTION 3: Upload Pipeline Reference
# =============================================================================
# Informational only: prints the commands used to time the upload pipeline.
# The description is kept in step with the upload changes shipped alongside
# this script (10s fetch timeout, concurrent processing, --concurrency/-j).

echo ""
log ""
hr
log "--- Section 3: Upload Pipeline Benchmark Commands ---"
log ""
log "The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards"
log "and uploads them to S3. It uses a persistent aiohttp session with a 10s"
log "timeout per card."
log ""
log "To time a dry-run batch of 20 cards:"
log ""
log " cd /mnt/NV2/Development/paper-dynasty/card-creation"
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20 --dry-run"
log ""
log "To time a real upload batch of 20 cards (writes to S3, updates DB URLs):"
log ""
log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20"
log ""
log "Notes:"
log " - dry-run validates card URLs exist without uploading"
log " - Remove --limit for full cardset run"
log " - Pipeline processes cards concurrently (up to 8 at a time by default)"
log " - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card"
log " - Use --concurrency/-j to change the number of parallel card tasks"
log ""
|
||||
|
||||
# =============================================================================
# SECTION 4: Before/After Comparison Template
# =============================================================================
# Prints a fill-in table; the "Before" column is pre-populated from the
# Section 2 statistics when at least one render succeeded.

echo ""
hr
log "--- Section 4: Before/After Comparison Template ---"
log ""
log "Fill in after optimization work is complete."
log ""
log " Metric Before After Delta"
template_rule=$(printf '%0.s-' {1..64})
log " $template_rule"

if [ "$success_count" -gt 0 ]; then
  # Format the measured values once, then drop them into the rows.
  before_avg=$(fmt3 "$avg_time")
  before_min=$(fmt3 "$min_time")
  before_max=$(fmt3 "$max_time")
  log " Avg render time (s) ${before_avg} ___._____ ___._____"
  log " Min render time (s) ${before_min} ___._____ ___._____"
  log " Max render time (s) ${before_max} ___._____ ___._____"
  log " Sequential cards/min ${cards_per_min} ___.___ ___.___"
else
  log " Avg render time (s) (no data) ___._____ ___._____"
fi

# Upload rows are always blank; they are measured by hand.
log " Upload batch (20 cards) ___._____s ___._____s ___._____s"
log " Upload cards/min ___.___ ___.___ ___.___"
log " Full cardset time (est) ___._____min ___._____min ___ min saved"
log ""
|
||||
|
||||
# =============================================================================
# DONE
# =============================================================================

echo ""
hr
log "Benchmark complete."
log "Results saved to: $RESULTS_FILE"
log ""

# Voice notify
# Best-effort POST to a local notification endpoint: output is discarded and
# any failure is ignored. --max-time bounds the wait so a listener that accepts
# the connection but never answers cannot hang the script. avg_time falls back
# to 0 when no render succeeded.
curl -s --max-time 5 -X POST http://localhost:8888/notify \
  -H 'Content-Type: application/json' \
  -d "{\"message\":\"Benchmark complete. Average render time $(fmt3 "${avg_time:-0}") seconds per card\"}" \
  > /dev/null 2>&1 || true
|
||||
93
scripts/benchmark_results.txt
Normal file
93
scripts/benchmark_results.txt
Normal file
@ -0,0 +1,93 @@
|
||||
Paper Dynasty Card Render Benchmark
|
||||
Run timestamp : 2026-03-12 23:40:54
|
||||
API target : https://pddev.manticorum.com/api
|
||||
Cache-bust tag: benchmark-1773376854
|
||||
|
||||
==============================================================
|
||||
Paper Dynasty Card Render Benchmark - WP-00 / Phase 0
|
||||
2026-03-12 23:40:54
|
||||
API: https://pddev.manticorum.com/api
|
||||
==============================================================
|
||||
--- Section 1: Connectivity Check ---
|
||||
|
||||
Sending single request to verify API is reachable...
|
||||
Player : 12785 (batting card)
|
||||
URL : https://pddev.manticorum.com/api/v2/players/12785/battingcard?d=benchmark-1773376854-probe
|
||||
HTTP : 200 OK
|
||||
Total : 1.944s
|
||||
Connect: 0.010s
|
||||
TTFB : 1.933s
|
||||
Size : 192175 bytes (187.6KB)
|
||||
|
||||
Connectivity: PASS
|
||||
|
||||
--- Section 2: Sequential Card Render Benchmark ---
|
||||
|
||||
Rendering 10 cards sequentially with fresh cache busts.
|
||||
Each request forces a full server-side render (bypasses nginx cache).
|
||||
|
||||
Player HTTP Total(s) TTFB(s) Connect(s) Size(KB)
|
||||
--------------------------------------------------------------
|
||||
12785 200 0.056 0.046 0.008 187.6
|
||||
12790 200 1.829 1.815 0.008 202.3
|
||||
12800 200 2.106 2.096 0.008 192.4
|
||||
12810 200 1.755 1.745 0.009 189.8
|
||||
12820 200 2.041 2.030 0.009 193.1
|
||||
12830 200 2.433 2.423 0.009 180.3
|
||||
12840 200 2.518 2.507 0.009 202.3
|
||||
12850 200 2.191 2.174 0.009 187.6
|
||||
12860 200 2.478 2.469 0.009 190.4
|
||||
12870 200 2.913 2.901 0.009 192.8
|
||||
|
||||
--- Section 2: Results Summary ---
|
||||
|
||||
Cards requested : 10
|
||||
Successful : 10
|
||||
Failed : 0
|
||||
Total wall time : 20.321s
|
||||
Average per card : 2.032s
|
||||
Minimum : 0.056s
|
||||
Maximum : 2.913s
|
||||
|
||||
Sequential throughput: ~29.5 cards/min
|
||||
Est. full cardset (1000 renders, sequential): ~2032s (~33.8 min)
|
||||
|
||||
--- Section 3: Upload Pipeline Benchmark Commands ---
|
||||
|
||||
The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards
|
||||
and uploads them to S3. It uses a persistent aiohttp session with a 6s
|
||||
timeout per card.
|
||||
|
||||
To time a dry-run batch of 20 cards:
|
||||
|
||||
cd /mnt/NV2/Development/paper-dynasty/card-creation
|
||||
time pd-cards upload s3 --cardset "2005 Live" --limit 20 --dry-run
|
||||
|
||||
To time a real upload batch of 20 cards (writes to S3, updates DB URLs):
|
||||
|
||||
time pd-cards upload s3 --cardset "2005 Live" --limit 20
|
||||
|
||||
Notes:
|
||||
- dry-run validates card URLs exist without uploading
|
||||
- Remove --limit for full cardset run
|
||||
- Pipeline is currently sequential (one card at a time per session)
|
||||
- Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card
|
||||
- Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio
|
||||
|
||||
--- Section 4: Before/After Comparison Template ---
|
||||
|
||||
Fill in after optimization work is complete.
|
||||
|
||||
Metric Before After Delta
|
||||
----------------------------------------------------------------
|
||||
Avg render time (s) 2.032 ___._____ ___._____
|
||||
Min render time (s) 0.056 ___._____ ___._____
|
||||
Max render time (s) 2.913 ___._____ ___._____
|
||||
Sequential cards/min 29.5 ___.___ ___.___
|
||||
Upload batch (20 cards) ___._____s ___._____s ___._____s
|
||||
Upload cards/min ___.___ ___.___ ___.___
|
||||
Full cardset time (est) ___._____min ___._____min ___ min saved
|
||||
|
||||
Benchmark complete.
|
||||
Results saved to: scripts/benchmark_results.txt
|
||||
|
||||
Loading…
Reference in New Issue
Block a user