diff --git a/check_cards_and_upload.py b/check_cards_and_upload.py index 905e33c..2135c88 100644 --- a/check_cards_and_upload.py +++ b/check_cards_and_upload.py @@ -1,5 +1,6 @@ import asyncio import datetime +import functools import sys import boto3 @@ -14,6 +15,9 @@ HTML_CARDS = False # boolean to only check and not generate cards SKIP_ARMS = False SKIP_BATS = False +# Concurrency +CONCURRENCY = 8 # number of parallel card-processing tasks + # AWS Configuration AWS_BUCKET_NAME = "paper-dynasty" # Change to your bucket name AWS_REGION = "us-east-1" # Change to your region @@ -23,11 +27,11 @@ UPLOAD_TO_S3 = ( ) UPDATE_PLAYER_URLS = True # Set to False to skip player URL updates (testing) - STEP 6: Update player URLs -# Initialize S3 client +# Initialize S3 client (module-level; boto3 client is thread-safe for concurrent reads) s3_client = boto3.client("s3", region_name=AWS_REGION) if UPLOAD_TO_S3 else None -async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes: +async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes: """ Fetch card image from URL and return raw bytes. 
@@ -134,165 +138,216 @@ async def main(args): # PD API base URL for card generation PD_API_URL = "https://pd.manticorum.com/api" + print(f"\nRelease date for cards: {release_date}") + print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}") + print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}") + print(f"Concurrency: {CONCURRENCY} parallel tasks\n") + + # Build filtered list respecting SKIP_ARMS, SKIP_BATS, START_ID, TEST_COUNT + max_count = TEST_COUNT if TEST_COUNT is not None else 9999 + filtered_players = [] + for x in all_players: + if len(filtered_players) >= max_count: + break + if "pitching" in x["image"] and SKIP_ARMS: + continue + if "batting" in x["image"] and SKIP_BATS: + continue + if START_ID is not None and START_ID > x["player_id"]: + continue + filtered_players.append(x) + + total = len(filtered_players) + logger.info(f"Processing {total} cards with concurrency={CONCURRENCY}") + + # Shared mutable state protected by locks errors = [] successes = [] uploads = [] url_updates = [] - cxn_error = False - count = -1 + completed = 0 + progress_lock = asyncio.Lock() + results_lock = asyncio.Lock() + start_time = datetime.datetime.now() + loop = asyncio.get_event_loop() + semaphore = asyncio.Semaphore(CONCURRENCY) - print(f"\nRelease date for cards: {release_date}") - print(f"S3 Upload: {'ENABLED' if UPLOAD_TO_S3 else 'DISABLED'}") - print(f"URL Update: {'ENABLED' if UPDATE_PLAYER_URLS else 'DISABLED'}\n") + async def report_progress(): + """Increment the completed counter and log/print every 20 completions.""" + nonlocal completed + async with progress_lock: + completed += 1 + if completed % 20 == 0 or completed == total: + print(f"Progress: {completed}/{total} cards processed") + logger.info(f"Progress: {completed}/{total} cards processed") - # Create persistent aiohttp session for all card fetches - async with aiohttp.ClientSession() as session: - for x in all_players: - if "pitching" in x["image"] and SKIP_ARMS: - pass - elif 
"batting" in x["image"] and SKIP_BATS: - pass - elif START_ID is not None and START_ID > x["player_id"]: - pass - elif "sombaseball" in x["image"]: - errors.append((x, f"Bad card url: {x['image']}")) + async def process_single_card(x: dict) -> None: + """ + Process one player entry under the semaphore: fetch card image(s), upload + to S3 (offloading the synchronous boto3 call to a thread pool), and + optionally patch the player record with the new S3 URL. + + Both the primary card (image) and the secondary card for two-way players + (image2) are handled. Failures are appended to the shared errors list + rather than re-raised so the overall batch continues. + """ + async with semaphore: + player_id = x["player_id"] + + # --- primary card --- + if "sombaseball" in x["image"]: + async with results_lock: + errors.append((x, f"Bad card url: {x['image']}")) + await report_progress() + return + + card_type = "pitching" if "pitching" in x["image"] else "batting" + pd_card_url = ( + f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}" + ) + + if HTML_CARDS: + card_url = f"{pd_card_url}&html=true" + timeout = 2 else: - count += 1 - if count % 20 == 0: - print(f"Card #{count + 1} being pulled is {x['p_name']}...") - elif TEST_COUNT is not None and TEST_COUNT < count: - print("Done test run") - break + card_url = pd_card_url + timeout = 10 - # Determine card type from existing image URL - card_type = "pitching" if "pitching" in x["image"] else "batting" - - # Generate card URL from PD API (forces fresh generation from database) - pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}" - - if HTML_CARDS: - card_url = f"{pd_card_url}&html=true" - timeout = 2 - else: - card_url = pd_card_url - timeout = 6 - - try: - # Upload to S3 if enabled - if UPLOAD_TO_S3 and not HTML_CARDS: - # Fetch card image bytes directly - image_bytes = await fetch_card_image( - session, card_url, timeout=timeout - ) - s3_url = upload_card_to_s3( + 
primary_ok = False + try: + if UPLOAD_TO_S3 and not HTML_CARDS: + image_bytes = await fetch_card_image( + session, card_url, timeout=timeout + ) + # boto3 is synchronous — offload to thread pool so the event + # loop is not blocked during the S3 PUT + s3_url = await loop.run_in_executor( + None, + functools.partial( + upload_card_to_s3, image_bytes, - x["player_id"], + player_id, card_type, release_date, cardset["id"], - ) - uploads.append((x["player_id"], card_type, s3_url)) + ), + ) + async with results_lock: + uploads.append((player_id, card_type, s3_url)) + + if UPDATE_PLAYER_URLS: + await db_patch( + "players", + object_id=player_id, + params=[("image", s3_url)], + ) + async with results_lock: + url_updates.append((player_id, card_type, s3_url)) + logger.info(f"Updated player {player_id} image URL to S3") + else: + # Just validate card exists (old behavior) + logger.info("calling the card url") + await url_get(card_url, timeout=timeout) + + primary_ok = True + + except ConnectionError as e: + logger.error(f"Connection error for player {player_id}: {e}") + async with results_lock: + errors.append((x, e)) + + except ValueError as e: + async with results_lock: + errors.append((x, e)) + + except Exception as e: + logger.error(f"S3 upload/update failed for player {player_id}: {e}") + async with results_lock: + errors.append((x, f"S3 error: {e}")) + + if not primary_ok: + await report_progress() + return + + # --- secondary card (two-way players) --- + if x["image2"] is not None: + if "sombaseball" in x["image2"]: + async with results_lock: + errors.append((x, f"Bad card url: {x['image2']}")) + await report_progress() + return + + card_type2 = "pitching" if "pitching" in x["image2"] else "batting" + pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}" + + if HTML_CARDS: + card_url2 = f"{pd_card_url2}&html=true" + else: + card_url2 = pd_card_url2 + + try: + if UPLOAD_TO_S3 and not HTML_CARDS: + image_bytes2 = await fetch_card_image( 
+ session, card_url2, timeout=10 + ) + s3_url2 = await loop.run_in_executor( + None, + functools.partial( + upload_card_to_s3, + image_bytes2, + player_id, + card_type2, + release_date, + cardset["id"], + ), + ) + async with results_lock: + uploads.append((player_id, card_type2, s3_url2)) - # Update player record with new S3 URL if UPDATE_PLAYER_URLS: await db_patch( "players", object_id=x["player_id"], - params=[("image", s3_url)], - ) - url_updates.append((x["player_id"], card_type, s3_url)) - logger.info( - f"Updated player {x['player_id']} image URL to S3" + params=[("image2", s3_url2)], ) + async with results_lock: + url_updates.append((player_id, card_type2, s3_url2)) + logger.info(f"Updated player {player_id} image2 URL to S3") else: # Just validate card exists (old behavior) - logger.info("calling the card url") - resp = await url_get(card_url, timeout=timeout) + await url_get(card_url2, timeout=10) + + async with results_lock: + successes.append(x) except ConnectionError as e: - if cxn_error: - raise e - cxn_error = True - errors.append((x, e)) + logger.error(f"Connection error for player {player_id} image2: {e}") + async with results_lock: + errors.append((x, e)) except ValueError as e: - errors.append((x, e)) + async with results_lock: + errors.append((x, e)) except Exception as e: logger.error( - f"S3 upload/update failed for player {x['player_id']}: {e}" + f"S3 upload/update failed for player {player_id} image2: {e}" ) - errors.append((x, f"S3 error: {e}")) - continue + async with results_lock: + errors.append((x, f"S3 error (image2): {e}")) - # Handle image2 (dual-position players) - if x["image2"] is not None: - # Determine second card type - card_type2 = "pitching" if "pitching" in x["image2"] else "batting" - - # Generate card URL from PD API (forces fresh generation from database) - pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}" - - if HTML_CARDS: - card_url2 = f"{pd_card_url2}&html=true" - else: - 
card_url2 = pd_card_url2 - - if "sombaseball" in x["image2"]: - errors.append((x, f"Bad card url: {x['image2']}")) - else: - try: - if UPLOAD_TO_S3 and not HTML_CARDS: - # Fetch second card image bytes directly from PD API - image_bytes2 = await fetch_card_image( - session, card_url2, timeout=6 - ) - s3_url2 = upload_card_to_s3( - image_bytes2, - x["player_id"], - card_type2, - release_date, - cardset["id"], - ) - uploads.append((x["player_id"], card_type2, s3_url2)) - - # Update player record with new S3 URL for image2 - if UPDATE_PLAYER_URLS: - await db_patch( - "players", - object_id=x["player_id"], - params=[("image2", s3_url2)], - ) - url_updates.append( - (x["player_id"], card_type2, s3_url2) - ) - logger.info( - f"Updated player {x['player_id']} image2 URL to S3" - ) - else: - # Just validate card exists (old behavior) - resp = await url_get(card_url2, timeout=6) - - successes.append(x) - - except ConnectionError as e: - if cxn_error: - raise e - cxn_error = True - errors.append((x, e)) - - except ValueError as e: - errors.append((x, e)) - - except Exception as e: - logger.error( - f"S3 upload/update failed for player {x['player_id']} image2: {e}" - ) - errors.append((x, f"S3 error (image2): {e}")) - else: + else: + async with results_lock: successes.append(x) + await report_progress() + + # Create persistent aiohttp session shared across all concurrent tasks + async with aiohttp.ClientSession() as session: + tasks = [process_single_card(x) for x in filtered_players] + await asyncio.gather(*tasks, return_exceptions=True) + # Print summary print(f"\n{'=' * 60}") print("SUMMARY") diff --git a/pd_cards/commands/upload.py b/pd_cards/commands/upload.py index 782f525..2625f1c 100644 --- a/pd_cards/commands/upload.py +++ b/pd_cards/commands/upload.py @@ -5,6 +5,7 @@ Commands for uploading card images to AWS S3. 
""" import asyncio +import sys from pathlib import Path from typing import Optional @@ -40,14 +41,19 @@ def s3( dry_run: bool = typer.Option( False, "--dry-run", "-n", help="Preview without uploading" ), + concurrency: int = typer.Option( + 8, "--concurrency", "-j", help="Number of parallel uploads (default: 8)" + ), ): """ Upload card images to AWS S3. Fetches card images from Paper Dynasty API and uploads to S3 bucket. + Cards are processed concurrently; use --concurrency to tune parallelism. Example: pd-cards upload s3 --cardset "2005 Live" --limit 10 + pd-cards upload s3 --cardset "2005 Live" --concurrency 16 """ console.print() console.print("=" * 70) @@ -67,6 +73,7 @@ def s3( console.print("Skipping: Pitching cards") console.print(f"Upload to S3: {upload and not dry_run}") console.print(f"Update URLs: {update_urls and not dry_run}") + console.print(f"Concurrency: {concurrency} parallel tasks") console.print() if dry_run: @@ -76,39 +83,52 @@ def s3( raise typer.Exit(0) try: - import sys - sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - import check_cards_and_upload as ccu + from pd_cards.core.upload import upload_cards_to_s3 - # Configure the module's globals - ccu.CARDSET_NAME = cardset - ccu.START_ID = start_id - ccu.TEST_COUNT = limit if limit else 9999 - ccu.HTML_CARDS = html - ccu.SKIP_BATS = skip_batters - ccu.SKIP_ARMS = skip_pitchers - ccu.UPLOAD_TO_S3 = upload - ccu.UPDATE_PLAYER_URLS = update_urls - - # Re-initialize S3 client if uploading - if upload: - import boto3 - - ccu.s3_client = boto3.client("s3", region_name=ccu.AWS_REGION) - else: - ccu.s3_client = None + def progress_callback(_count: int, label: str) -> None: + console.print(f" Progress: {label}") console.print("[bold]Starting S3 upload...[/bold]") console.print() - asyncio.run(ccu.main([])) + result = asyncio.run( + upload_cards_to_s3( + cardset_name=cardset, + start_id=start_id, + limit=limit, + html_cards=html, + skip_batters=skip_batters, + skip_pitchers=skip_pitchers, + 
upload=upload, + update_urls=update_urls, + on_progress=progress_callback, + concurrency=concurrency, + ) + ) + + success_count = len(result["successes"]) + error_count = len(result["errors"]) + upload_count = len(result["uploads"]) + url_update_count = len(result["url_updates"]) console.print() console.print("=" * 70) console.print("[bold green]✓ S3 UPLOAD COMPLETE[/bold green]") console.print("=" * 70) + console.print(f" Successes: {success_count}") + console.print(f" S3 uploads: {upload_count}") + console.print(f" URL updates: {url_update_count}") + if error_count: + console.print(f" [red]Errors: {error_count}[/red]") + for player, err in result["errors"][:10]: + console.print( + f" - player {player.get('player_id', '?')} " + f"({player.get('p_name', '?')}): {err}" + ) + if error_count > 10: + console.print(f" ... and {error_count - 10} more (see logs)") except ImportError as e: console.print(f"[red]Error importing modules: {e}[/red]") diff --git a/pd_cards/core/upload.py b/pd_cards/core/upload.py index 247d7cd..26da559 100644 --- a/pd_cards/core/upload.py +++ b/pd_cards/core/upload.py @@ -4,6 +4,7 @@ Card image upload and management core logic. Business logic for uploading card images to AWS S3 and managing card URLs. """ +import asyncio import datetime from typing import Optional import urllib.parse @@ -25,7 +26,7 @@ def get_s3_base_url( return f"https://{bucket}.s3.{region}.amazonaws.com" -async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes: +async def fetch_card_image(session, card_url: str, timeout: int = 10) -> bytes: """ Fetch card image from URL and return raw bytes. @@ -118,9 +119,17 @@ async def upload_cards_to_s3( bucket: str = DEFAULT_AWS_BUCKET, region: str = DEFAULT_AWS_REGION, on_progress: callable = None, + concurrency: int = 8, ) -> dict: """ - Upload card images to S3 for a cardset. + Upload card images to S3 for a cardset using concurrent async tasks. 
+ + Cards are fetched and uploaded in parallel, bounded by ``concurrency`` + semaphore slots. boto3 S3 calls (synchronous) are offloaded to a thread + pool via ``loop.run_in_executor`` so they do not block the event loop. + + Individual card failures are collected and do NOT abort the batch; + a summary is logged once all tasks complete. Args: cardset_name: Name of the cardset to process @@ -134,6 +143,7 @@ async def upload_cards_to_s3( bucket: S3 bucket name region: AWS region on_progress: Callback function for progress updates + concurrency: Number of parallel card-processing tasks (default 8) Returns: Dict with counts of errors, successes, uploads, url_updates @@ -168,160 +178,221 @@ async def upload_cards_to_s3( # PD API base URL for card generation PD_API_URL = "https://pd.manticorum.com/api" - # Initialize S3 client if uploading + # Initialize S3 client if uploading (boto3 client is thread-safe for reads; + # we will call it from a thread pool so we create it once here) s3_client = boto3.client("s3", region_name=region) if upload else None + # Build the filtered list of players to process, respecting start_id / limit + max_count = limit or 9999 + filtered_players = [] + for x in all_players: + if len(filtered_players) >= max_count: + break + if "pitching" in x["image"] and skip_pitchers: + continue + if "batting" in x["image"] and skip_batters: + continue + if start_id is not None and start_id > x["player_id"]: + continue + filtered_players.append(x) + + total = len(filtered_players) + logger.info(f"Processing {total} cards with concurrency={concurrency}") + + # Shared mutable state protected by a lock errors = [] successes = [] uploads = [] url_updates = [] - cxn_error = False - count = 0 - max_count = limit or 9999 + completed = 0 + progress_lock = asyncio.Lock() + results_lock = asyncio.Lock() - async with aiohttp.ClientSession() as session: - for x in all_players: - # Apply filters - if "pitching" in x["image"] and skip_pitchers: - continue - if "batting" 
in x["image"] and skip_batters: - continue - if start_id is not None and start_id > x["player_id"]: - continue + loop = asyncio.get_event_loop() + semaphore = asyncio.Semaphore(concurrency) + + async def report_progress(): + """Increment the completed counter and log every 20 completions.""" + nonlocal completed + async with progress_lock: + completed += 1 + if completed % 20 == 0 or completed == total: + logger.info(f"Progress: {completed}/{total} cards processed") + if on_progress: + on_progress(completed, f"{completed}/{total}") + + async def process_single_card(x: dict) -> None: + """ + Process one player entry: fetch card image(s), upload to S3, and + optionally patch the player record with the new S3 URL. + + Both the primary card (image) and the secondary card for two-way + players (image2) are handled here. Errors are appended to the + shared ``errors`` list rather than re-raised so the batch continues. + """ + async with semaphore: + player_id = x["player_id"] + + # --- primary card --- if "sombaseball" in x["image"]: - errors.append((x, f"Bad card url: {x['image']}")) - continue - if count >= max_count: - break + async with results_lock: + errors.append((x, f"Bad card url: {x['image']}")) + await report_progress() + return - count += 1 - if on_progress and count % 20 == 0: - on_progress(count, x["p_name"]) - - # Determine card type from existing image URL card_type = "pitching" if "pitching" in x["image"] else "batting" - - # Generate card URL from PD API (forces fresh generation from database) - pd_card_url = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type}card?d={release_date}" + pd_card_url = ( + f"{PD_API_URL}/v2/players/{player_id}/{card_type}card?d={release_date}" + ) if html_cards: card_url = f"{pd_card_url}&html=true" timeout = 2 else: card_url = pd_card_url - timeout = 6 + timeout = 10 + primary_ok = False try: if upload and not html_cards: - # Fetch card image bytes directly image_bytes = await fetch_card_image( session, card_url, 
timeout=timeout ) - s3_url = upload_card_to_s3( + # boto3 is synchronous — offload to thread pool + s3_url = await loop.run_in_executor( + None, + upload_card_to_s3, s3_client, image_bytes, - x["player_id"], + player_id, card_type, release_date, cardset["id"], bucket, region, ) - uploads.append((x["player_id"], card_type, s3_url)) + async with results_lock: + uploads.append((player_id, card_type, s3_url)) - # Update player record with new S3 URL if update_urls: await db_patch( "players", - object_id=x["player_id"], + object_id=player_id, params=[("image", s3_url)], ) - url_updates.append((x["player_id"], card_type, s3_url)) - logger.info(f"Updated player {x['player_id']} image URL to S3") + async with results_lock: + url_updates.append((player_id, card_type, s3_url)) + logger.info(f"Updated player {player_id} image URL to S3") else: - # Just validate card exists logger.info(f"Validating card URL: {card_url}") await url_get(card_url, timeout=timeout) + primary_ok = True + except ConnectionError as e: - if cxn_error: - raise e - cxn_error = True - errors.append((x, e)) + logger.error(f"Connection error for player {player_id}: {e}") + async with results_lock: + errors.append((x, e)) except ValueError as e: - errors.append((x, e)) + async with results_lock: + errors.append((x, e)) except Exception as e: - logger.error( - f"S3 upload/update failed for player {x['player_id']}: {e}" - ) - errors.append((x, f"S3 error: {e}")) - continue + logger.error(f"S3 upload/update failed for player {player_id}: {e}") + async with results_lock: + errors.append((x, f"S3 error: {e}")) - # Handle image2 (dual-position players) + if not primary_ok: + await report_progress() + return + + # --- secondary card (two-way players) --- if x["image2"] is not None: - card_type2 = "pitching" if "pitching" in x["image2"] else "batting" - pd_card_url2 = f"{PD_API_URL}/v2/players/{x['player_id']}/{card_type2}card?d={release_date}" - - if html_cards: - card_url2 = f"{pd_card_url2}&html=true" - else: - 
card_url2 = pd_card_url2 - if "sombaseball" in x["image2"]: - errors.append((x, f"Bad card url: {x['image2']}")) - else: - try: - if upload and not html_cards: - image_bytes2 = await fetch_card_image( - session, card_url2, timeout=6 - ) - s3_url2 = upload_card_to_s3( - s3_client, - image_bytes2, - x["player_id"], - card_type2, - release_date, - cardset["id"], - bucket, - region, - ) - uploads.append((x["player_id"], card_type2, s3_url2)) + async with results_lock: + errors.append((x, f"Bad card url: {x['image2']}")) + await report_progress() + return - if update_urls: - await db_patch( - "players", - object_id=x["player_id"], - params=[("image2", s3_url2)], - ) - url_updates.append( - (x["player_id"], card_type2, s3_url2) - ) - logger.info( - f"Updated player {x['player_id']} image2 URL to S3" - ) - else: - await url_get(card_url2, timeout=6) + card_type2 = "pitching" if "pitching" in x["image2"] else "batting" + pd_card_url2 = f"{PD_API_URL}/v2/players/{player_id}/{card_type2}card?d={release_date}" + card_url2 = f"{pd_card_url2}&html=true" if html_cards else pd_card_url2 + try: + if upload and not html_cards: + image_bytes2 = await fetch_card_image( + session, card_url2, timeout=10 + ) + s3_url2 = await loop.run_in_executor( + None, + upload_card_to_s3, + s3_client, + image_bytes2, + player_id, + card_type2, + release_date, + cardset["id"], + bucket, + region, + ) + async with results_lock: + uploads.append((player_id, card_type2, s3_url2)) + + if update_urls: + await db_patch( + "players", + object_id=player_id, + params=[("image2", s3_url2)], + ) + async with results_lock: + url_updates.append((player_id, card_type2, s3_url2)) + logger.info(f"Updated player {player_id} image2 URL to S3") + else: + await url_get(card_url2, timeout=10) + + async with results_lock: successes.append(x) - except ConnectionError as e: - if cxn_error: - raise e - cxn_error = True + except ConnectionError as e: + logger.error(f"Connection error for player {player_id} image2: {e}") + 
async with results_lock: errors.append((x, e)) - except ValueError as e: + except ValueError as e: + async with results_lock: errors.append((x, e)) - except Exception as e: - logger.error( - f"S3 upload/update failed for player {x['player_id']} image2: {e}" - ) + except Exception as e: + logger.error( + f"S3 upload/update failed for player {player_id} image2: {e}" + ) + async with results_lock: errors.append((x, f"S3 error (image2): {e}")) + else: - successes.append(x) + async with results_lock: + successes.append(x) + + await report_progress() + + async with aiohttp.ClientSession() as session: + tasks = [process_single_card(x) for x in filtered_players] + await asyncio.gather(*tasks, return_exceptions=True) + + # Log final summary + success_count = len(successes) + error_count = len(errors) + logger.info( + f"Upload complete: {success_count} succeeded, {error_count} failed " + f"out of {total} cards" + ) + if error_count: + for player, err in errors: + logger.warning( + f" Failed: player {player.get('player_id', '?')} " + f"({player.get('p_name', '?')}): {err}" + ) return { "errors": errors, diff --git a/scripts/benchmark_render.sh b/scripts/benchmark_render.sh new file mode 100755 index 0000000..6048146 --- /dev/null +++ b/scripts/benchmark_render.sh @@ -0,0 +1,290 @@ +#!/bin/bash +# ============================================================================= +# WP-00: Paper Dynasty Card Render & Upload Pipeline Benchmark +# Phase 0 - Render Pipeline Optimization +# +# Usage: +# ./scripts/benchmark_render.sh # Run full benchmark (dev API) +# ./scripts/benchmark_render.sh --prod # Run against production API +# ./scripts/benchmark_render.sh --quick # Connectivity check only +# +# Requirements: curl, bc +# ============================================================================= + +# --- Configuration ----------------------------------------------------------- + +DEV_API="https://pddev.manticorum.com/api" +PROD_API="https://pd.manticorum.com/api" 
+API_URL="$DEV_API"
+
+# Player IDs in the 12000-13000 range (2005 Live cardset)
+# Mix of batters and pitchers across different teams
+PLAYER_IDS=(12785 12790 12800 12810 12820 12830 12840 12850 12860 12870)
+
+RESULTS_FILE="$(dirname "$0")/benchmark_results.txt"
+TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+RUN_LABEL="benchmark-$(date +%s)"
+
+# --- Argument parsing ---------------------------------------------------------
+
+QUICK_MODE=false
+for arg in "$@"; do
+    case "$arg" in
+        --prod) API_URL="$PROD_API" ;;
+        --quick) QUICK_MODE=true ;;
+        --help|-h)
+            echo "Usage: $0 [--prod] [--quick]"
+            echo " --prod Use production API instead of dev"
+            echo " --quick Connectivity check only (1 request)"
+            exit 0
+            ;;
+        *)
+            # Unknown flags used to be ignored silently; keep going, but say so.
+            echo "WARNING: ignoring unknown argument '$arg' (see --help)" >&2
+            ;;
+    esac
+done
+
+# Fail early if a documented requirement is missing, instead of emitting
+# empty timings and bc syntax errors further down.
+for tool in curl bc; do
+    if ! command -v "$tool" > /dev/null 2>&1; then
+        echo "ERROR: required tool '$tool' not found in PATH" >&2
+        exit 1
+    fi
+done
+
+# --- Helpers -----------------------------------------------------------------
+
+# Print a 72-character horizontal rule (stdout only, not the results file)
+hr() { printf '%0.s-' {1..72}; echo; }
+
+# bc-based float arithmetic
+fadd() { echo "$1 + $2" | bc -l; }
+fdiv() { echo "scale=6; $1 / $2" | bc -l; }
+flt() { echo "$1 < $2" | bc -l; } # returns 1 if true
+fmt3() { printf "%.3f" "$1"; } # format to 3 decimal places
+
+# Print and simultaneously append to results file
+log() { echo "$@" | tee -a "$RESULTS_FILE"; }
+
+# Single card render with timing.
+#   $1  player id
+#   $2  card type (default: batting)
+#   $3  cache-bust suffix (default: the player id). The connectivity probe
+#       passes its own suffix so it does not pre-warm the cache entry that
+#       the benchmark loop later requests for the same player.
+# Sets LAST_HTTP, LAST_TIME, LAST_CONN, LAST_TTFB, LAST_SIZE, LAST_URL
+measure_card() {
+    local player_id="$1"
+    local card_type="${2:-batting}"
+    local tag="${3:-$player_id}"
+    local cache_bust="${RUN_LABEL}-${tag}"
+    local url="${API_URL}/v2/players/${player_id}/${card_type}card?d=${cache_bust}"
+
+    # -s silent, -o discard body, -w write timing vars separated by |
+    local result
+    result=$(curl -s -o /dev/null \
+        -w "%{http_code}|%{time_total}|%{time_connect}|%{time_starttransfer}|%{size_download}" \
+        --max-time 30 \
+        "$url" 2>&1)
+
+    # Split the |-separated write-out in one step instead of five echo|cut pipes
+    IFS='|' read -r LAST_HTTP LAST_TIME LAST_CONN LAST_TTFB LAST_SIZE <<< "$result"
+
+    # Keep the numeric fields numeric so fmt3/bc never receive an empty operand
+    LAST_TIME="${LAST_TIME:-0}"
+    LAST_CONN="${LAST_CONN:-0}"
+    LAST_TTFB="${LAST_TTFB:-0}"
+    LAST_SIZE="${LAST_SIZE:-0}"
+    LAST_URL="$url"
+}
+
+# =============================================================================
+# START
+# =============================================================================
+
+# Truncate results file for this run and write header
+cat > "$RESULTS_FILE" << EOF
+Paper Dynasty Card Render Benchmark
+Run timestamp : $TIMESTAMP
+API target : $API_URL
+Cache-bust tag: $RUN_LABEL
+EOF
+echo "" >> "$RESULTS_FILE"
+
+echo ""
+log "=============================================================="
+log " Paper Dynasty Card Render Benchmark - WP-00 / Phase 0"
+log " $(date '+%Y-%m-%d %H:%M:%S')"
+log " API: $API_URL"
+log "=============================================================="
+echo ""
+
+# =============================================================================
+# SECTION 1: Connectivity Check
+# =============================================================================
+
+log "--- Section 1: Connectivity Check ---"
+log ""
+log "Sending single request to verify API is reachable..."
+log " Player : 12785 (batting card)"
+log " URL : ${API_URL}/v2/players/12785/battingcard?d=${RUN_LABEL}-probe"
+echo ""
+
+# Use a dedicated "probe" cache-bust suffix: this makes the request match the
+# URL logged above and keeps the first benchmarked card (same player id) a
+# genuine cold render rather than a cache hit.
+measure_card 12785 batting probe
+
+if [ "$LAST_HTTP" = "200" ]; then
+    log " HTTP : $LAST_HTTP OK"
+    log " Total : $(fmt3 "$LAST_TIME")s"
+    log " Connect: $(fmt3 "$LAST_CONN")s"
+    log " TTFB : $(fmt3 "$LAST_TTFB")s"
+    log " Size : ${LAST_SIZE} bytes ($(echo "scale=1; $LAST_SIZE/1024" | bc)KB)"
+    log ""
+    log " Connectivity: PASS"
+elif [ -z "$LAST_HTTP" ] || [ "$LAST_HTTP" = "000" ]; then
+    log " ERROR: Could not reach $API_URL (no response / timeout)"
+    log " Aborting benchmark."
+    echo ""
+    exit 1
+else
+    log " HTTP : $LAST_HTTP"
+    log " WARNING: Unexpected status code. Continuing anyway."
+fi
+
+echo ""
+
+if [ "$QUICK_MODE" = true ]; then
+    log "Quick mode: exiting after connectivity check."
+    echo ""
+    exit 0
+fi
+
+# =============================================================================
+# SECTION 2: Sequential Card Render Benchmark (10 cards)
+# =============================================================================
+
+log ""
+hr
+log "--- Section 2: Sequential Card Render Benchmark ---"
+log ""
+log "Rendering ${#PLAYER_IDS[@]} cards sequentially with fresh cache busts."
+log "Each request forces a full server-side render (bypasses nginx cache)."
+log ""
+log "$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' 'Player' 'HTTP' 'Total(s)' 'TTFB(s)' 'Connect(s)' 'Size(KB)')"
+log "$(printf '%0.s-' {1..62})"
+
+# Accumulators
+total_time="0"
+min_time=""
+max_time=""
+success_count=0
+fail_count=0
+all_times=()
+
+for pid in "${PLAYER_IDS[@]}"; do
+    measure_card "$pid" batting
+
+    size_kb=$(echo "scale=1; $LAST_SIZE/1024" | bc)
+    row=$(printf '%-8s %-10s %-10s %-10s %-10s %-8s' \
+        "$pid" \
+        "$LAST_HTTP" \
+        "$(fmt3 "$LAST_TIME")" \
+        "$(fmt3 "$LAST_TTFB")" \
+        "$(fmt3 "$LAST_CONN")" \
+        "$size_kb")
+
+    if [ "$LAST_HTTP" = "200" ]; then
+        log "$row"
+        total_time=$(fadd "$total_time" "$LAST_TIME")
+        all_times+=("$LAST_TIME")
+        success_count=$((success_count + 1))
+
+        # Track min
+        if [ -z "$min_time" ] || [ "$(flt "$LAST_TIME" "$min_time")" = "1" ]; then
+            min_time="$LAST_TIME"
+        fi
+        # Track max
+        if [ -z "$max_time" ] || [ "$(flt "$max_time" "$LAST_TIME")" = "1" ]; then
+            max_time="$LAST_TIME"
+        fi
+    else
+        # Failed requests are reported but excluded from the timing statistics
+        log "$row << FAILED"
+        fail_count=$((fail_count + 1))
+    fi
+done
+
+echo ""
+log ""
+log "--- Section 2: Results Summary ---"
+log ""
+
+if [ "$success_count" -gt 0 ]; then
+    avg_time=$(fdiv "$total_time" "$success_count")
+    log " Cards requested : ${#PLAYER_IDS[@]}"
+    log " Successful : $success_count"
+    log " Failed : $fail_count"
+    log " Total wall time : $(fmt3 "$total_time")s"
+    log " Average per card : $(fmt3 "$avg_time")s"
+    log " Minimum : $(fmt3 "$min_time")s"
+    log " Maximum : $(fmt3 "$max_time")s"
+    log ""
+
+    # Rough throughput estimate (sequential)
+    cards_per_min=$(echo "scale=1; 60 / $avg_time" | bc)
+    log " Sequential throughput: ~${cards_per_min} cards/min"
+
+    # Estimate full cardset at ~500 players * 2 cards each = 1000 renders
+    est_1000=$(echo "scale=0; (1000 * $avg_time) / 1" | bc)
+    log " Est. full cardset (1000 renders, sequential): ~${est_1000}s (~$(echo "scale=1; $est_1000/60" | bc) min)"
+else
+    log " No successful renders to summarize."
+fi
+
+# =============================================================================
+# SECTION 3: Upload Pipeline Reference
+# =============================================================================
+
+echo ""
+log ""
+hr
+log "--- Section 3: Upload Pipeline Benchmark Commands ---"
+log ""
+log "The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards"
+log "and uploads them to S3. It uses a persistent aiohttp session with a 10s"
+log "timeout per card."
+log ""
+log "To time a dry-run batch of 20 cards:"
+log ""
+log " cd /mnt/NV2/Development/paper-dynasty/card-creation"
+log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20 --dry-run"
+log ""
+log "To time a real upload batch of 20 cards (writes to S3, updates DB URLs):"
+log ""
+log " time pd-cards upload s3 --cardset \"2005 Live\" --limit 20"
+log ""
+log "Notes:"
+log " - dry-run validates card URLs exist without uploading"
+log " - Remove --limit for full cardset run"
+log " - Pipeline processes cards concurrently (default 8 tasks; tune with --concurrency)"
+log " - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card"
+log " - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio"
+log ""
+
+# =============================================================================
+# SECTION 4: Before/After Comparison Template
+# =============================================================================
+
+echo ""
+hr
+log "--- Section 4: Before/After Comparison Template ---"
+log ""
+log "Fill in after optimization work is complete."
+log ""
+log " Metric Before After Delta"
+log " $(printf '%0.s-' {1..64})"
+
+if [ "$success_count" -gt 0 ]; then
+    log " Avg render time (s) $(fmt3 "$avg_time") ___._____ ___._____"
+    log " Min render time (s) $(fmt3 "$min_time") ___._____ ___._____"
+    log " Max render time (s) $(fmt3 "$max_time") ___._____ ___._____"
+    log " Sequential cards/min ${cards_per_min} ___.___ ___.___"
+else
+    log " Avg render time (s) (no data) ___._____ ___._____"
+fi
+log " Upload batch (20 cards) ___._____s ___._____s ___._____s"
+log " Upload cards/min ___.___ ___.___ ___.___"
+log " Full cardset time (est) ___._____min ___._____min ___ min saved"
+log ""
+
+# =============================================================================
+# DONE
+# =============================================================================
+
+echo ""
+hr
+log "Benchmark complete."
+log "Results saved to: $RESULTS_FILE"
+log ""
+
+# Voice notify (best-effort: output discarded, failures ignored; --max-time keeps a hung listener from stalling exit)
+curl -s --max-time 5 -X POST http://localhost:8888/notify \
+    -H 'Content-Type: application/json' \
+    -d "{\"message\":\"Benchmark complete. Average render time $(fmt3 "${avg_time:-0}") seconds per card\"}" \
+    > /dev/null 2>&1 || true
diff --git a/scripts/benchmark_results.txt b/scripts/benchmark_results.txt
new file mode 100644
index 0000000..69d66aa
--- /dev/null
+++ b/scripts/benchmark_results.txt
@@ -0,0 +1,93 @@
+Paper Dynasty Card Render Benchmark
+Run timestamp : 2026-03-12 23:40:54
+API target : https://pddev.manticorum.com/api
+Cache-bust tag: benchmark-1773376854
+
+==============================================================
+ Paper Dynasty Card Render Benchmark - WP-00 / Phase 0
+ 2026-03-12 23:40:54
+ API: https://pddev.manticorum.com/api
+==============================================================
+--- Section 1: Connectivity Check ---
+
+Sending single request to verify API is reachable...
+ Player : 12785 (batting card) + URL : https://pddev.manticorum.com/api/v2/players/12785/battingcard?d=benchmark-1773376854-probe + HTTP : 200 OK + Total : 1.944s + Connect: 0.010s + TTFB : 1.933s + Size : 192175 bytes (187.6KB) + + Connectivity: PASS + +--- Section 2: Sequential Card Render Benchmark --- + +Rendering 10 cards sequentially with fresh cache busts. +Each request forces a full server-side render (bypasses nginx cache). + +Player HTTP Total(s) TTFB(s) Connect(s) Size(KB) +-------------------------------------------------------------- +12785 200 0.056 0.046 0.008 187.6 +12790 200 1.829 1.815 0.008 202.3 +12800 200 2.106 2.096 0.008 192.4 +12810 200 1.755 1.745 0.009 189.8 +12820 200 2.041 2.030 0.009 193.1 +12830 200 2.433 2.423 0.009 180.3 +12840 200 2.518 2.507 0.009 202.3 +12850 200 2.191 2.174 0.009 187.6 +12860 200 2.478 2.469 0.009 190.4 +12870 200 2.913 2.901 0.009 192.8 + +--- Section 2: Results Summary --- + + Cards requested : 10 + Successful : 10 + Failed : 0 + Total wall time : 20.321s + Average per card : 2.032s + Minimum : 0.056s + Maximum : 2.913s + + Sequential throughput: ~29.5 cards/min + Est. full cardset (1000 renders, sequential): ~2032s (~33.8 min) + +--- Section 3: Upload Pipeline Benchmark Commands --- + +The upload pipeline (pd_cards/core/upload.py) fetches rendered PNG cards +and uploads them to S3. It uses a persistent aiohttp session with a 6s +timeout per card. 
+ +To time a dry-run batch of 20 cards: + + cd /mnt/NV2/Development/paper-dynasty/card-creation + time pd-cards upload s3 --cardset "2005 Live" --limit 20 --dry-run + +To time a real upload batch of 20 cards (writes to S3, updates DB URLs): + + time pd-cards upload s3 --cardset "2005 Live" --limit 20 + +Notes: + - dry-run validates card URLs exist without uploading + - Remove --limit for full cardset run + - Pipeline is currently sequential (one card at a time per session) + - Each card: fetch PNG (~2-4s render) + S3 put (~0.1-0.5s) = ~2.5-4.5s/card + - Parallelism target (Phase 0 goal): 10-20 concurrent fetches via asyncio + +--- Section 4: Before/After Comparison Template --- + +Fill in after optimization work is complete. + + Metric Before After Delta + ---------------------------------------------------------------- + Avg render time (s) 2.032 ___._____ ___._____ + Min render time (s) 0.056 ___._____ ___._____ + Max render time (s) 2.913 ___._____ ___._____ + Sequential cards/min 29.5 ___.___ ___.___ + Upload batch (20 cards) ___._____s ___._____s ___._____s + Upload cards/min ___.___ ___.___ ___.___ + Full cardset time (est) ___._____min ___._____min ___ min saved + +Benchmark complete. +Results saved to: scripts/benchmark_results.txt +