diff --git a/migrate_all_cards_to_s3.py b/migrate_all_cards_to_s3.py
new file mode 100644
index 0000000..6981300
--- /dev/null
+++ b/migrate_all_cards_to_s3.py
@@ -0,0 +1,339 @@
+import asyncio
+import datetime
+import sys
+
+import aiohttp
+import boto3
+
+from db_calls import db_get, db_patch
+from exceptions import logger
+
+
+# Configuration
+DRY_RUN = False  # Set to True to preview without uploading or updating
+SKIP_AWS_URLS = True  # Skip URLs already pointing to S3
+START_CARDSET_ID = 21  # Optional: start from a specific cardset ID (None to disable)
+END_CARDSET_ID = 29  # Optional: end at a specific cardset ID (None to disable)
+EXCLUDE_CARDSET_IDS = []  # Cardset IDs to skip (e.g., [1, 2, 3])
+MAX_PLAYERS_PER_CARDSET = None  # Optional: limit for testing (e.g., 10)
+
+# AWS configuration
+AWS_BUCKET_NAME = 'paper-dynasty'  # Change to your bucket name
+AWS_REGION = 'us-east-1'  # Change to your region
+S3_BASE_URL = f'https://{AWS_BUCKET_NAME}.s3.{AWS_REGION}.amazonaws.com'
+
+# Initialize the S3 client (only when not a dry run)
+s3_client = boto3.client('s3', region_name=AWS_REGION) if not DRY_RUN else None
+
+
+def is_aws_url(url: str) -> bool:
+    """
+    Check whether a URL already points to AWS S3.
+
+    Args:
+        url: URL to check
+
+    Returns:
+        True if the URL is already on S3, False otherwise
+    """
+    if not url:
+        return False
+
+    # Check for common S3 URL patterns
+    s3_patterns = [
+        's3.amazonaws.com',
+        's3-',  # Regional S3 URLs like s3-us-east-1
+        f'{AWS_BUCKET_NAME}.s3',
+        f's3://{AWS_BUCKET_NAME}',
+    ]
+
+    return any(pattern in url.lower() for pattern in s3_patterns)
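+
+# Quick sanity check for is_aws_url (illustrative URLs; the second host is a
+# hypothetical non-S3 origin, not a real endpoint from this codebase):
+#   is_aws_url('https://paper-dynasty.s3.us-east-1.amazonaws.com/x.png')  -> True
+#   is_aws_url('https://cards.example.com/p1/battingcard.png')            -> False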
+
+
+async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
+    """
+    Fetch a card image from a URL and return the raw bytes.
+
+    Args:
+        session: aiohttp ClientSession to use for the request
+        card_url: URL to fetch the card from
+        timeout: Request timeout in seconds
+
+    Returns:
+        Raw PNG image bytes
+    """
+    async with session.get(card_url, timeout=aiohttp.ClientTimeout(total=timeout)) as resp:
+        if resp.status == 200:
+            logger.info(f'Fetched card image from {card_url}')
+            return await resp.read()
+        error_text = await resp.text()
+        logger.error(f'Failed to fetch card: {error_text}')
+        raise ValueError(f'Card fetch error: {error_text}')
+
+
+def upload_card_to_s3(image_data: bytes | None, player_id: int, card_type: str, release_date: str, cardset_id: int) -> str:
+    """
+    Upload a card image to S3 and return the S3 URL with a cache-busting param.
+
+    Args:
+        image_data: Raw PNG image bytes (None in dry-run mode)
+        player_id: Player ID
+        card_type: 'batting' or 'pitching'
+        release_date: Date string for cache busting (e.g., '2025-11-8')
+        cardset_id: Cardset ID (zero-padded to 3 digits in the S3 key)
+
+    Returns:
+        Full S3 URL with a ?d= parameter
+    """
+    # Format the cardset ID as three digits with leading zeros
+    cardset_str = f'{cardset_id:03d}'
+    s3_key = f'cards/cardset-{cardset_str}/player-{player_id}/{card_type}card.png'
+    s3_url = f'{S3_BASE_URL}/{s3_key}?d={release_date}'
+
+    if DRY_RUN:
+        # In a dry run, just report what the URL would be
+        logger.info(f'[DRY RUN] Would upload {card_type} card for player {player_id} to: {s3_url}')
+        return s3_url
+
+    try:
+        s3_client.put_object(
+            Bucket=AWS_BUCKET_NAME,
+            Key=s3_key,
+            Body=image_data,
+            ContentType='image/png',
+            CacheControl='public, max-age=300',  # 5-minute cache
+            Metadata={
+                'player-id': str(player_id),
+                'card-type': card_type,
+                'upload-date': datetime.datetime.now().isoformat()
+            }
+        )
+    except Exception as e:
+        logger.error(f'Failed to upload {card_type} card for player {player_id}: {e}')
+        raise
+
+    # Return the URL with the cache-busting parameter
+    logger.info(f'Uploaded {card_type} card for player {player_id} to S3: {s3_url}')
+    return s3_url
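+
+# Illustrative only - the key/URL shape the function above produces, with
+# example IDs (player 123, cardset 7, batting card, released 2025-11-8):
+#   key: cards/cardset-007/player-123/battingcard.png
+#   url: https://paper-dynasty.s3.us-east-1.amazonaws.com/cards/cardset-007/player-123/battingcard.png?d=2025-11-8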
+
+
+async def process_cardset(cardset: dict, session, release_date: str, stats: dict):
+    """
+    Process all players in a single cardset.
+
+    Args:
+        cardset: Cardset dictionary from API
+        session: aiohttp ClientSession
+        release_date: Release date for cache busting
+        stats: Stats dictionary to update
+    """
+    cardset_id = cardset['id']
+    cardset_name = cardset['name']
+
+    print(f'\n{"="*60}')
+    print(f'Processing Cardset: {cardset_name} (ID: {cardset_id})')
+    print(f'{"="*60}')
+
+    # Get all players for this cardset
+    p_query = await db_get(
+        'players',
+        params=[('inc_dex', False), ('cardset_id', cardset_id), ('short_output', True)]
+    )
+
+    if not p_query or p_query['count'] == 0:
+        print(f'No players found for cardset {cardset_name}')
+        return
+
+    all_players = p_query['players']
+
+    # Apply the max-players limit if set
+    if MAX_PLAYERS_PER_CARDSET:
+        all_players = all_players[:MAX_PLAYERS_PER_CARDSET]
+        print(f'Limited to first {MAX_PLAYERS_PER_CARDSET} players for testing')
+
+    print(f'Found {len(all_players)} players')
+
+    cardset_stats = {
+        'total': len(all_players),
+        'skipped_aws': 0,
+        'uploaded': 0,
+        'errors': 0,
+        'url_updates': 0
+    }
+
+    for idx, player in enumerate(all_players):
+        player_id = player['player_id']
+        player_name = player['p_name']
+
+        if idx % 50 == 0 and idx > 0:
+            print(f'  Progress: {idx}/{len(all_players)} players processed...')
+
+        # Process the primary image, then the secondary image (dual-position players)
+        for field in ('image', 'image2'):
+            card_url = player[field]
+            if not card_url:
+                continue
+
+            try:
+                if SKIP_AWS_URLS and is_aws_url(card_url):
+                    logger.debug(f'Skipping player {player_id} {field} - already on AWS')
+                    cardset_stats['skipped_aws'] += 1
+                    continue
+
+                card_type = 'pitching' if 'pitching' in card_url else 'batting'
+
+                if not DRY_RUN:
+                    # Fetch and upload, then point the player record at S3
+                    image_bytes = await fetch_card_image(session, card_url, timeout=6)
+                    s3_url = upload_card_to_s3(image_bytes, player_id, card_type, release_date, cardset_id)
+                    await db_patch('players', object_id=player_id, params=[(field, s3_url)])
+                    cardset_stats['url_updates'] += 1
+                else:
+                    # Dry run - just log what would happen
+                    s3_url = upload_card_to_s3(None, player_id, card_type, release_date, cardset_id)
+                    logger.info(f'[DRY RUN] Would update player {player_id} {field} to: {s3_url}')
+
+                cardset_stats['uploaded'] += 1
+
+            except Exception as e:
+                logger.error(f'Error processing player {player_id} ({player_name}) {field}: {e}')
+                cardset_stats['errors'] += 1
+
+    # Print the cardset summary
+    print(f'\nCardset {cardset_name} Summary:')
+    print(f'  Total players: {cardset_stats["total"]}')
+    print(f'  Skipped (already AWS): {cardset_stats["skipped_aws"]}')
+    print(f'  Uploaded: {cardset_stats["uploaded"]}')
+    print(f'  URL updates: {cardset_stats["url_updates"]}')
+    print(f'  Errors: {cardset_stats["errors"]}')
+
+    # Fold into the global stats
+    for key in cardset_stats:
+        stats[key] = stats.get(key, 0) + cardset_stats[key]
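+
+# Optional sketch, not wired in: process_cardset handles players sequentially.
+# If throughput ever matters, the same per-player work could be fanned out
+# under a bounded semaphore (the cap of 8 is an arbitrary example):
+#
+#   sem = asyncio.Semaphore(8)
+#
+#   async def process_one(player):
+#       async with sem:
+#           ...  # fetch, upload, and db_patch as in process_cardset
+#
+#   await asyncio.gather(*(process_one(p) for p in all_players))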
+
+
+async def main(args):
+    print(f'\n{"="*60}')
+    print('PAPER DYNASTY - BATCH CARD MIGRATION TO AWS S3')
+    print(f'{"="*60}')
+    print(f'Mode: {"DRY RUN (no changes will be made)" if DRY_RUN else "LIVE (will upload and update)"}')
+    print(f'Skip AWS URLs: {SKIP_AWS_URLS}')
+    if START_CARDSET_ID is not None:
+        print(f'Start Cardset ID: {START_CARDSET_ID}')
+    if END_CARDSET_ID is not None:
+        print(f'End Cardset ID: {END_CARDSET_ID}')
+    if EXCLUDE_CARDSET_IDS:
+        print(f'Excluded Cardset IDs: {EXCLUDE_CARDSET_IDS}')
+    print(f'{"="*60}\n')
+
+    # Get all cardsets
+    print('Fetching all cardsets...')
+    c_query = await db_get('cardsets')
+
+    if not c_query or c_query['count'] == 0:
+        print('No cardsets found!')
+        return
+
+    all_cardsets = c_query['cardsets']
+    print(f'Found {len(all_cardsets)} total cardsets')
+
+    # Filter cardsets based on the configuration above
+    filtered_cardsets = []
+    for cardset in all_cardsets:
+        cardset_id = cardset['id']
+
+        if START_CARDSET_ID is not None and cardset_id < START_CARDSET_ID:
+            continue
+        if END_CARDSET_ID is not None and cardset_id > END_CARDSET_ID:
+            continue
+        if cardset_id in EXCLUDE_CARDSET_IDS:
+            continue
+
+        filtered_cardsets.append(cardset)
+
+    print(f'Processing {len(filtered_cardsets)} cardsets after filters\n')
+
+    # Generate the release date for cache busting (unpadded, e.g. '2025-11-8')
+    now = datetime.datetime.now()
+    release_date = f'{now.year}-{now.month}-{now.day}'
+
+    # Global statistics
+    stats = {
+        'cardsets_processed': 0,
+        'total': 0,
+        'skipped_aws': 0,
+        'uploaded': 0,
+        'errors': 0,
+        'url_updates': 0
+    }
+
+    start_time = datetime.datetime.now()
+
+    # One persistent aiohttp session for all card fetches
+    async with aiohttp.ClientSession() as session:
+        for cardset in filtered_cardsets:
+            try:
+                await process_cardset(cardset, session, release_date, stats)
+                stats['cardsets_processed'] += 1
+            except Exception as e:
+                logger.error(f'Failed to process cardset {cardset["name"]}: {e}')
+                continue
+
+    # Print the final summary
+    runtime = datetime.datetime.now() - start_time
+
+    print(f'\n{"="*60}')
+    print('FINAL SUMMARY')
+    print(f'{"="*60}')
+    print(f'Mode: {"DRY RUN" if DRY_RUN else "LIVE"}')
+    print(f'Cardsets processed: {stats["cardsets_processed"]}')
+    print(f'Total player cards: {stats["total"]}')
+    print(f'Skipped (already AWS): {stats["skipped_aws"]}')
+    print(f'Uploaded to S3: {stats["uploaded"]}')
+    print(f'URL updates: {stats["url_updates"]}')
+    print(f'Errors: {stats["errors"]}')
+    print(f'Runtime: {runtime.total_seconds():.2f} seconds')
+    print(f'{"="*60}')
+
+    if DRY_RUN:
+        print('\n*** THIS WAS A DRY RUN - NO CHANGES WERE MADE ***')
+        print('Set DRY_RUN = False to actually upload and update')
+
+
+if __name__ == '__main__':
+    asyncio.run(main(sys.argv[1:]))
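+
+# Usage sketch (assumes this repo's environment, with db_calls/exceptions
+# importable and AWS credentials configured for boto3):
+#   python migrate_all_cards_to_s3.py
+# Set DRY_RUN = True above to preview every upload and URL update first.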