import asyncio
import datetime
import sys

import aiohttp
import boto3

from db_calls import db_get, db_patch
from exceptions import logger

# Configuration
DRY_RUN = False  # Set to True to preview without uploading or updating anything
SKIP_AWS_URLS = True  # Skip URLs already pointing to S3
START_CARDSET_ID = 21  # Optional: start from this cardset ID (inclusive)
END_CARDSET_ID = 29  # Optional: end at this cardset ID (inclusive)
EXCLUDE_CARDSET_IDS = []  # List of cardset IDs to skip (e.g., [1, 2, 3])
MAX_PLAYERS_PER_CARDSET = None  # Optional: per-cardset limit for testing (e.g., 10)

# AWS configuration
AWS_BUCKET_NAME = 'paper-dynasty'  # Change to your bucket name
AWS_REGION = 'us-east-1'  # Change to your region
S3_BASE_URL = f'https://{AWS_BUCKET_NAME}.s3.{AWS_REGION}.amazonaws.com'

# Initialize the S3 client (skipped in dry run, where it is never used)
s3_client = boto3.client('s3', region_name=AWS_REGION) if not DRY_RUN else None


def is_aws_url(url: str) -> bool:
    """
    Check whether a URL already points to AWS S3.

    Args:
        url: URL to check

    Returns:
        True if the URL is already on S3, False otherwise
    """
    if not url:
        return False

    # Check for common S3 URL patterns
    s3_patterns = [
        's3.amazonaws.com',
        's3-',  # Regional S3 URLs like s3-us-east-1
        f'{AWS_BUCKET_NAME}.s3',
        f's3://{AWS_BUCKET_NAME}',
    ]
    return any(pattern in url.lower() for pattern in s3_patterns)


async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
    """
    Fetch a card image from a URL and return the raw bytes.

    Args:
        session: aiohttp ClientSession to use for the request
        card_url: URL to fetch the card from
        timeout: Request timeout in seconds

    Returns:
        Raw PNG image bytes
    """
    async with session.get(card_url, timeout=aiohttp.ClientTimeout(total=timeout)) as resp:
        if resp.status == 200:
            logger.info(f'Fetched card image from {card_url}')
            return await resp.read()
        error_text = await resp.text()
        logger.error(f'Failed to fetch card: {error_text}')
        raise ValueError(f'Card fetch error: {error_text}')
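
# Optional helper (a sketch, not part of the original flow): source hosts can
# fail transiently, so a thin retry wrapper around fetch_card_image() may be
# worth wiring in. The helper name and retry count are illustrative assumptions.
async def fetch_card_image_with_retry(session, card_url: str, retries: int = 3,
                                      timeout: int = 6) -> bytes:
    """Retry fetch_card_image with exponential backoff between attempts."""
    for attempt in range(retries):
        try:
            return await fetch_card_image(session, card_url, timeout=timeout)
        except (aiohttp.ClientError, asyncio.TimeoutError, ValueError):
            if attempt == retries - 1:
                raise  # Out of attempts; let the caller handle the failure
            await asyncio.sleep(2 ** attempt)  # Back off 1s, 2s, 4s, ...
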
def upload_card_to_s3(image_data: bytes, player_id: int, card_type: str,
                      release_date: str, cardset_id: int) -> str:
    """
    Upload a card image to S3 and return the S3 URL with a cache-busting param.

    Args:
        image_data: Raw PNG image bytes
        player_id: Player ID
        card_type: 'batting' or 'pitching'
        release_date: Date string for cache busting (e.g., '2025-11-8')
        cardset_id: Cardset ID (zero-padded to 3 digits in the S3 key)

    Returns:
        Full S3 URL with a ?d= parameter
    """
    # Format cardset_id as three digits with leading zeros
    cardset_str = f'{cardset_id:03d}'
    s3_key = f'cards/cardset-{cardset_str}/player-{player_id}/{card_type}card.png'
    s3_url = f'{S3_BASE_URL}/{s3_key}?d={release_date}'

    if DRY_RUN:
        # In a dry run, just report what the URL would be
        logger.info(f'[DRY RUN] Would upload {card_type} card for player {player_id} to: {s3_url}')
        return s3_url

    try:
        s3_client.put_object(
            Bucket=AWS_BUCKET_NAME,
            Key=s3_key,
            Body=image_data,
            ContentType='image/png',
            CacheControl='public, max-age=300',  # 5-minute cache
            Metadata={
                'player-id': str(player_id),
                'card-type': card_type,
                'upload-date': datetime.datetime.now().isoformat(),
            },
        )
    except Exception as e:
        logger.error(f'Failed to upload {card_type} card for player {player_id}: {e}')
        raise

    # Return the URL with the cache-busting parameter
    logger.info(f'Uploaded {card_type} card for player {player_id} to S3: {s3_url}')
    return s3_url
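
# Optional sanity check (a sketch; not called anywhere in this script): confirm
# that an object actually landed in the bucket by issuing a HEAD request.
# head_object raises a ClientError (with a 404 code) when the key is missing.
def verify_s3_upload(s3_key: str) -> bool:
    """Return True if s3_key exists in AWS_BUCKET_NAME; always True in dry run."""
    from botocore.exceptions import ClientError  # boto3's underlying error type

    if DRY_RUN:
        return True
    try:
        s3_client.head_object(Bucket=AWS_BUCKET_NAME, Key=s3_key)
        return True
    except ClientError:
        return False
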
async def process_cardset(cardset: dict, session, release_date: str, stats: dict):
    """
    Process all players in a single cardset.

    Args:
        cardset: Cardset dictionary from the API
        session: aiohttp ClientSession
        release_date: Release date for cache busting
        stats: Global stats dictionary to update
    """
    cardset_id = cardset['id']
    cardset_name = cardset['name']

    print(f'\n{"="*60}')
    print(f'Processing Cardset: {cardset_name} (ID: {cardset_id})')
    print(f'{"="*60}')

    # Get all players for this cardset
    p_query = await db_get(
        'players',
        params=[('inc_dex', False), ('cardset_id', cardset_id), ('short_output', True)]
    )
    if not p_query or p_query['count'] == 0:
        print(f'No players found for cardset {cardset_name}')
        return

    all_players = p_query['players']

    # Apply the per-cardset limit, if set
    if MAX_PLAYERS_PER_CARDSET:
        all_players = all_players[:MAX_PLAYERS_PER_CARDSET]
        print(f'Limited to first {MAX_PLAYERS_PER_CARDSET} players for testing')

    print(f'Found {len(all_players)} players')

    cardset_stats = {
        'total': len(all_players),
        'skipped_aws': 0,
        'uploaded': 0,
        'errors': 0,
        'url_updates': 0,
    }

    for idx, player in enumerate(all_players):
        player_id = player['player_id']
        player_name = player['p_name']

        if idx % 50 == 0 and idx > 0:
            print(f'  Progress: {idx}/{len(all_players)} players processed...')

        # Process the primary image and, for dual-position players, the
        # secondary image; both go through the same fetch/upload/patch steps
        for field in ('image', 'image2'):
            card_url = player.get(field)
            if not card_url:
                continue
            try:
                if SKIP_AWS_URLS and is_aws_url(card_url):
                    logger.debug(f'Skipping player {player_id} {field} - already on AWS')
                    cardset_stats['skipped_aws'] += 1
                    continue

                card_type = 'pitching' if 'pitching' in card_url else 'batting'
                if not DRY_RUN:
                    # Fetch, upload, then update the player record
                    image_bytes = await fetch_card_image(session, card_url, timeout=6)
                    s3_url = upload_card_to_s3(image_bytes, player_id, card_type,
                                               release_date, cardset_id)
                    await db_patch('players', object_id=player_id, params=[(field, s3_url)])
                    cardset_stats['url_updates'] += 1
                else:
                    # Dry run - just log what would happen
                    s3_url = upload_card_to_s3(None, player_id, card_type,
                                               release_date, cardset_id)
                    logger.info(f'[DRY RUN] Would update player {player_id} {field} to: {s3_url}')
                cardset_stats['uploaded'] += 1
            except Exception as e:
                logger.error(f'Error processing player {player_id} ({player_name}) {field}: {e}')
                cardset_stats['errors'] += 1

    # Print the cardset summary
    print(f'\nCardset {cardset_name} Summary:')
    print(f'  Total players: {cardset_stats["total"]}')
    print(f'  Skipped (already AWS): {cardset_stats["skipped_aws"]}')
    print(f'  Uploaded: {cardset_stats["uploaded"]}')
    print(f'  URL updates: {cardset_stats["url_updates"]}')
    print(f'  Errors: {cardset_stats["errors"]}')

    # Fold the cardset stats into the global totals
    for key in cardset_stats:
        stats[key] = stats.get(key, 0) + cardset_stats[key]
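
# Throughput note (a sketch under assumptions; not used by main() below):
# players are processed one at a time above, which is gentle on the card host
# and the DB API. If both tolerate parallelism, a bounded-concurrency variant
# looks like this; the helper name and the limit of 5 are illustrative.
async def run_with_limit(coroutine_factories, limit: int = 5):
    """Run zero-argument coroutine factories with at most `limit` in flight."""
    semaphore = asyncio.Semaphore(limit)

    async def bounded(factory):
        async with semaphore:
            return await factory()

    return await asyncio.gather(*(bounded(f) for f in coroutine_factories))
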
async def main(args):
    print(f'\n{"="*60}')
    print('PAPER DYNASTY - BATCH CARD MIGRATION TO AWS S3')
    print(f'{"="*60}')
    print(f'Mode: {"DRY RUN (no changes will be made)" if DRY_RUN else "LIVE (will upload and update)"}')
    print(f'Skip AWS URLs: {SKIP_AWS_URLS}')
    if START_CARDSET_ID:
        print(f'Start Cardset ID: {START_CARDSET_ID}')
    if END_CARDSET_ID:
        print(f'End Cardset ID: {END_CARDSET_ID}')
    if EXCLUDE_CARDSET_IDS:
        print(f'Excluded Cardset IDs: {EXCLUDE_CARDSET_IDS}')
    print(f'{"="*60}\n')

    # Get all cardsets
    print('Fetching all cardsets...')
    c_query = await db_get('cardsets')
    if not c_query or c_query['count'] == 0:
        print('No cardsets found!')
        return

    all_cardsets = c_query['cardsets']
    print(f'Found {len(all_cardsets)} total cardsets')

    # Filter cardsets based on the configuration at the top of the file
    filtered_cardsets = []
    for cardset in all_cardsets:
        cardset_id = cardset['id']
        if START_CARDSET_ID and cardset_id < START_CARDSET_ID:
            continue
        if END_CARDSET_ID and cardset_id > END_CARDSET_ID:
            continue
        if cardset_id in EXCLUDE_CARDSET_IDS:
            continue
        filtered_cardsets.append(cardset)

    print(f'Processing {len(filtered_cardsets)} cardsets after filters\n')

    # Generate the release date for cache busting (no zero padding, e.g. '2025-11-8')
    now = datetime.datetime.now()
    release_date = f'{now.year}-{now.month}-{now.day}'

    # Global statistics
    stats = {
        'cardsets_processed': 0,
        'total': 0,
        'skipped_aws': 0,
        'uploaded': 0,
        'errors': 0,
        'url_updates': 0,
    }

    start_time = datetime.datetime.now()

    # One persistent aiohttp session for all card fetches
    async with aiohttp.ClientSession() as session:
        for cardset in filtered_cardsets:
            try:
                await process_cardset(cardset, session, release_date, stats)
                stats['cardsets_processed'] += 1
            except Exception as e:
                logger.error(f'Failed to process cardset {cardset["name"]}: {e}')
                continue

    # Print the final summary
    runtime = datetime.datetime.now() - start_time
    print(f'\n{"="*60}')
    print('FINAL SUMMARY')
    print(f'{"="*60}')
    print(f'Mode: {"DRY RUN" if DRY_RUN else "LIVE"}')
    print(f'Cardsets processed: {stats["cardsets_processed"]}')
    print(f'Total player cards: {stats["total"]}')
    print(f'Skipped (already AWS): {stats["skipped_aws"]}')
    print(f'Uploaded to S3: {stats["uploaded"]}')
    print(f'URL updates: {stats["url_updates"]}')
    print(f'Errors: {stats["errors"]}')
    print(f'Runtime: {runtime.total_seconds():.2f} seconds')
    print(f'{"="*60}')

    if DRY_RUN:
        print('\n*** THIS WAS A DRY RUN - NO CHANGES WERE MADE ***')
        print('Set DRY_RUN = False to actually upload and update')


if __name__ == '__main__':
    asyncio.run(main(sys.argv[1:]))
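
# Usage sketch (assumptions: AWS credentials resolve through the standard boto3
# chain - environment variables, ~/.aws/credentials, or an instance role - and
# the file name below is hypothetical; run whatever this script is saved as):
#
#   python migrate_cards_to_s3.py
#
# A safe workflow is to set DRY_RUN = True first, review the logged
# "[DRY RUN] Would upload ..." lines, then flip DRY_RUN to False and rerun.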