#!/usr/bin/env python3
"""
One-time bulk update of Player.image values to S3 URLs.

Maps player records to:
    https://sba-cards-2024.s3.us-east-1.amazonaws.com/{year}-cards/{filename}

where {year} is looked up from SEASON_TO_YEAR and {filename} is the last path
segment of the player's current image URL (e.g. "albert-almora-jr.png").

Run with --dry-run (or -n) to preview the changes without writing anything.
"""

import logging
import sys
import time

from app.db_engine import db, Player

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(f'{__name__}.bulk_update_player_images')

# Season to year mapping; only players in these seasons are touched.
SEASON_TO_YEAR = {
    4: 2020,
    5: 2020,
    6: 2021,
    7: 2021,
    8: 2022,
    9: 2022,
    10: 2023,
    11: 2023,
}

S3_BASE_URL = "https://sba-cards-2024.s3.us-east-1.amazonaws.com"


def generate_image_url(current_url: str | None, season: int) -> str | None:
    """
    Generate the S3 image URL for a player based on their season.

    Preserves the existing filename from the current URL (including jr/sr
    suffixes).

    Args:
        current_url: Current image URL
            (e.g., "https://sombaseball.ddns.net/cards/2020/albert-almora-jr.png")
        season: Season number

    Returns:
        Full S3 URL, or None if the season is not in SEASON_TO_YEAR or the
        current URL is missing / has no filename segment.

    Example:
        https://sba-cards-2024.s3.us-east-1.amazonaws.com/2020-cards/albert-almora-jr.png
    """
    year = SEASON_TO_YEAR.get(season)
    if year is None:
        return None

    # A NULL/empty image has no filename to carry over.
    if not current_url:
        return None

    # Extract filename from current URL (preserves jr/sr/etc designations)
    # ".../cards/2020/albert-almora-jr.png" -> "albert-almora-jr.png"
    filename = current_url.split('/')[-1]
    if not filename:
        # URL ended with "/": there is nothing to map to.
        return None

    return f"{S3_BASE_URL}/{year}-cards/{filename}"


def preview_updates(limit: int = 10):
    """
    Log the old and new image URL for the first `limit` target players.

    Read-only: nothing is written to the database.
    """
    logger.info("=" * 80)
    logger.info("PREVIEW MODE - Showing first %d updates", limit)
    logger.info("=" * 80)

    query = (
        Player
        .select(Player.id, Player.name, Player.season, Player.image)
        .where(Player.season.in_(list(SEASON_TO_YEAR.keys())))
        .limit(limit)
    )

    for player in query:
        new_url = generate_image_url(player.image, player.season)
        logger.info(f"Player ID {player.id}: {player.name} (Season {player.season})")
        logger.info(f" OLD: {player.image}")
        logger.info(f" NEW: {new_url}")
        logger.info("-" * 80)


def get_update_statistics():
    """
    Log and return how many players fall in the target seasons.

    Returns:
        A `(total_count, season_counts)` tuple, where `season_counts` maps
        each season in SEASON_TO_YEAR to its player count.
    """
    logger.info("=" * 80)
    logger.info("GATHERING STATISTICS")
    logger.info("=" * 80)

    # Total players in target seasons
    total_query = (
        Player
        .select()
        .where(Player.season.in_(list(SEASON_TO_YEAR.keys())))
    )
    total_count = total_query.count()

    # Breakdown by season
    season_counts = {}
    for season in sorted(SEASON_TO_YEAR.keys()):
        count = Player.select().where(Player.season == season).count()
        season_counts[season] = count
        logger.info(f"Season {season} ({SEASON_TO_YEAR[season]}): {count} players")

    logger.info("-" * 80)
    logger.info(f"TOTAL players to update: {total_count}")
    logger.info("=" * 80)

    return total_count, season_counts


def _sql_string_literal(value: str) -> str:
    """Return `value` as a single-quoted SQL string literal with quotes doubled."""
    return "'" + value.replace("'", "''") + "'"


def _build_batch_update_sql(batch: list[dict]) -> str:
    """
    Build one UPDATE statement that sets `image` for every item in `batch`.

    Shape of the statement:
        UPDATE player SET image = CASE id WHEN 1 THEN 'url1' WHEN 2 THEN 'url2' END
        WHERE id IN (1,2)
    """
    # ids are coerced with int() so only digits can reach the SQL text; URLs
    # are emitted as escaped literals so an apostrophe in a filename cannot
    # terminate the string early.
    # NOTE(review): values are inlined rather than bound because the
    # placeholder style of `db` is not visible from this file. If the driver
    # applies %-formatting even without parameters, a literal "%" in a URL
    # would also need handling - confirm against the configured backend.
    when_clauses = " ".join(
        f"WHEN {int(item['id'])} THEN {_sql_string_literal(item['image'])}"
        for item in batch
    )
    id_list = ",".join(str(int(item['id'])) for item in batch)
    return (
        f"UPDATE player SET image = CASE id {when_clauses} END "
        f"WHERE id IN ({id_list})"
    )


def bulk_update_images(batch_size: int = 1000, dry_run: bool = False):
    """
    Bulk update player images in batches.

    All batches are executed inside a single `db.atomic()` block, and any
    exception raised while updating is logged and re-raised.

    Args:
        batch_size: Number of records to update per UPDATE statement
        dry_run: If True, only show what would be updated without committing

    Raises:
        ValueError: If `batch_size` is less than 1 on a live run.
    """
    if dry_run:
        logger.info("DRY RUN MODE - No changes will be committed")
        preview_updates(limit=20)
        get_update_statistics()
        return

    # A non-positive batch size would either fail late inside range() or
    # silently update nothing while reporting success.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    logger.info("=" * 80)
    logger.info("STARTING BULK UPDATE")
    logger.info("=" * 80)

    # Get all players that need updates
    target_seasons = list(SEASON_TO_YEAR.keys())
    players_query = (
        Player
        .select(Player.id, Player.name, Player.season, Player.image)
        .where(Player.season.in_(target_seasons))
    )

    # Build update list
    updates = []
    skipped = 0

    logger.info("Building update list...")
    for player in players_query:
        new_url = generate_image_url(player.image, player.season)
        if new_url:
            updates.append({'id': player.id, 'image': new_url})
        else:
            skipped += 1
            logger.warning(
                f"Skipped player {player.id} - no S3 URL could be generated "
                f"(season {player.season}, image {player.image!r})"
            )

    total = len(updates)
    logger.info(f"Prepared {total} updates (skipped {skipped})")

    if total == 0:
        logger.warning("No updates to perform!")
        return

    # Perform batch updates in a single transaction
    try:
        with db.atomic():
            updated_count = 0

            for start in range(0, total, batch_size):
                batch = updates[start:start + batch_size]
                db.execute_sql(_build_batch_update_sql(batch))

                updated_count += len(batch)
                logger.info(
                    f"Progress: {updated_count}/{total} records updated "
                    f"({updated_count/total*100:.1f}%)"
                )

        logger.info("=" * 80)
        logger.info(f"SUCCESS! Updated {updated_count} player image values")
        logger.info("=" * 80)

    except Exception as e:
        logger.error(f"ERROR during bulk update: {e}")
        logger.error("Transaction rolled back - no changes were made")
        raise


def main():
    """
    Command-line entry point.

    With --dry-run / -n, only previews and statistics are logged. Otherwise a
    warning is shown and the script waits 5 seconds (Ctrl+C cancels) before
    applying the update.
    """
    # Check command line arguments
    dry_run = '--dry-run' in sys.argv or '-n' in sys.argv

    if dry_run:
        logger.info("Running in DRY RUN mode (use without --dry-run to apply changes)")
        bulk_update_images(dry_run=True)
    else:
        logger.warning("=" * 80)
        logger.warning("LIVE RUN - This will modify the database!")
        logger.warning("Press Ctrl+C within 5 seconds to cancel...")
        logger.warning("=" * 80)

        try:
            time.sleep(5)
        except KeyboardInterrupt:
            logger.info("\nCancelled by user")
            sys.exit(0)

        bulk_update_images(batch_size=1000, dry_run=False)

    logger.info("Done!")


if __name__ == "__main__":
    main()