major-domo-database/.claude/bulk_update_player_images.py
2025-10-25 20:17:02 -05:00

212 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""
One-time bulk update of Player.image values to S3 URLs
Maps player records to: https://sba-cards-2024.s3.us-east-1.amazonaws.com/<year>-cards/<player_name>.png
"""
import logging
from app.db_engine import db, Player
# Configure logging: timestamped INFO-level output for the whole script.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

# Script-specific logger, namespaced under the importing module's name.
logger = logging.getLogger(f'{__name__}.bulk_update_player_images')
# Season to year mapping
SEASON_TO_YEAR = {
4: 2020,
5: 2020,
6: 2021,
7: 2021,
8: 2022,
9: 2022,
10: 2023,
11: 2023,
}
S3_BASE_URL = "https://sba-cards-2024.s3.us-east-1.amazonaws.com"
def generate_image_url(current_url: str, season: int) -> str | None:
"""
Generate S3 image URL for a player based on their season
Preserves the existing filename from current URL (including jr/sr suffixes)
Args:
current_url: Current image URL (e.g., "https://sombaseball.ddns.net/cards/2020/albert-almora-jr.png")
season: Season number
Returns:
Full S3 URL or None if season not in mapping
Example: https://sba-cards-2024.s3.us-east-1.amazonaws.com/2020-cards/albert-almora-jr.png
"""
year = SEASON_TO_YEAR.get(season)
if year is None:
return None
# Extract filename from current URL (preserves jr/sr/etc designations)
# "https://sombaseball.ddns.net/cards/2020/albert-almora-jr.png" -> "albert-almora-jr.png"
filename = current_url.split('/')[-1]
return f"{S3_BASE_URL}/{year}-cards/{filename}"
def preview_updates(limit: int = 10):
    """Log the current and proposed image URL for up to ``limit`` players.

    Read-only: selects players whose season is a key of SEASON_TO_YEAR and
    logs what generate_image_url() would produce for each of them.
    """
    banner = "=" * 80
    logger.info(banner)
    logger.info("PREVIEW MODE - Showing first %d updates", limit)
    logger.info(banner)

    in_target_seasons = Player.season.in_(list(SEASON_TO_YEAR.keys()))
    columns = (Player.id, Player.name, Player.season, Player.image)
    sample = Player.select(*columns).where(in_target_seasons).limit(limit)

    for player in sample:
        new_url = generate_image_url(player.image, player.season)
        logger.info(f"Player ID {player.id}: {player.name} (Season {player.season})")
        logger.info(f" OLD: {player.image}")
        logger.info(f" NEW: {new_url}")
        # Separator between players.
        logger.info("-" * 80)
def get_update_statistics():
    """Count and log the players in the target seasons.

    Returns:
        A ``(total_count, season_counts)`` tuple: the number of players whose
        season is a key of SEASON_TO_YEAR, and a dict mapping each of those
        seasons to its own player count.
    """
    banner = "=" * 80
    logger.info(banner)
    logger.info("GATHERING STATISTICS")
    logger.info(banner)

    target_seasons = list(SEASON_TO_YEAR.keys())

    # Overall total across every target season.
    in_target_seasons = Player.season.in_(target_seasons)
    total_count = Player.select().where(in_target_seasons).count()

    # Per-season breakdown, reported in ascending season order.
    season_counts = {}
    for season in sorted(target_seasons):
        this_season = Player.select().where(Player.season == season)
        count = this_season.count()
        season_counts[season] = count
        logger.info(f"Season {season} ({SEASON_TO_YEAR[season]}): {count} players")

    logger.info("-" * 80)
    logger.info(f"TOTAL players to update: {total_count}")
    logger.info(banner)

    return total_count, season_counts
def _build_batch_update_sql(batch, param):
    """Build one ``UPDATE ... CASE`` statement and its bound values for a batch.

    Args:
        batch: List of ``{'id': ..., 'image': ...}`` dicts.
        param: The database driver's placeholder token (e.g. ``?`` or ``%s``).

    Returns:
        ``(sql, params)`` where ``params`` holds the image URLs in the same
        order as the ``WHEN`` clauses.
    """
    # Ids are coerced to int before being inlined, so nothing but digits can
    # reach the SQL text. URLs are bound as parameters so quotes or percent
    # signs in a filename cannot break the statement.
    ids = [int(item['id']) for item in batch]
    case_statements = " ".join(f"WHEN {pk} THEN {param}" for pk in ids)
    id_list = ",".join(str(pk) for pk in ids)
    sql = (
        "UPDATE player "
        f"SET image = CASE id {case_statements} END "
        f"WHERE id IN ({id_list})"
    )
    params = [item['image'] for item in batch]
    return sql, params


def bulk_update_images(batch_size: int = 1000, dry_run: bool = False):
    """
    Bulk update player images in batches inside a single transaction.

    Args:
        batch_size: Number of records to update per SQL statement. Each record
            contributes one bound parameter, so keep this below the database
            driver's bound-parameter limit.
        dry_run: If True, only show what would be updated without committing

    Raises:
        ValueError: If batch_size is less than 1 on a live run.
        Exception: Any database error is logged and re-raised after the
            transaction has been rolled back.
    """
    if dry_run:
        logger.info("DRY RUN MODE - No changes will be committed")
        preview_updates(limit=20)
        get_update_statistics()
        return

    # A zero step makes range() raise an opaque error, and a negative step
    # silently performs no updates while still reporting success.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    logger.info("=" * 80)
    logger.info("STARTING BULK UPDATE")
    logger.info("=" * 80)

    # Get all players that need updates
    target_seasons = list(SEASON_TO_YEAR.keys())
    players_query = (
        Player
        .select(Player.id, Player.name, Player.season, Player.image)
        .where(Player.season.in_(target_seasons))
    )

    # Build update list
    updates = []
    skipped = 0
    logger.info("Building update list...")
    for player in players_query:
        # Players without an image are skipped here, so this loop neither
        # crashes on NULL nor stores a URL that has no filename.
        new_url = generate_image_url(player.image, player.season) if player.image else None
        if new_url:
            updates.append({'id': player.id, 'image': new_url})
        else:
            skipped += 1
            logger.warning(
                f"Skipped player {player.id} - no S3 URL could be derived "
                f"(season {player.season}, image {player.image!r})"
            )

    total = len(updates)
    logger.info(f"Prepared {total} updates (skipped {skipped})")
    if total == 0:
        logger.warning("No updates to perform!")
        return

    # Perform batch updates in a single transaction
    updated_count = 0
    try:
        with db.atomic():
            for start in range(0, total, batch_size):
                batch = updates[start:start + batch_size]
                # SQL: UPDATE player SET image = CASE id WHEN 1 THEN ? WHEN 2 THEN ? END WHERE id IN (1,2)
                sql, params = _build_batch_update_sql(batch, db.param)
                db.execute_sql(sql, params)
                updated_count += len(batch)
                logger.info(f"Progress: {updated_count}/{total} records updated ({updated_count/total*100:.1f}%)")
    except Exception as e:
        logger.error(f"ERROR during bulk update: {e}")
        logger.error("Transaction rolled back - no changes were made")
        raise

    # Report success only after the atomic block has exited, i.e. after commit.
    logger.info("=" * 80)
    logger.info(f"SUCCESS! Updated {updated_count} player image values")
    logger.info("=" * 80)
def main():
    """Command-line entry point.

    With ``--dry-run`` or ``-n`` in the arguments, only previews the update.
    Otherwise warns, waits five seconds so the run can be cancelled with
    Ctrl+C, and then applies the update.
    """
    import sys
    import time

    # Check command line arguments
    dry_run = any(flag in sys.argv for flag in ('--dry-run', '-n'))

    if not dry_run:
        banner = "=" * 80
        logger.warning(banner)
        logger.warning("LIVE RUN - This will modify the database!")
        logger.warning("Press Ctrl+C within 5 seconds to cancel...")
        logger.warning(banner)

        # Grace period: Ctrl+C here exits before anything is written.
        try:
            time.sleep(5)
        except KeyboardInterrupt:
            logger.info("\nCancelled by user")
            sys.exit(0)

        bulk_update_images(batch_size=1000, dry_run=False)
    else:
        logger.info("Running in DRY RUN mode (use without --dry-run to apply changes)")
        bulk_update_images(dry_run=True)

    logger.info("Done!")
# Run the update only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()