212 lines
6.5 KiB
Python
212 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
One-time bulk update of Player.image values to S3 URLs
|
|
Maps player records to: https://sba-cards-2024.s3.us-east-1.amazonaws.com/<year>-cards/<filename>, where <filename> is kept from the player's current image URL (e.g. albert-almora-jr.png)
|
|
"""
|
|
|
|
import logging
|
|
from app.db_engine import db, Player
|
|
|
|
# Logging setup: INFO-level records carrying timestamp, logger name and
# severity, emitted through a logger dedicated to this script.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(f'{__name__}.bulk_update_player_images')
|
|
|
|
# Season to year mapping
|
|
SEASON_TO_YEAR = {
|
|
4: 2020,
|
|
5: 2020,
|
|
6: 2021,
|
|
7: 2021,
|
|
8: 2022,
|
|
9: 2022,
|
|
10: 2023,
|
|
11: 2023,
|
|
}
|
|
|
|
S3_BASE_URL = "https://sba-cards-2024.s3.us-east-1.amazonaws.com"
|
|
|
|
|
|
def generate_image_url(current_url: str, season: int) -> str | None:
|
|
"""
|
|
Generate S3 image URL for a player based on their season
|
|
Preserves the existing filename from current URL (including jr/sr suffixes)
|
|
|
|
Args:
|
|
current_url: Current image URL (e.g., "https://sombaseball.ddns.net/cards/2020/albert-almora-jr.png")
|
|
season: Season number
|
|
|
|
Returns:
|
|
Full S3 URL or None if season not in mapping
|
|
Example: https://sba-cards-2024.s3.us-east-1.amazonaws.com/2020-cards/albert-almora-jr.png
|
|
"""
|
|
year = SEASON_TO_YEAR.get(season)
|
|
if year is None:
|
|
return None
|
|
|
|
# Extract filename from current URL (preserves jr/sr/etc designations)
|
|
# "https://sombaseball.ddns.net/cards/2020/albert-almora-jr.png" -> "albert-almora-jr.png"
|
|
filename = current_url.split('/')[-1]
|
|
|
|
return f"{S3_BASE_URL}/{year}-cards/{filename}"
|
|
|
|
|
|
def preview_updates(limit: int = 10):
    """
    Log a before/after sample of the image URL rewrite.

    Reads up to ``limit`` players whose season appears in SEASON_TO_YEAR and
    logs each player's current image value next to the URL that
    generate_image_url() produces for it. Nothing is written to the database.

    Args:
        limit: Maximum number of players to show
    """
    heavy_rule = "=" * 80
    light_rule = "-" * 80

    logger.info(heavy_rule)
    logger.info("PREVIEW MODE - Showing first %d updates", limit)
    logger.info(heavy_rule)

    # Build the sample query step by step: columns, season filter, row cap.
    mapped_seasons = list(SEASON_TO_YEAR)
    sample = Player.select(Player.id, Player.name, Player.season, Player.image)
    sample = sample.where(Player.season.in_(mapped_seasons))
    sample = sample.limit(limit)

    for player in sample:
        new_url = generate_image_url(player.image, player.season)
        logger.info(f"Player ID {player.id}: {player.name} (Season {player.season})")
        logger.info(f" OLD: {player.image}")
        logger.info(f" NEW: {new_url}")
        logger.info(light_rule)
|
|
|
|
|
|
def get_update_statistics():
    """
    Count and log how many players fall in the mapped seasons.

    Runs one count over all seasons in SEASON_TO_YEAR, then one count per
    season (ascending season order), logging each figure.

    Returns:
        Tuple of (total_count, season_counts) where season_counts maps each
        season number to its player count.
    """
    banner = "=" * 80

    logger.info(banner)
    logger.info("GATHERING STATISTICS")
    logger.info(banner)

    # Overall number of players across every mapped season
    in_scope = Player.select().where(Player.season.in_(list(SEASON_TO_YEAR)))
    total_count = in_scope.count()

    # Per-season figures, reported lowest season first
    season_counts = {}
    for season in sorted(SEASON_TO_YEAR):
        per_season = Player.select().where(Player.season == season)
        count = per_season.count()
        season_counts[season] = count
        logger.info(f"Season {season} ({SEASON_TO_YEAR[season]}): {count} players")

    logger.info("-" * 80)
    logger.info(f"TOTAL players to update: {total_count}")
    logger.info(banner)

    return total_count, season_counts
|
|
|
|
|
|
def bulk_update_images(batch_size: int = 1000, dry_run: bool = False):
    """
    Bulk update player images in batches.

    Every player whose season appears in SEASON_TO_YEAR gets its image value
    replaced by the URL from generate_image_url(). All batches run inside one
    db.atomic() block, so a failure in any batch rolls back the whole update.

    Values are written through Player.bulk_update(), which sends them as bound
    parameters; image filenames containing quotes can no longer break (or
    inject into) the UPDATE statement.

    Args:
        batch_size: Number of records to update per batch (must be >= 1).
            NOTE(review): each row contributes several bound parameters, so
            SQLite builds with a low variable limit may need a smaller value.
        dry_run: If True, only show what would be updated without committing

    Raises:
        ValueError: If batch_size is less than 1 on a live run.
        Exception: Any database error is logged and re-raised.
    """
    if dry_run:
        logger.info("DRY RUN MODE - No changes will be committed")
        preview_updates(limit=20)
        get_update_statistics()
        return

    # range() would fail obscurely on 0 and silently do nothing on negatives.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    logger.info("=" * 80)
    logger.info("STARTING BULK UPDATE")
    logger.info("=" * 80)

    # Get all players that need updates
    target_seasons = list(SEASON_TO_YEAR)
    players_query = (Player
                     .select(Player.id, Player.name, Player.season, Player.image)
                     .where(Player.season.in_(target_seasons)))

    # Build update list: model instances carrying their new image value
    updates = []
    skipped = 0

    logger.info("Building update list...")
    for player in players_query:
        if not player.image:
            # No existing URL means there is no filename to carry over.
            skipped += 1
            logger.warning(f"Skipped player {player.id} - no existing image URL to take a filename from")
            continue

        new_url = generate_image_url(player.image, player.season)
        if not new_url:
            skipped += 1
            logger.warning(f"Skipped player {player.id} - season {player.season} not in mapping")
            continue

        player.image = new_url
        updates.append(player)

    total = len(updates)
    logger.info(f"Prepared {total} updates (skipped {skipped})")

    if total == 0:
        logger.warning("No updates to perform!")
        return

    # Perform batch updates in a single transaction
    try:
        with db.atomic():
            updated_count = 0

            for start in range(0, total, batch_size):
                batch = updates[start:start + batch_size]

                # Emits one parameterized
                # UPDATE ... SET image = CASE id WHEN ? THEN ? ... END WHERE id IN (...)
                Player.bulk_update(batch, fields=[Player.image])
                updated_count += len(batch)

                logger.info(f"Progress: {updated_count}/{total} records updated ({updated_count/total*100:.1f}%)")

        logger.info("=" * 80)
        logger.info(f"SUCCESS! Updated {updated_count} player image values")
        logger.info("=" * 80)

    except Exception as e:
        logger.error(f"ERROR during bulk update: {e}")
        logger.error("Transaction rolled back - no changes were made")
        raise
|
|
|
|
|
|
def main():
    """
    Script entry point.

    With ``--dry-run`` or ``-n`` on the command line, runs the bulk update in
    dry-run mode. Otherwise logs a warning, waits five seconds so the run can
    be cancelled with Ctrl+C, and then applies the update.
    """
    import sys

    # Either spelling of the flag switches to preview-only behaviour
    dry_run = any(flag in sys.argv for flag in ('--dry-run', '-n'))

    if not dry_run:
        import time

        separator = "=" * 80
        logger.warning(separator)
        logger.warning("LIVE RUN - This will modify the database!")
        logger.warning("Press Ctrl+C within 5 seconds to cancel...")
        logger.warning(separator)

        # Grace period: Ctrl+C here exits cleanly before anything is written
        try:
            time.sleep(5)
        except KeyboardInterrupt:
            logger.info("\nCancelled by user")
            sys.exit(0)

        bulk_update_images(batch_size=1000, dry_run=False)
    else:
        logger.info("Running in DRY RUN mode (use without --dry-run to apply changes)")
        bulk_update_images(dry_run=True)

    logger.info("Done!")


if __name__ == "__main__":
    main()
|