Standardize formatting with black and apply ruff auto-fixes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
377 lines
12 KiB
Python
377 lines
12 KiB
Python
import asyncio
|
|
import datetime
|
|
import sys
|
|
import boto3
|
|
|
|
from db_calls import db_get, db_patch
|
|
from exceptions import logger
|
|
|
|
# Configuration
DRY_RUN = False  # When True, only log what would happen; no uploads, no DB writes
SKIP_AWS_URLS = True  # Leave URLs that already point at S3 untouched
START_CARDSET_ID = 21  # Optional: skip cardsets with id below this (falsy disables)
END_CARDSET_ID = 29  # Optional: skip cardsets with id above this (falsy disables)
EXCLUDE_CARDSET_IDS = []  # List of cardset IDs to skip (e.g., [1, 2, 3])
MAX_PLAYERS_PER_CARDSET = None  # Optional: process only the first N players (e.g., 10)

# AWS Configuration
AWS_BUCKET_NAME = "paper-dynasty"  # Change to your bucket name
AWS_REGION = "us-east-1"  # Change to your region
S3_BASE_URL = f"https://{AWS_BUCKET_NAME}.s3.{AWS_REGION}.amazonaws.com"

# Initialize S3 client (only if not dry run); credentials come from the
# default boto3 provider chain (env vars / config / instance profile).
s3_client = boto3.client("s3", region_name=AWS_REGION) if not DRY_RUN else None
|
|
|
|
|
|
def is_aws_url(url: str) -> bool:
    """
    Check if a URL is already pointing to AWS S3.

    Args:
        url: URL to check

    Returns:
        True if URL is already on S3, False otherwise
    """
    if not url:
        return False

    lowered = url.lower()

    # Common S3 URL patterns: the generic endpoint, regional endpoints
    # (s3-us-east-1 etc.), and bucket-specific virtual-hosted / s3:// forms.
    for pattern in (
        "s3.amazonaws.com",
        "s3-",
        f"{AWS_BUCKET_NAME}.s3",
        f"s3://{AWS_BUCKET_NAME}",
    ):
        if pattern in lowered:
            return True

    return False
|
|
|
|
|
|
async def fetch_card_image(session, card_url: str, timeout: int = 6) -> bytes:
    """
    Fetch card image from URL and return raw bytes.

    Args:
        session: aiohttp ClientSession to use for the request
        card_url: URL to fetch the card from
        timeout: Request timeout in seconds

    Returns:
        Raw PNG image bytes

    Raises:
        ValueError: if the server responds with a non-200 status
    """
    import aiohttp

    request_timeout = aiohttp.ClientTimeout(total=timeout)
    async with session.get(card_url, timeout=request_timeout) as resp:
        # Anything other than 200 is treated as a failed fetch.
        if resp.status != 200:
            error_text = await resp.text()
            logger.error(f"Failed to fetch card: {error_text}")
            raise ValueError(f"Card fetch error: {error_text}")

        logger.info(f"Fetched card image from {card_url}")
        return await resp.read()
|
|
|
|
|
|
def upload_card_to_s3(
    image_data: bytes,
    player_id: int,
    card_type: str,
    release_date: str,
    cardset_id: int,
) -> str:
    """
    Upload card image to S3 and return the S3 URL with cache-busting param.

    Args:
        image_data: Raw PNG image bytes (ignored in dry-run mode)
        player_id: Player ID
        card_type: 'batting' or 'pitching'
        release_date: Date string for cache busting (e.g., '2025-11-8')
        cardset_id: Cardset ID (will be zero-padded to 3 digits)

    Returns:
        Full S3 URL with ?d= parameter

    Raises:
        Exception: re-raised from boto3 if the upload fails
    """
    # Key/URL layout is identical for the dry-run and live paths, so build
    # it exactly once (previously duplicated in both branches).
    cardset_str = f"{cardset_id:03d}"
    s3_key = f"cards/cardset-{cardset_str}/player-{player_id}/{card_type}card.png"
    s3_url = f"{S3_BASE_URL}/{s3_key}?d={release_date}"

    if DRY_RUN:
        # In dry run, just report what the URL would be — nothing is uploaded.
        logger.info(
            f"[DRY RUN] Would upload {card_type} card for player {player_id} to: {s3_url}"
        )
        return s3_url

    try:
        s3_client.put_object(
            Bucket=AWS_BUCKET_NAME,
            Key=s3_key,
            Body=image_data,
            ContentType="image/png",
            CacheControl="public, max-age=300",  # 5 minute cache
            Metadata={
                "player-id": str(player_id),
                "card-type": card_type,
                # NOTE(review): naive local time — assumed acceptable for an
                # informational upload timestamp; confirm if UTC is expected.
                "upload-date": datetime.datetime.now().isoformat(),
            },
        )

        logger.info(f"Uploaded {card_type} card for player {player_id} to S3: {s3_url}")
        return s3_url

    except Exception as e:
        logger.error(f"Failed to upload {card_type} card for player {player_id}: {e}")
        raise
|
|
|
|
|
|
async def _migrate_player_image(
    session, player: dict, field: str, release_date: str, cardset_id: int, cardset_stats: dict
):
    """
    Migrate one image field ('image' or 'image2') of one player to S3.

    Fetches the current image, uploads it, and patches the player record
    (or just logs in dry-run mode). Updates cardset_stats counters in place.
    Raises on fetch/upload/patch failure; the caller counts errors.
    """
    player_id = player["player_id"]
    url = player[field]

    if SKIP_AWS_URLS and is_aws_url(url):
        logger.debug(f"Skipping player {player_id} {field} - already on AWS")
        cardset_stats["skipped_aws"] += 1
        return

    # Card type is inferred from the existing URL's filename convention.
    card_type = "pitching" if "pitching" in url else "batting"

    if not DRY_RUN:
        # Fetch, upload, then point the player record at the new S3 URL.
        image_bytes = await fetch_card_image(session, url, timeout=6)
        s3_url = upload_card_to_s3(
            image_bytes, player_id, card_type, release_date, cardset_id
        )
        await db_patch("players", object_id=player_id, params=[(field, s3_url)])
        cardset_stats["url_updates"] += 1
    else:
        # Dry run - just log what would happen
        s3_url = upload_card_to_s3(None, player_id, card_type, release_date, cardset_id)
        logger.info(f"[DRY RUN] Would update player {player_id} {field} to: {s3_url}")

    cardset_stats["uploaded"] += 1


async def process_cardset(cardset: dict, session, release_date: str, stats: dict):
    """
    Process all players in a single cardset.

    Args:
        cardset: Cardset dictionary from API
        session: aiohttp ClientSession
        release_date: Release date for cache busting
        stats: Stats dictionary to update
    """
    cardset_id = cardset["id"]
    cardset_name = cardset["name"]

    print(f'\n{"="*60}')
    print(f"Processing Cardset: {cardset_name} (ID: {cardset_id})")
    print(f'{"="*60}')

    # Get all players for this cardset
    p_query = await db_get(
        "players",
        params=[("inc_dex", False), ("cardset_id", cardset_id), ("short_output", True)],
    )

    if not p_query or p_query["count"] == 0:
        print(f"No players found for cardset {cardset_name}")
        return

    all_players = p_query["players"]

    # Apply max players limit if set
    if MAX_PLAYERS_PER_CARDSET:
        all_players = all_players[:MAX_PLAYERS_PER_CARDSET]
        print(f"Limited to first {MAX_PLAYERS_PER_CARDSET} players for testing")

    print(f"Found {len(all_players)} players")

    cardset_stats = {
        "total": len(all_players),
        "skipped_aws": 0,
        "uploaded": 0,
        "errors": 0,
        "url_updates": 0,
    }

    for idx, player in enumerate(all_players):
        player_id = player["player_id"]
        player_name = player["p_name"]

        if idx % 50 == 0 and idx > 0:
            print(f"  Progress: {idx}/{len(all_players)} players processed...")

        # Primary image, then secondary image (dual-position players);
        # both follow exactly the same migration flow.
        for field in ("image", "image2"):
            if not player[field]:
                continue
            try:
                await _migrate_player_image(
                    session, player, field, release_date, cardset_id, cardset_stats
                )
            except Exception as e:
                logger.error(
                    f"Error processing player {player_id} ({player_name}) {field}: {e}"
                )
                cardset_stats["errors"] += 1

    # Print cardset summary
    print(f"\nCardset {cardset_name} Summary:")
    print(f'  Total players: {cardset_stats["total"]}')
    print(f'  Skipped (already AWS): {cardset_stats["skipped_aws"]}')
    print(f'  Uploaded: {cardset_stats["uploaded"]}')
    print(f'  URL updates: {cardset_stats["url_updates"]}')
    print(f'  Errors: {cardset_stats["errors"]}')

    # Update global stats
    for key in cardset_stats:
        stats[key] = stats.get(key, 0) + cardset_stats[key]
|
|
|
|
|
|
async def main(args):
    """
    Drive the migration: list cardsets, apply the configured ID filters,
    and process each one through process_cardset, printing a final summary.
    *args* is accepted but not currently used.
    """
    import aiohttp

    def _included(cs: dict) -> bool:
        # Apply the START/END/EXCLUDE configuration filters to one cardset.
        cid = cs["id"]
        if START_CARDSET_ID and cid < START_CARDSET_ID:
            return False
        if END_CARDSET_ID and cid > END_CARDSET_ID:
            return False
        return cid not in EXCLUDE_CARDSET_IDS

    banner = "=" * 60

    print(f"\n{banner}")
    print("PAPER DYNASTY - BATCH CARD MIGRATION TO AWS S3")
    print(banner)
    print(
        f'Mode: {"DRY RUN (no changes will be made)" if DRY_RUN else "LIVE (will upload and update)"}'
    )
    print(f"Skip AWS URLs: {SKIP_AWS_URLS}")
    if START_CARDSET_ID:
        print(f"Start Cardset ID: {START_CARDSET_ID}")
    if END_CARDSET_ID:
        print(f"End Cardset ID: {END_CARDSET_ID}")
    if EXCLUDE_CARDSET_IDS:
        print(f"Excluded Cardset IDs: {EXCLUDE_CARDSET_IDS}")
    print(f"{banner}\n")

    # Get all cardsets
    print("Fetching all cardsets...")
    c_query = await db_get("cardsets")

    if not c_query or c_query["count"] == 0:
        print("No cardsets found!")
        return

    all_cardsets = c_query["cardsets"]
    print(f"Found {len(all_cardsets)} total cardsets")

    # Filter cardsets based on configuration
    filtered_cardsets = [cs for cs in all_cardsets if _included(cs)]
    print(f"Processing {len(filtered_cardsets)} cardsets after filters\n")

    # Generate release date for cache busting (non-zero-padded, e.g. 2025-11-8)
    now = datetime.datetime.now()
    release_date = f"{now.year}-{now.month}-{now.day}"

    # Global statistics
    stats = {
        "cardsets_processed": 0,
        "total": 0,
        "skipped_aws": 0,
        "uploaded": 0,
        "errors": 0,
        "url_updates": 0,
    }

    start_time = datetime.datetime.now()

    # One persistent aiohttp session is shared by every card fetch.
    async with aiohttp.ClientSession() as session:
        for cardset in filtered_cardsets:
            try:
                await process_cardset(cardset, session, release_date, stats)
                stats["cardsets_processed"] += 1
            except Exception as e:
                # A failing cardset is logged and skipped; the rest continue.
                logger.error(f'Failed to process cardset {cardset["name"]}: {e}')
                continue

    # Print final summary
    runtime = datetime.datetime.now() - start_time

    print(f"\n{banner}")
    print("FINAL SUMMARY")
    print(banner)
    print(f'Mode: {"DRY RUN" if DRY_RUN else "LIVE"}')
    print(f'Cardsets processed: {stats["cardsets_processed"]}')
    print(f'Total player cards: {stats["total"]}')
    print(f'Skipped (already AWS): {stats["skipped_aws"]}')
    print(f'Uploaded to S3: {stats["uploaded"]}')
    print(f'URL updates: {stats["url_updates"]}')
    print(f'Errors: {stats["errors"]}')
    print(f"Runtime: {runtime.total_seconds():.2f} seconds")
    print(banner)

    if DRY_RUN:
        print("\n*** THIS WAS A DRY RUN - NO CHANGES WERE MADE ***")
        print("Set DRY_RUN = False to actually upload and update")
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main(sys.argv[1:]))
|