mantimon-tcg/backend/scripts/convert_cards.py
Cal Corum b78236ac49 Simplify card image URLs to use set-based paths
- Derive image_path from card ID instead of raw data image_file field
- Use simplified CDN paths: /<set>/<card>.webp (e.g., a1/033-charmander.webp)
- Energy cards use basic/<type>.webp paths
- Fix undefined variable bug in transform_trainer_card
- Update tests to match new path format
- Regenerate all 382 card definitions with correct image_url fields
2026-01-27 16:16:51 -06:00

706 lines
20 KiB
Python

#!/usr/bin/env python3
"""Convert raw scraped card data to CardDefinition-compatible JSON.

This script transforms card data from data/raw/ (scraped from pokemon-zone.com)
into the CardDefinition format expected by the game engine, writing output to
data/definitions/.

Usage:
    python scripts/convert_cards.py                  # Convert all cards
    python scripts/convert_cards.py --set a1         # Convert one set only
    python scripts/convert_cards.py --validate-only  # Validate without writing
    python scripts/convert_cards.py --verbose        # Show detailed progress

The script also generates basic energy card definitions.
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.core.enums import CardType, PokemonVariant
from app.core.models.card import CardDefinition
# Base URL that get_image_url() prepends to every relative image path.
# TODO: Update CDN_BASE_URL when CDN is configured
CDN_BASE_URL = "https://mantipocket.s3.us-east-1.amazonaws.com/card-images/"

# Paths, all resolved relative to this script's location so the converter
# works regardless of the current working directory.
SCRIPT_DIR = Path(__file__).parent
BACKEND_DIR = SCRIPT_DIR.parent
RAW_DATA_DIR = BACKEND_DIR / "data" / "raw"  # input: scraped card JSON
DEFINITIONS_DIR = BACKEND_DIR / "data" / "definitions"  # output: converted JSON

# Basic energy types to generate, as (energy_type, display name) pairs.
# generate_energy_cards() emits one card definition per entry.
ENERGY_TYPES = [
    ("colorless", "Colorless Energy"),
    ("darkness", "Darkness Energy"),
    ("dragon", "Dragon Energy"),
    ("fighting", "Fighting Energy"),
    ("fire", "Fire Energy"),
    ("grass", "Grass Energy"),
    ("lightning", "Lightning Energy"),
    ("metal", "Metal Energy"),
    ("psychic", "Psychic Energy"),
    ("water", "Water Energy"),
]

# Set metadata keyed by set code. Empty at import time; main() replaces it
# with the "sets" mapping read from the raw index.
SET_METADATA: dict[str, dict[str, Any]] = {}
def load_raw_index() -> dict[str, Any]:
    """Load the raw data index file.

    Reads ``_index.json`` from ``RAW_DATA_DIR`` and returns the parsed JSON.

    Returns:
        Index data containing set information and card listing.

    Raises:
        FileNotFoundError: If the index file does not exist.
    """
    index_path = RAW_DATA_DIR / "_index.json"
    if not index_path.exists():
        raise FileNotFoundError(f"Raw index not found: {index_path}")

    # Decode explicitly as UTF-8 (the encoding this script uses when writing
    # JSON) instead of relying on the platform's default text encoding.
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)
def transform_attack(raw_attack: dict[str, Any]) -> dict[str, Any]:
    """Convert one scraped attack into the Attack dict layout.

    The result always carries ``name``, ``cost`` and ``damage``. The keys
    ``damage_display``, ``effect_description``, ``effect_id`` and
    ``effect_params`` are added only when they have a non-empty value.

    Args:
        raw_attack: Attack data from scraped JSON. ``name`` is required; a
            missing or null ``damage`` is treated as 0.

    Returns:
        Attack data compatible with the Attack model.
    """
    base_damage = raw_attack.get("damage") or 0
    modifier = raw_attack.get("damage_modifier")

    # Human-readable damage: "30", "30+", or "0x" when only a modifier is
    # present (e.g. coin-flip damage). No damage and no modifier -> no display.
    if base_damage > 0:
        shown = f"{base_damage}{modifier}" if modifier else str(base_damage)
    else:
        shown = f"0{modifier}" if modifier else None

    result: dict[str, Any] = {
        "name": raw_attack["name"],
        "cost": raw_attack.get("cost", []),
        "damage": base_damage,
    }

    # Optional keys, in output order; falsy values are left out entirely.
    optional_fields = (
        ("damage_display", shown),
        ("effect_description", raw_attack.get("effect_text")),
        ("effect_id", raw_attack.get("effect_id")),
        ("effect_params", {"damage_modifier": modifier} if modifier else None),
    )
    result.update((key, value) for key, value in optional_fields if value)
    return result
def transform_weakness_resistance(raw_wr: dict[str, Any] | None) -> dict[str, Any] | None:
"""Transform weakness/resistance to CardDefinition format.
Args:
raw_wr: Weakness or resistance data with 'type' and 'value' keys.
Returns:
WeaknessResistance-compatible dict or None.
"""
if raw_wr is None:
return None
return {
"energy_type": raw_wr["type"],
"value": raw_wr.get("value"),
}
def transform_ability(raw_ability: dict[str, Any]) -> dict[str, Any]:
    """Convert one scraped ability into the Ability dict layout.

    Args:
        raw_ability: Ability data from scraped JSON. ``name`` is required.

    Returns:
        Ability data compatible with the Ability model. ``effect_id`` falls
        back to ``"unimplemented"`` when missing or empty;
        ``effect_description`` and ``effect_params`` appear only when the raw
        data provides a non-empty value.
    """
    converted: dict[str, Any] = {
        "name": raw_ability["name"],
        "effect_id": raw_ability.get("effect_id") or "unimplemented",
    }

    # (raw key, output key) pairs copied over only when non-empty.
    passthrough = (
        ("effect_text", "effect_description"),
        ("effect_params", "effect_params"),
    )
    for source_key, target_key in passthrough:
        if value := raw_ability.get(source_key):
            converted[target_key] = value
    return converted
def determine_variant(raw_card: dict[str, Any]) -> str:
    """Pick the Pokemon variant value for a raw card.

    Args:
        raw_card: Raw card data; only the ``is_ex`` flag is consulted.

    Returns:
        ``PokemonVariant.EX.value`` when ``is_ex`` is truthy, otherwise
        ``PokemonVariant.NORMAL.value``.
    """
    # Could add more variant detection here (V, VMAX, etc.)
    # based on card name patterns if needed
    variant = PokemonVariant.EX if raw_card.get("is_ex") else PokemonVariant.NORMAL
    return variant.value
def get_image_url(image_path: str | None) -> str | None:
"""Generate CDN URL from image path.
Args:
image_path: Local image path (e.g., "pokemon/a1/001-bulbasaur.webp").
Returns:
Full CDN URL or None if no path.
"""
if not image_path:
return None
return f"{CDN_BASE_URL}{image_path}"
def transform_pokemon_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Build a CardDefinition-style dict for one Pokemon card.

    ``id``, ``name``, ``hp``, ``pokemon_type`` and ``stage`` are read with
    direct indexing, so a raw card missing any of them raises ``KeyError``.
    All other fields are optional and only emitted when non-empty.

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict for a Pokemon card.
    """
    set_code = raw_card.get("set_code", "")

    # The image path comes from the card ID, split on the first dash only:
    # "a1-033-charmander" -> ["a1", "033-charmander"] -> "a1/033-charmander.webp"
    # An ID with no dash yields no image path.
    id_parts = raw_card["id"].split("-", 1)
    image_path = ("/".join(id_parts) + ".webp") if len(id_parts) == 2 else None

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.POKEMON.value,
        "hp": raw_card["hp"],
        "pokemon_type": raw_card["pokemon_type"],
        "stage": raw_card["stage"],
        "variant": determine_variant(raw_card),
        "retreat_cost": raw_card.get("retreat_cost", 0),
        "set_id": set_code,
        "rarity": (raw_card.get("rarity") or "common").lower(),
    }

    # Evolution source
    if evolves_from := raw_card.get("evolves_from"):
        card["evolves_from"] = evolves_from

    # Attacks and abilities go through their dedicated transformers
    if raw_attacks := raw_card.get("attacks"):
        card["attacks"] = list(map(transform_attack, raw_attacks))
    if raw_abilities := raw_card.get("abilities"):
        card["abilities"] = list(map(transform_ability, raw_abilities))

    # Weakness and resistance share one raw shape and one transformer
    for matchup in ("weakness", "resistance"):
        if raw_matchup := raw_card.get(matchup):
            card[matchup] = transform_weakness_resistance(raw_matchup)

    # Metadata copied through unchanged
    for meta_key in ("illustrator", "flavor_text"):
        if meta_value := raw_card.get(meta_key):
            card[meta_key] = meta_value

    if image_path:
        card.update(image_path=image_path, image_url=get_image_url(image_path))

    return card
def transform_trainer_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Build a CardDefinition-style dict for one Trainer card.

    ``id`` and ``name`` are read with direct indexing, so a raw card missing
    either raises ``KeyError``. ``trainer_type`` defaults to ``"item"`` when
    the key is absent.

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict for a Trainer card.
    """
    set_code = raw_card.get("set_code", "")

    # The image path comes from the card ID, split on the first dash only:
    # "a1-219-erika" -> ["a1", "219-erika"] -> "a1/219-erika.webp"
    # An ID with no dash yields no image path.
    id_parts = raw_card["id"].split("-", 1)
    image_path = ("/".join(id_parts) + ".webp") if len(id_parts) == 2 else None

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.TRAINER.value,
        "trainer_type": raw_card.get("trainer_type", "item"),
        "set_id": set_code,
        "rarity": (raw_card.get("rarity") or "common").lower(),
    }

    # Optional fields as (raw key, output key), emitted in this order and
    # only when the raw value is non-empty.
    optional_fields = (
        ("effect_text", "effect_description"),
        ("effect_id", "effect_id"),
        ("illustrator", "illustrator"),
    )
    for source_key, target_key in optional_fields:
        if value := raw_card.get(source_key):
            card[target_key] = value

    if image_path:
        card.update(image_path=image_path, image_url=get_image_url(image_path))

    return card
def transform_card(raw_card: dict[str, Any]) -> dict[str, Any]:
"""Transform a raw card to CardDefinition format.
Dispatches to the appropriate transformer based on card_type.
Args:
raw_card: Raw card data from scraped JSON.
Returns:
CardDefinition-compatible dict.
Raises:
ValueError: If card type is unsupported.
"""
card_type = raw_card.get("card_type", "").lower()
if card_type == "pokemon":
return transform_pokemon_card(raw_card)
elif card_type == "trainer":
return transform_trainer_card(raw_card)
else:
raise ValueError(f"Unsupported card type: {card_type}")
def validate_card(card_dict: dict[str, Any]) -> CardDefinition:
    """Check a card dict against the CardDefinition model.

    Args:
        card_dict: Card data to validate.

    Returns:
        The CardDefinition instance produced by
        ``CardDefinition.model_validate``.

    Raises:
        ValueError: If validation fails.
            NOTE(review): relies on the model's validation error being a
            ValueError subclass - confirm against the model library in use.
    """
    validated = CardDefinition.model_validate(card_dict)
    return validated
def generate_energy_cards() -> list[dict[str, Any]]:
    """Create one basic energy card definition per entry in ENERGY_TYPES.

    Each card gets the ID ``energy-basic-<type>``, belongs to the ``basic``
    set, and points at the image ``basic/<type>.webp``.

    Returns:
        List of CardDefinition-compatible dicts for basic energy, in
        ENERGY_TYPES order.
    """

    def _energy_card(energy_type: str, display_name: str) -> dict[str, Any]:
        # Single basic energy card for the given type.
        image_path = f"basic/{energy_type}.webp"
        return {
            "id": f"energy-basic-{energy_type}",
            "name": display_name,
            "card_type": CardType.ENERGY.value,
            "energy_type": energy_type,
            "energy_provides": [energy_type],
            "rarity": "common",
            "set_id": "basic",
            "image_path": image_path,
            "image_url": get_image_url(image_path),
        }

    return [_energy_card(energy_type, display_name) for energy_type, display_name in ENERGY_TYPES]
def write_card_file(card: dict[str, Any], output_path: Path) -> None:
    """Serialize a card definition to a UTF-8 JSON file.

    Missing parent directories are created first. The JSON is written with a
    two-space indent and non-ASCII characters are kept as-is rather than
    escaped.

    Args:
        card: Card definition dict.
        output_path: Path to write the JSON file.
    """
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as handle:
        json.dump(card, handle, indent=2, ensure_ascii=False)
def validate_evolution_chains(
    all_cards: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Report Pokemon whose ``evolves_from`` names no known card.

    A reference counts as resolved when it equals the name of any Pokemon or
    Trainer card in ``all_cards``; Trainer names are included so evolutions
    from items (fossils) resolve too.

    Args:
        all_cards: List of all converted card definitions.

    Returns:
        List of validation warnings (not errors, since missing cards may be
        intentional), one per Pokemon with an unresolved reference, in input
        order.
    """
    # Names that an evolves_from value is allowed to point at.
    known_names = {
        card["name"]
        for card in all_cards
        if card.get("card_type") in ("pokemon", "trainer")
    }

    return [
        {
            "card_id": card["id"],
            "card_name": card["name"],
            "issue": "broken_evolution_chain",
            "evolves_from": source,
            "message": f"'{card['name']}' evolves from '{source}' which was not found",
        }
        for card in all_cards
        if card.get("card_type") == "pokemon"
        and (source := card.get("evolves_from"))
        and source not in known_names
    ]
def build_index(
all_cards: list[dict[str, Any]],
set_metadata: dict[str, dict[str, Any]],
conversion_errors: list[dict[str, Any]] | None = None,
validation_warnings: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
"""Build the master index file.
Args:
all_cards: List of all card definitions.
set_metadata: Metadata about each set.
Returns:
Index data structure.
"""
# Count cards by set and type
sets: dict[str, dict[str, Any]] = {}
for set_code, meta in set_metadata.items():
sets[set_code] = {
"name": meta.get("name", set_code),
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
}
# Add basic energy "set"
sets["basic"] = {
"name": "Basic Energy",
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
"energy_count": 0,
}
# Count cards
for card in all_cards:
set_id = card.get("set_id", "")
if set_id not in sets:
sets[set_id] = {
"name": set_id,
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
}
sets[set_id]["card_count"] += 1
card_type = card.get("card_type", "")
if card_type == "pokemon":
sets[set_id]["pokemon_count"] = sets[set_id].get("pokemon_count", 0) + 1
elif card_type == "trainer":
sets[set_id]["trainer_count"] = sets[set_id].get("trainer_count", 0) + 1
elif card_type == "energy":
sets[set_id]["energy_count"] = sets[set_id].get("energy_count", 0) + 1
# Build card listing
cards = [
{
"id": card["id"],
"name": card["name"],
"card_type": card["card_type"],
"set_id": card.get("set_id", ""),
}
for card in all_cards
]
index: dict[str, Any] = {
"generated_at": datetime.now(UTC).isoformat(),
"schema_version": "1.0",
"total_cards": len(all_cards),
"sets": sets,
"cards": cards,
}
# Add conversion errors if any
if conversion_errors:
index["conversion_errors"] = conversion_errors
# Add validation warnings if any
if validation_warnings:
index["validation_warnings"] = validation_warnings
return index
def convert_set(
    set_code: str,
    validate_only: bool = False,
    verbose: bool = False,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Convert all cards from a single set.

    Every ``*.json`` file in the set's raw data directory is loaded,
    transformed, validated and (unless ``validate_only``) written under
    ``DEFINITIONS_DIR / <card_type> / <set_code>``. A failure on one file is
    recorded and printed, and processing continues with the next file.

    Args:
        set_code: Set code (e.g., "a1").
        validate_only: If True, validate but don't write files.
        verbose: If True, print detailed progress.

    Returns:
        Tuple of (converted card definitions, conversion errors). Both lists
        are empty when the set's raw data directory does not exist.
    """
    raw_set_dir = RAW_DATA_DIR / set_code
    if not raw_set_dir.exists():
        print(f"Warning: Raw data directory not found: {raw_set_dir}")
        return [], []

    converted: list[dict[str, Any]] = []
    errors: list[dict[str, Any]] = []

    for card_file in sorted(raw_set_dir.glob("*.json")):
        # Reset for every file: if opening or parsing fails, the error handler
        # must not report the previous card's id/name, nor hit an unbound name
        # when the very first file is the one that fails.
        raw_card: Any = None
        try:
            # Decode explicitly as UTF-8 rather than the platform default.
            with open(card_file, encoding="utf-8") as f:
                raw_card = json.load(f)

            # Transform the card
            card_dict = transform_card(raw_card)

            # Validate against model
            validate_card(card_dict)

            if verbose:
                print(f" Converted: {card_dict['id']} ({card_dict['name']})")

            if not validate_only:
                # Determine output path based on card type
                card_type = card_dict["card_type"]
                output_path = DEFINITIONS_DIR / card_type / set_code / f"{card_file.stem}.json"
                write_card_file(card_dict, output_path)

            converted.append(card_dict)
        except Exception as e:
            # Deliberate per-file boundary: one bad card must not abort the
            # whole set. The failure is recorded and reported below.
            # The payload may be missing (load failed) or not a JSON object.
            card_info = raw_card if isinstance(raw_card, dict) else {}
            errors.append(
                {
                    "file": card_file.name,
                    "set_code": set_code,
                    "card_id": card_info.get("id", "unknown"),
                    "card_name": card_info.get("name", "unknown"),
                    "error": str(e),
                }
            )
            print(f" ERROR: {card_file.name}: {e}")

    if errors:
        print(f" {len(errors)} errors in set {set_code}")

    return converted, errors
def main() -> int:
    """Main entry point for the converter script.

    Parses the command line, converts the requested sets, generates the basic
    energy cards, checks evolution chains and, unless ``--validate-only`` is
    given, writes the card files and the master index.

    Side effect: replaces the module-level ``SET_METADATA`` with the ``sets``
    mapping from the raw index.

    Returns:
        Exit code: 0 when every card converted cleanly, 1 when the raw index
        is missing or at least one card failed to convert or validate.
        Evolution-chain warnings do not affect the exit code.
    """
    parser = argparse.ArgumentParser(
        description="Convert raw scraped card data to CardDefinition format."
    )
    parser.add_argument(
        "--set",
        type=str,
        help="Convert only a specific set (e.g., 'a1')",
    )
    parser.add_argument(
        "--validate-only",
        action="store_true",
        help="Validate cards without writing files",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show detailed progress",
    )
    args = parser.parse_args()

    print("Card Definition Converter")
    print("=" * 40)

    # Load raw index
    try:
        raw_index = load_raw_index()
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return 1

    # Get set metadata
    global SET_METADATA
    SET_METADATA = raw_index.get("sets", {})

    # Determine which sets to convert
    if args.set:
        sets_to_convert = [args.set]
    else:
        sets_to_convert = list(SET_METADATA.keys())

    all_cards: list[dict[str, Any]] = []
    all_errors: list[dict[str, Any]] = []

    # Convert each set
    for set_code in sets_to_convert:
        set_name = SET_METADATA.get(set_code, {}).get("name", set_code)
        print(f"\nConverting set: {set_code} ({set_name})")
        cards, errors = convert_set(
            set_code,
            validate_only=args.validate_only,
            verbose=args.verbose,
        )
        all_cards.extend(cards)
        all_errors.extend(errors)
        print(f" Converted {len(cards)} cards")

    # Generate energy cards
    print("\nGenerating basic energy cards...")
    energy_cards = generate_energy_cards()
    for card in energy_cards:
        # Validate
        validate_card(card)
        if args.verbose:
            print(f" Generated: {card['id']} ({card['name']})")
        if not args.validate_only:
            output_path = DEFINITIONS_DIR / "energy" / "basic" / f"{card['id']}.json"
            write_card_file(card, output_path)
    all_cards.extend(energy_cards)
    print(f" Generated {len(energy_cards)} energy cards")

    # Validate evolution chains
    print("\nValidating evolution chains...")
    validation_warnings = validate_evolution_chains(all_cards)
    if validation_warnings:
        print(f" Found {len(validation_warnings)} broken evolution chains:")
        for warning in validation_warnings:
            print(f" - {warning['message']}")
    else:
        print(" All evolution chains valid")

    # Write index file
    if not args.validate_only:
        print("\nWriting index file...")
        index = build_index(
            all_cards,
            SET_METADATA,
            conversion_errors=all_errors or None,
            validation_warnings=validation_warnings or None,
        )
        index_path = DEFINITIONS_DIR / "_index.json"
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, ensure_ascii=False)
        print(f" Wrote {index_path}")

    # Summary
    print("\n" + "=" * 40)
    print(f"Total cards converted: {len(all_cards)}")
    if all_errors:
        print(f"Conversion errors: {len(all_errors)}")
    if validation_warnings:
        print(f"Validation warnings: {len(validation_warnings)}")
    if args.validate_only:
        print("(Validation only - no files written)")

    # Honor the documented contract: a run with conversion errors is a
    # failure, so callers (e.g. a --validate-only check) can detect it from
    # the exit status. Warnings alone are advisory and do not fail the run.
    return 1 if all_errors else 0
# Script entry point: run the converter and exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())