#!/usr/bin/env python3
"""Convert raw scraped card data to CardDefinition-compatible JSON.

This script transforms card data from data/raw/ (scraped from pokemon-zone.com)
into the CardDefinition format expected by the game engine, writing output to
data/definitions/.

Usage:
    python scripts/convert_cards.py                    # Convert all cards
    python scripts/convert_cards.py --set a1           # Convert one set only
    python scripts/convert_cards.py --validate-only    # Validate without writing
    python scripts/convert_cards.py --verbose          # Show detailed progress

The script also generates basic energy card definitions.
"""

from __future__ import annotations

import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.core.enums import CardType, PokemonVariant
from app.core.models.card import CardDefinition

# TODO: Update CDN_BASE_URL when CDN is configured
CDN_BASE_URL = "https://mantipocket.s3.us-east-1.amazonaws.com/card-images/"

# Paths
SCRIPT_DIR = Path(__file__).parent
BACKEND_DIR = SCRIPT_DIR.parent
RAW_DATA_DIR = BACKEND_DIR / "data" / "raw"
DEFINITIONS_DIR = BACKEND_DIR / "data" / "definitions"

# Basic energy types to generate
ENERGY_TYPES = [
    ("colorless", "Colorless Energy"),
    ("darkness", "Darkness Energy"),
    ("dragon", "Dragon Energy"),
    ("fighting", "Fighting Energy"),
    ("fire", "Fire Energy"),
    ("grass", "Grass Energy"),
    ("lightning", "Lightning Energy"),
    ("metal", "Metal Energy"),
    ("psychic", "Psychic Energy"),
    ("water", "Water Energy"),
]

# Set metadata (from raw index)
SET_METADATA: dict[str, dict[str, Any]] = {}


def load_raw_index() -> dict[str, Any]:
    """Load the raw data index file.

    Returns:
        Index data containing set information and card listing.

    Raises:
        FileNotFoundError: If ``_index.json`` is missing from RAW_DATA_DIR.
    """
    index_path = RAW_DATA_DIR / "_index.json"
    if not index_path.exists():
        raise FileNotFoundError(f"Raw index not found: {index_path}")

    # Read as UTF-8 explicitly: this script writes its own JSON as UTF-8, and
    # the locale default encoding is not UTF-8 on every platform.
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)


def transform_attack(raw_attack: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw attack to CardDefinition Attack format.

    Args:
        raw_attack: Attack data from scraped JSON.

    Returns:
        Attack data compatible with the Attack model.
    """
    # A missing or null damage value is treated as 0.
    damage = raw_attack.get("damage") or 0
    damage_modifier = raw_attack.get("damage_modifier")

    # Build damage_display string
    damage_display = None
    if damage > 0:
        if damage_modifier:
            damage_display = f"{damage}{damage_modifier}"
        else:
            damage_display = str(damage)
    elif damage_modifier:
        # Edge case: modifier but no base damage (e.g., "x" for coin flip damage)
        damage_display = f"0{damage_modifier}"

    # Build effect_params
    effect_params: dict[str, Any] = {}
    if damage_modifier:
        effect_params["damage_modifier"] = damage_modifier

    attack: dict[str, Any] = {
        "name": raw_attack["name"],
        "cost": raw_attack.get("cost", []),
        "damage": damage,
    }

    # Optional keys are only emitted when they carry a truthy value.
    if damage_display:
        attack["damage_display"] = damage_display
    if raw_attack.get("effect_text"):
        attack["effect_description"] = raw_attack["effect_text"]
    if raw_attack.get("effect_id"):
        attack["effect_id"] = raw_attack["effect_id"]
    if effect_params:
        attack["effect_params"] = effect_params

    return attack


def transform_weakness_resistance(raw_wr: dict[str, Any] | None) -> dict[str, Any] | None:
    """Transform weakness/resistance to CardDefinition format.

    Args:
        raw_wr: Weakness or resistance data with 'type' and 'value' keys.

    Returns:
        WeaknessResistance-compatible dict or None.
    """
    if raw_wr is None:
        return None

    return {
        "energy_type": raw_wr["type"],
        "value": raw_wr.get("value"),
    }


def transform_ability(raw_ability: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw ability to CardDefinition Ability format.

    Args:
        raw_ability: Ability data from scraped JSON.

    Returns:
        Ability data compatible with the Ability model.
    """
    ability: dict[str, Any] = {
        "name": raw_ability["name"],
        # Abilities without an effect id get the "unimplemented" placeholder.
        "effect_id": raw_ability.get("effect_id") or "unimplemented",
    }

    if raw_ability.get("effect_text"):
        ability["effect_description"] = raw_ability["effect_text"]
    if raw_ability.get("effect_params"):
        ability["effect_params"] = raw_ability["effect_params"]

    return ability


def determine_variant(raw_card: dict[str, Any]) -> str:
    """Determine the Pokemon variant from raw card data.

    Args:
        raw_card: Raw card data.

    Returns:
        Variant string (e.g., "normal", "ex").
    """
    if raw_card.get("is_ex"):
        return PokemonVariant.EX.value

    # Could add more variant detection here (V, VMAX, etc.)
    # based on card name patterns if needed
    return PokemonVariant.NORMAL.value


def get_image_url(image_path: str | None) -> str | None:
    """Generate CDN URL from image path.

    Args:
        image_path: Local image path (e.g., "pokemon/a1/001-bulbasaur.webp").

    Returns:
        Full CDN URL or None if no path.
    """
    if not image_path:
        return None
    return f"{CDN_BASE_URL}{image_path}"


def _image_path_from_id(card_id: str) -> str | None:
    """Derive the relative image path from a card ID.

    The ID is split at its first hyphen, e.g. "a1-033-charmander" becomes
    "a1/033-charmander.webp".

    Args:
        card_id: Card ID string.

    Returns:
        Relative image path, or None if the ID contains no hyphen.
    """
    set_part, separator, remainder = card_id.partition("-")
    if not separator:
        return None
    return f"{set_part}/{remainder}.webp"


def transform_pokemon_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw Pokemon card to CardDefinition format.

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict for a Pokemon card.

    Raises:
        KeyError: If a required key (id, name, hp, pokemon_type, stage) is missing.
    """
    set_code = raw_card.get("set_code", "")

    # Card ID format: "a1-033-charmander" -> path: "a1/033-charmander.webp"
    image_path = _image_path_from_id(raw_card["id"])

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.POKEMON.value,
        "hp": raw_card["hp"],
        "pokemon_type": raw_card["pokemon_type"],
        "stage": raw_card["stage"],
        "variant": determine_variant(raw_card),
        "retreat_cost": raw_card.get("retreat_cost", 0),
        "set_id": set_code,
        # A missing or null rarity falls back to "common".
        "rarity": (raw_card.get("rarity") or "common").lower(),
    }

    # Optional fields
    if raw_card.get("evolves_from"):
        card["evolves_from"] = raw_card["evolves_from"]

    # Transform attacks
    if raw_card.get("attacks"):
        card["attacks"] = [transform_attack(a) for a in raw_card["attacks"]]

    # Transform abilities
    if raw_card.get("abilities"):
        card["abilities"] = [transform_ability(a) for a in raw_card["abilities"]]

    # Transform weakness/resistance
    if raw_card.get("weakness"):
        card["weakness"] = transform_weakness_resistance(raw_card["weakness"])
    if raw_card.get("resistance"):
        card["resistance"] = transform_weakness_resistance(raw_card["resistance"])

    # Metadata
    if raw_card.get("illustrator"):
        card["illustrator"] = raw_card["illustrator"]
    if raw_card.get("flavor_text"):
        card["flavor_text"] = raw_card["flavor_text"]
    if image_path:
        card["image_path"] = image_path
        card["image_url"] = get_image_url(image_path)

    return card


def transform_trainer_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw Trainer card to CardDefinition format.

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict for a Trainer card.

    Raises:
        KeyError: If a required key (id, name) is missing.
    """
    set_code = raw_card.get("set_code", "")

    # Card ID format: "a1-219-erika" -> path: "a1/219-erika.webp"
    image_path = _image_path_from_id(raw_card["id"])

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.TRAINER.value,
        "trainer_type": raw_card.get("trainer_type", "item"),
        "set_id": set_code,
        # A missing or null rarity falls back to "common".
        "rarity": (raw_card.get("rarity") or "common").lower(),
    }

    # Effect description
    if raw_card.get("effect_text"):
        card["effect_description"] = raw_card["effect_text"]
    if raw_card.get("effect_id"):
        card["effect_id"] = raw_card["effect_id"]

    # Metadata
    if raw_card.get("illustrator"):
        card["illustrator"] = raw_card["illustrator"]
    if image_path:
        card["image_path"] = image_path
        card["image_url"] = get_image_url(image_path)

    return card


def transform_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw card to CardDefinition format.

    Dispatches to the appropriate transformer based on card_type.

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict.

    Raises:
        ValueError: If card type is unsupported.
    """
    # `or ""` covers a card_type that is present but null, so that case raises
    # the documented ValueError instead of an AttributeError.
    card_type = (raw_card.get("card_type") or "").lower()

    if card_type == "pokemon":
        return transform_pokemon_card(raw_card)
    if card_type == "trainer":
        return transform_trainer_card(raw_card)
    raise ValueError(f"Unsupported card type: {card_type}")


def validate_card(card_dict: dict[str, Any]) -> CardDefinition:
    """Validate a card dict against the CardDefinition model.

    Args:
        card_dict: Card data to validate.

    Returns:
        Validated CardDefinition.

    Raises:
        ValueError: If validation fails.
    """
    return CardDefinition.model_validate(card_dict)


def generate_energy_cards() -> list[dict[str, Any]]:
    """Generate basic energy card definitions.

    Returns:
        List of CardDefinition-compatible dicts for basic energy, one per
        entry in ENERGY_TYPES.
    """
    cards: list[dict[str, Any]] = []

    for energy_type, display_name in ENERGY_TYPES:
        image_path = f"basic/{energy_type}.webp"
        cards.append(
            {
                "id": f"energy-basic-{energy_type}",
                "name": display_name,
                "card_type": CardType.ENERGY.value,
                "energy_type": energy_type,
                "energy_provides": [energy_type],
                "rarity": "common",
                "set_id": "basic",
                "image_path": image_path,
                "image_url": get_image_url(image_path),
            }
        )

    return cards


def write_card_file(card: dict[str, Any], output_path: Path) -> None:
    """Write a card definition to a JSON file.

    Parent directories are created if they do not exist yet.

    Args:
        card: Card definition dict.
        output_path: Path to write the JSON file.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(card, f, indent=2, ensure_ascii=False)


def validate_evolution_chains(
    all_cards: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Validate that all evolves_from references point to existing cards.

    For Pokemon that evolve from other Pokemon, checks that the base form exists.
    For Pokemon that evolve from items (fossils), checks that the item exists.

    Args:
        all_cards: List of all converted card definitions.

    Returns:
        List of validation warnings (not errors, since missing cards may be
        intentional).
    """
    # An evolution source may be a Pokemon or a Trainer card, matched by name.
    known_names = {
        card["name"]
        for card in all_cards
        if card.get("card_type") in ("pokemon", "trainer")
    }

    warnings: list[dict[str, Any]] = []
    for card in all_cards:
        if card.get("card_type") != "pokemon":
            continue

        evolves_from = card.get("evolves_from")
        if not evolves_from or evolves_from in known_names:
            continue

        warnings.append(
            {
                "card_id": card["id"],
                "card_name": card["name"],
                "issue": "broken_evolution_chain",
                "evolves_from": evolves_from,
                "message": f"'{card['name']}' evolves from '{evolves_from}' which was not found",
            }
        )

    return warnings


def build_index(
    all_cards: list[dict[str, Any]],
    set_metadata: dict[str, dict[str, Any]],
    conversion_errors: list[dict[str, Any]] | None = None,
    validation_warnings: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Build the master index file.

    Args:
        all_cards: List of all card definitions.
        set_metadata: Metadata about each set.
        conversion_errors: Per-card conversion errors; included in the index
            under "conversion_errors" only when non-empty.
        validation_warnings: Validation warnings; included in the index under
            "validation_warnings" only when non-empty.

    Returns:
        Index data structure.
    """
    # Seed one zeroed entry per known set so empty sets still appear.
    sets: dict[str, dict[str, Any]] = {
        set_code: {
            "name": meta.get("name", set_code),
            "card_count": 0,
            "pokemon_count": 0,
            "trainer_count": 0,
        }
        for set_code, meta in set_metadata.items()
    }

    # Add basic energy "set"
    sets["basic"] = {
        "name": "Basic Energy",
        "card_count": 0,
        "pokemon_count": 0,
        "trainer_count": 0,
        "energy_count": 0,
    }

    # Count cards by set and type
    for card in all_cards:
        set_id = card.get("set_id", "")
        entry = sets.setdefault(
            set_id,
            {
                "name": set_id,
                "card_count": 0,
                "pokemon_count": 0,
                "trainer_count": 0,
            },
        )
        entry["card_count"] += 1

        card_type = card.get("card_type", "")
        if card_type in ("pokemon", "trainer", "energy"):
            # "energy_count" is absent from regular set entries, hence .get().
            count_key = f"{card_type}_count"
            entry[count_key] = entry.get(count_key, 0) + 1

    # Build card listing
    cards = [
        {
            "id": card["id"],
            "name": card["name"],
            "card_type": card["card_type"],
            "set_id": card.get("set_id", ""),
        }
        for card in all_cards
    ]

    index: dict[str, Any] = {
        "generated_at": datetime.now(UTC).isoformat(),
        "schema_version": "1.0",
        "total_cards": len(all_cards),
        "sets": sets,
        "cards": cards,
    }

    # Add conversion errors if any
    if conversion_errors:
        index["conversion_errors"] = conversion_errors

    # Add validation warnings if any
    if validation_warnings:
        index["validation_warnings"] = validation_warnings

    return index


def convert_set(
    set_code: str,
    validate_only: bool = False,
    verbose: bool = False,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Convert all cards from a single set.

    A failure on one card file is recorded and reported, and conversion
    continues with the next file.

    Args:
        set_code: Set code (e.g., "a1").
        validate_only: If True, validate but don't write files.
        verbose: If True, print detailed progress.

    Returns:
        Tuple of (converted card definitions, conversion errors).
    """
    raw_set_dir = RAW_DATA_DIR / set_code
    if not raw_set_dir.exists():
        print(f"Warning: Raw data directory not found: {raw_set_dir}")
        return [], []

    converted: list[dict[str, Any]] = []
    errors: list[dict[str, Any]] = []

    for card_file in sorted(raw_set_dir.glob("*.json")):
        # Reset per file: if loading fails, the error handler must not report
        # the previous card's id/name, nor hit an unbound name on the first file.
        raw_card: Any = None
        try:
            with open(card_file, encoding="utf-8") as f:
                raw_card = json.load(f)

            # Transform the card
            card_dict = transform_card(raw_card)

            # Validate against model
            validate_card(card_dict)

            if verbose:
                print(f" Converted: {card_dict['id']} ({card_dict['name']})")

            if not validate_only:
                # Determine output path based on card type
                card_type = card_dict["card_type"]
                output_path = DEFINITIONS_DIR / card_type / set_code / f"{card_file.stem}.json"
                write_card_file(card_dict, output_path)

            converted.append(card_dict)

        except Exception as e:
            # Deliberately broad: this is the per-card boundary, and every
            # failure is recorded and printed rather than swallowed.
            # The JSON root may not be an object (or may never have loaded).
            card_info = raw_card if isinstance(raw_card, dict) else {}
            errors.append(
                {
                    "file": card_file.name,
                    "set_code": set_code,
                    "card_id": card_info.get("id", "unknown"),
                    "card_name": card_info.get("name", "unknown"),
                    "error": str(e),
                }
            )
            print(f" ERROR: {card_file.name}: {e}")

    if errors:
        print(f" {len(errors)} errors in set {set_code}")

    return converted, errors


def main() -> int:
    """Main entry point for the converter script.

    Returns:
        Exit code: 1 if the raw index cannot be loaded, otherwise 0.
        Per-card conversion errors and validation warnings are reported in
        the output (and the index file) but do not change the exit code.
    """
    parser = argparse.ArgumentParser(
        description="Convert raw scraped card data to CardDefinition format."
    )
    parser.add_argument(
        "--set",
        type=str,
        help="Convert only a specific set (e.g., 'a1')",
    )
    parser.add_argument(
        "--validate-only",
        action="store_true",
        help="Validate cards without writing files",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show detailed progress",
    )
    args = parser.parse_args()

    print("Card Definition Converter")
    print("=" * 40)

    # Load raw index
    try:
        raw_index = load_raw_index()
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return 1

    # Get set metadata (kept as a module-level global, as before)
    global SET_METADATA
    SET_METADATA = raw_index.get("sets", {})

    # Determine which sets to convert
    if args.set:
        sets_to_convert = [args.set]
    else:
        sets_to_convert = list(SET_METADATA.keys())

    all_cards: list[dict[str, Any]] = []
    all_errors: list[dict[str, Any]] = []

    # Convert each set
    for set_code in sets_to_convert:
        set_name = SET_METADATA.get(set_code, {}).get("name", set_code)
        print(f"\nConverting set: {set_code} ({set_name})")

        cards, errors = convert_set(
            set_code,
            validate_only=args.validate_only,
            verbose=args.verbose,
        )
        all_cards.extend(cards)
        all_errors.extend(errors)

        print(f" Converted {len(cards)} cards")

    # Generate energy cards
    print("\nGenerating basic energy cards...")
    energy_cards = generate_energy_cards()

    for card in energy_cards:
        # Validate
        validate_card(card)

        if args.verbose:
            print(f" Generated: {card['id']} ({card['name']})")

        if not args.validate_only:
            output_path = DEFINITIONS_DIR / "energy" / "basic" / f"{card['id']}.json"
            write_card_file(card, output_path)

    all_cards.extend(energy_cards)
    print(f" Generated {len(energy_cards)} energy cards")

    # Validate evolution chains
    print("\nValidating evolution chains...")
    validation_warnings = validate_evolution_chains(all_cards)
    if validation_warnings:
        print(f" Found {len(validation_warnings)} broken evolution chains:")
        for warning in validation_warnings:
            print(f" - {warning['message']}")
    else:
        print(" All evolution chains valid")

    # Write index file
    if not args.validate_only:
        print("\nWriting index file...")
        index = build_index(
            all_cards,
            SET_METADATA,
            conversion_errors=all_errors if all_errors else None,
            validation_warnings=validation_warnings if validation_warnings else None,
        )
        index_path = DEFINITIONS_DIR / "_index.json"
        # Do not rely on an earlier card write having created the directory.
        index_path.parent.mkdir(parents=True, exist_ok=True)
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, ensure_ascii=False)
        print(f" Wrote {index_path}")

    # Summary
    print("\n" + "=" * 40)
    print(f"Total cards converted: {len(all_cards)}")
    if all_errors:
        print(f"Conversion errors: {len(all_errors)}")
    if validation_warnings:
        print(f"Validation warnings: {len(validation_warnings)}")

    if args.validate_only:
        print("(Validation only - no files written)")

    return 0


if __name__ == "__main__":
    sys.exit(main())