#!/usr/bin/env python3 """Convert raw scraped card data to CardDefinition-compatible JSON. This script transforms card data from data/raw/ (scraped from pokemon-zone.com) into the CardDefinition format expected by the game engine, writing output to data/definitions/. Usage: python scripts/convert_cards.py # Convert all cards python scripts/convert_cards.py --set a1 # Convert one set only python scripts/convert_cards.py --validate-only # Validate without writing python scripts/convert_cards.py --verbose # Show detailed progress The script also generates basic energy card definitions. """ from __future__ import annotations import argparse import json import sys from datetime import UTC, datetime from pathlib import Path from typing import Any # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) from app.core.enums import CardType, PokemonVariant from app.core.models.card import CardDefinition # TODO: Update CDN_BASE_URL when CDN is configured CDN_BASE_URL = "https://cdn.mantimon.com/cards" # Paths SCRIPT_DIR = Path(__file__).parent BACKEND_DIR = SCRIPT_DIR.parent RAW_DATA_DIR = BACKEND_DIR / "data" / "raw" DEFINITIONS_DIR = BACKEND_DIR / "data" / "definitions" # Basic energy types to generate ENERGY_TYPES = [ ("colorless", "Colorless Energy"), ("darkness", "Darkness Energy"), ("dragon", "Dragon Energy"), ("fighting", "Fighting Energy"), ("fire", "Fire Energy"), ("grass", "Grass Energy"), ("lightning", "Lightning Energy"), ("metal", "Metal Energy"), ("psychic", "Psychic Energy"), ("water", "Water Energy"), ] # Set metadata (from raw index) SET_METADATA: dict[str, dict[str, Any]] = {} def load_raw_index() -> dict[str, Any]: """Load the raw data index file. Returns: Index data containing set information and card listing. """ index_path = RAW_DATA_DIR / "_index.json" if not index_path.exists(): raise FileNotFoundError(f"Raw index not found: {index_path}") with open(index_path) as f: return json.load(f) def transform_attack(raw_attack: dict[str, Any]) -> dict[str, Any]: """Transform a raw attack to CardDefinition Attack format. Args: raw_attack: Attack data from scraped JSON. Returns: Attack data compatible with the Attack model. """ damage = raw_attack.get("damage") or 0 damage_modifier = raw_attack.get("damage_modifier") # Build damage_display string damage_display = None if damage > 0: if damage_modifier: damage_display = f"{damage}{damage_modifier}" else: damage_display = str(damage) elif damage_modifier: # Edge case: modifier but no base damage (e.g., "x" for coin flip damage) damage_display = f"0{damage_modifier}" # Build effect_params effect_params: dict[str, Any] = {} if damage_modifier: effect_params["damage_modifier"] = damage_modifier attack = { "name": raw_attack["name"], "cost": raw_attack.get("cost", []), "damage": damage, } if damage_display: attack["damage_display"] = damage_display if raw_attack.get("effect_text"): attack["effect_description"] = raw_attack["effect_text"] if raw_attack.get("effect_id"): attack["effect_id"] = raw_attack["effect_id"] if effect_params: attack["effect_params"] = effect_params return attack def transform_weakness_resistance(raw_wr: dict[str, Any] | None) -> dict[str, Any] | None: """Transform weakness/resistance to CardDefinition format. Args: raw_wr: Weakness or resistance data with 'type' and 'value' keys. Returns: WeaknessResistance-compatible dict or None. """ if raw_wr is None: return None return { "energy_type": raw_wr["type"], "value": raw_wr.get("value"), } def transform_ability(raw_ability: dict[str, Any]) -> dict[str, Any]: """Transform a raw ability to CardDefinition Ability format. Args: raw_ability: Ability data from scraped JSON. Returns: Ability data compatible with the Ability model. """ ability = { "name": raw_ability["name"], "effect_id": raw_ability.get("effect_id") or "unimplemented", } if raw_ability.get("effect_text"): ability["effect_description"] = raw_ability["effect_text"] if raw_ability.get("effect_params"): ability["effect_params"] = raw_ability["effect_params"] return ability def determine_variant(raw_card: dict[str, Any]) -> str: """Determine the Pokemon variant from raw card data. Args: raw_card: Raw card data. Returns: Variant string (e.g., "normal", "ex"). """ if raw_card.get("is_ex"): return PokemonVariant.EX.value # Could add more variant detection here (V, VMAX, etc.) # based on card name patterns if needed return PokemonVariant.NORMAL.value def get_image_url(image_path: str | None) -> str | None: """Generate CDN URL from image path. Args: image_path: Local image path (e.g., "pokemon/a1/001-bulbasaur.webp"). Returns: Full CDN URL or None if no path. """ if not image_path: return None return f"{CDN_BASE_URL}/{image_path}" def transform_pokemon_card(raw_card: dict[str, Any]) -> dict[str, Any]: """Transform a raw Pokemon card to CardDefinition format. Args: raw_card: Raw card data from scraped JSON. Returns: CardDefinition-compatible dict for a Pokemon card. """ set_code = raw_card.get("set_code", "") # Build image path: pokemon/{set_code}/{filename}.webp raw_image_file = raw_card.get("image_file") if raw_image_file: # raw_image_file is like "a1/001-bulbasaur.webp" # We want: "pokemon/a1/001-bulbasaur.webp" image_path = f"pokemon/{raw_image_file}" else: image_path = None card: dict[str, Any] = { "id": raw_card["id"], "name": raw_card["name"], "card_type": CardType.POKEMON.value, "hp": raw_card["hp"], "pokemon_type": raw_card["pokemon_type"], "stage": raw_card["stage"], "variant": determine_variant(raw_card), "retreat_cost": raw_card.get("retreat_cost", 0), "set_id": set_code, "rarity": (raw_card.get("rarity") or "common").lower(), } # Optional fields if raw_card.get("evolves_from"): card["evolves_from"] = raw_card["evolves_from"] # Transform attacks if raw_card.get("attacks"): card["attacks"] = [transform_attack(a) for a in raw_card["attacks"]] # Transform abilities if raw_card.get("abilities"): card["abilities"] = [transform_ability(a) for a in raw_card["abilities"]] # Transform weakness/resistance if raw_card.get("weakness"): card["weakness"] = transform_weakness_resistance(raw_card["weakness"]) if raw_card.get("resistance"): card["resistance"] = transform_weakness_resistance(raw_card["resistance"]) # Metadata if raw_card.get("illustrator"): card["illustrator"] = raw_card["illustrator"] if raw_card.get("flavor_text"): card["flavor_text"] = raw_card["flavor_text"] if image_path: card["image_path"] = image_path card["image_url"] = get_image_url(image_path) return card def transform_trainer_card(raw_card: dict[str, Any]) -> dict[str, Any]: """Transform a raw Trainer card to CardDefinition format. Args: raw_card: Raw card data from scraped JSON. Returns: CardDefinition-compatible dict for a Trainer card. """ set_code = raw_card.get("set_code", "") # Build image path raw_image_file = raw_card.get("image_file") if raw_image_file: image_path = f"trainer/{raw_image_file}" else: image_path = None card: dict[str, Any] = { "id": raw_card["id"], "name": raw_card["name"], "card_type": CardType.TRAINER.value, "trainer_type": raw_card.get("trainer_type", "item"), "set_id": set_code, "rarity": (raw_card.get("rarity") or "common").lower(), } # Effect description if raw_card.get("effect_text"): card["effect_description"] = raw_card["effect_text"] if raw_card.get("effect_id"): card["effect_id"] = raw_card["effect_id"] # Metadata if raw_card.get("illustrator"): card["illustrator"] = raw_card["illustrator"] if image_path: card["image_path"] = image_path card["image_url"] = get_image_url(image_path) return card def transform_card(raw_card: dict[str, Any]) -> dict[str, Any]: """Transform a raw card to CardDefinition format. Dispatches to the appropriate transformer based on card_type. Args: raw_card: Raw card data from scraped JSON. Returns: CardDefinition-compatible dict. Raises: ValueError: If card type is unsupported. """ card_type = raw_card.get("card_type", "").lower() if card_type == "pokemon": return transform_pokemon_card(raw_card) elif card_type == "trainer": return transform_trainer_card(raw_card) else: raise ValueError(f"Unsupported card type: {card_type}") def validate_card(card_dict: dict[str, Any]) -> CardDefinition: """Validate a card dict against the CardDefinition model. Args: card_dict: Card data to validate. Returns: Validated CardDefinition. Raises: ValueError: If validation fails. """ return CardDefinition.model_validate(card_dict) def generate_energy_cards() -> list[dict[str, Any]]: """Generate basic energy card definitions. Returns: List of CardDefinition-compatible dicts for basic energy. """ cards = [] for energy_type, display_name in ENERGY_TYPES: card_id = f"energy-basic-{energy_type}" image_path = f"energy/basic/{energy_type}.webp" card = { "id": card_id, "name": display_name, "card_type": CardType.ENERGY.value, "energy_type": energy_type, "energy_provides": [energy_type], "rarity": "common", "set_id": "basic", "image_path": image_path, "image_url": get_image_url(image_path), } cards.append(card) return cards def write_card_file(card: dict[str, Any], output_path: Path) -> None: """Write a card definition to a JSON file. Args: card: Card definition dict. output_path: Path to write the JSON file. """ output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: json.dump(card, f, indent=2) def build_index( all_cards: list[dict[str, Any]], set_metadata: dict[str, dict[str, Any]], ) -> dict[str, Any]: """Build the master index file. Args: all_cards: List of all card definitions. set_metadata: Metadata about each set. Returns: Index data structure. """ # Count cards by set and type sets: dict[str, dict[str, Any]] = {} for set_code, meta in set_metadata.items(): sets[set_code] = { "name": meta.get("name", set_code), "card_count": 0, "pokemon_count": 0, "trainer_count": 0, } # Add basic energy "set" sets["basic"] = { "name": "Basic Energy", "card_count": 0, "pokemon_count": 0, "trainer_count": 0, "energy_count": 0, } # Count cards for card in all_cards: set_id = card.get("set_id", "") if set_id not in sets: sets[set_id] = { "name": set_id, "card_count": 0, "pokemon_count": 0, "trainer_count": 0, } sets[set_id]["card_count"] += 1 card_type = card.get("card_type", "") if card_type == "pokemon": sets[set_id]["pokemon_count"] = sets[set_id].get("pokemon_count", 0) + 1 elif card_type == "trainer": sets[set_id]["trainer_count"] = sets[set_id].get("trainer_count", 0) + 1 elif card_type == "energy": sets[set_id]["energy_count"] = sets[set_id].get("energy_count", 0) + 1 # Build card listing cards = [ { "id": card["id"], "name": card["name"], "card_type": card["card_type"], "set_id": card.get("set_id", ""), } for card in all_cards ] return { "generated_at": datetime.now(UTC).isoformat(), "schema_version": "1.0", "total_cards": len(all_cards), "sets": sets, "cards": cards, } def convert_set( set_code: str, validate_only: bool = False, verbose: bool = False, ) -> list[dict[str, Any]]: """Convert all cards from a single set. Args: set_code: Set code (e.g., "a1"). validate_only: If True, validate but don't write files. verbose: If True, print detailed progress. Returns: List of converted card definitions. """ raw_set_dir = RAW_DATA_DIR / set_code if not raw_set_dir.exists(): print(f"Warning: Raw data directory not found: {raw_set_dir}") return [] converted = [] errors = [] for card_file in sorted(raw_set_dir.glob("*.json")): try: with open(card_file) as f: raw_card = json.load(f) # Transform the card card_dict = transform_card(raw_card) # Validate against model validate_card(card_dict) if verbose: print(f" Converted: {card_dict['id']} ({card_dict['name']})") if not validate_only: # Determine output path based on card type card_type = card_dict["card_type"] output_path = DEFINITIONS_DIR / card_type / set_code / f"{card_file.stem}.json" write_card_file(card_dict, output_path) converted.append(card_dict) except Exception as e: errors.append((card_file.name, str(e))) print(f" ERROR: {card_file.name}: {e}") if errors: print(f" {len(errors)} errors in set {set_code}") return converted def main() -> int: """Main entry point for the converter script. Returns: Exit code (0 for success, 1 for errors). """ parser = argparse.ArgumentParser( description="Convert raw scraped card data to CardDefinition format." ) parser.add_argument( "--set", type=str, help="Convert only a specific set (e.g., 'a1')", ) parser.add_argument( "--validate-only", action="store_true", help="Validate cards without writing files", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Show detailed progress", ) args = parser.parse_args() print("Card Definition Converter") print("=" * 40) # Load raw index try: raw_index = load_raw_index() except FileNotFoundError as e: print(f"Error: {e}") return 1 # Get set metadata global SET_METADATA SET_METADATA = raw_index.get("sets", {}) # Determine which sets to convert if args.set: sets_to_convert = [args.set] else: sets_to_convert = list(SET_METADATA.keys()) all_cards: list[dict[str, Any]] = [] # Convert each set for set_code in sets_to_convert: set_name = SET_METADATA.get(set_code, {}).get("name", set_code) print(f"\nConverting set: {set_code} ({set_name})") cards = convert_set( set_code, validate_only=args.validate_only, verbose=args.verbose, ) all_cards.extend(cards) print(f" Converted {len(cards)} cards") # Generate energy cards print("\nGenerating basic energy cards...") energy_cards = generate_energy_cards() for card in energy_cards: # Validate validate_card(card) if args.verbose: print(f" Generated: {card['id']} ({card['name']})") if not args.validate_only: output_path = DEFINITIONS_DIR / "energy" / "basic" / f"{card['id']}.json" write_card_file(card, output_path) all_cards.extend(energy_cards) print(f" Generated {len(energy_cards)} energy cards") # Write index file if not args.validate_only: print("\nWriting index file...") index = build_index(all_cards, SET_METADATA) index_path = DEFINITIONS_DIR / "_index.json" with open(index_path, "w") as f: json.dump(index, f, indent=2) print(f" Wrote {index_path}") # Summary print("\n" + "=" * 40) print(f"Total cards converted: {len(all_cards)}") if args.validate_only: print("(Validation only - no files written)") return 0 if __name__ == "__main__": sys.exit(main())