mantimon-tcg/backend/scripts/convert_cards.py
Cal Corum 934aa4c443 Add CardService and card data conversion pipeline
- Rename data/cards/ to data/raw/ for scraped data
- Add data/definitions/ as authoritative card data source
- Add convert_cards.py script to transform raw -> definitions
- Generate 378 card definitions (344 Pokemon, 24 Trainers, 10 Energy)
- Add CardService for loading and querying card definitions
  - In-memory indexes for fast lookups by type, set, pokemon_type
  - search() with multiple filter criteria
  - get_all_cards() for GameEngine integration
- Add SetInfo model for set metadata
- Update Attack model with damage_display field for variable damage
- Update CardDefinition with image_path, illustrator, flavor_text
- Add 45 tests (21 converter + 24 CardService)
- Update scraper output path to data/raw/

Card data is JSON-authoritative (no database) to support offline fork goal.
2026-01-27 14:16:40 -06:00

617 lines
17 KiB
Python

#!/usr/bin/env python3
"""Convert raw scraped card data to CardDefinition-compatible JSON.
This script transforms card data from data/raw/ (scraped from pokemon-zone.com)
into the CardDefinition format expected by the game engine, writing output to
data/definitions/.
Usage:
python scripts/convert_cards.py # Convert all cards
python scripts/convert_cards.py --set a1 # Convert one set only
python scripts/convert_cards.py --validate-only # Validate without writing
python scripts/convert_cards.py --verbose # Show detailed progress
The script also generates basic energy card definitions.
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.core.enums import CardType, PokemonVariant
from app.core.models.card import CardDefinition
# TODO: Update CDN_BASE_URL when CDN is configured
CDN_BASE_URL = "https://cdn.mantimon.com/cards"

# Filesystem layout, derived from where this script lives
SCRIPT_DIR = Path(__file__).parent
BACKEND_DIR = SCRIPT_DIR.parent
RAW_DATA_DIR = BACKEND_DIR.joinpath("data", "raw")
DEFINITIONS_DIR = BACKEND_DIR.joinpath("data", "definitions")

# Basic energy types to generate, as (energy_type, display_name) pairs.
# The display name is the capitalized type followed by " Energy".
ENERGY_TYPES = [
    (energy_type, f"{energy_type.capitalize()} Energy")
    for energy_type in (
        "colorless",
        "darkness",
        "dragon",
        "fighting",
        "fire",
        "grass",
        "lightning",
        "metal",
        "psychic",
        "water",
    )
]

# Set metadata (from raw index); main() rebinds this to the index's "sets" entry
SET_METADATA: dict[str, dict[str, Any]] = {}
def load_raw_index() -> dict[str, Any]:
    """Load the raw data index file.

    Reads ``_index.json`` from ``RAW_DATA_DIR`` and returns the parsed JSON.

    Returns:
        Index data containing set information and card listing.

    Raises:
        FileNotFoundError: If the index file does not exist.
    """
    index_path = RAW_DATA_DIR / "_index.json"
    if not index_path.exists():
        raise FileNotFoundError(f"Raw index not found: {index_path}")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)
def transform_attack(raw_attack: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw attack to CardDefinition Attack format.

    Args:
        raw_attack: Attack data from scraped JSON. ``name`` is required;
            ``cost``, ``damage``, ``damage_modifier``, ``effect_text`` and
            ``effect_id`` are optional.

    Returns:
        Attack data compatible with the Attack model. Optional keys
        (``damage_display``, ``effect_description``, ``effect_id``,
        ``effect_params``) are only present when they have a value.

    Raises:
        KeyError: If ``raw_attack`` has no ``name``.
    """
    # A missing or null damage counts as 0
    damage = raw_attack.get("damage") or 0
    damage_modifier = raw_attack.get("damage_modifier")

    # Build damage_display string
    if damage > 0:
        damage_display = f"{damage}{damage_modifier}" if damage_modifier else str(damage)
    elif damage_modifier:
        # Edge case: modifier but no base damage (e.g., "x" for coin flip damage)
        damage_display = f"0{damage_modifier}"
    else:
        damage_display = None

    attack: dict[str, Any] = {
        "name": raw_attack["name"],
        # Treat an explicit null cost like a missing one, mirroring how a
        # null damage is handled above.
        "cost": raw_attack.get("cost") or [],
        "damage": damage,
    }
    if damage_display:
        attack["damage_display"] = damage_display
    if raw_attack.get("effect_text"):
        attack["effect_description"] = raw_attack["effect_text"]
    if raw_attack.get("effect_id"):
        attack["effect_id"] = raw_attack["effect_id"]
    if damage_modifier:
        # The modifier is also exposed to the effect via effect_params
        attack["effect_params"] = {"damage_modifier": damage_modifier}
    return attack
def transform_weakness_resistance(raw_wr: dict[str, Any] | None) -> dict[str, Any] | None:
"""Transform weakness/resistance to CardDefinition format.
Args:
raw_wr: Weakness or resistance data with 'type' and 'value' keys.
Returns:
WeaknessResistance-compatible dict or None.
"""
if raw_wr is None:
return None
return {
"energy_type": raw_wr["type"],
"value": raw_wr.get("value"),
}
def transform_ability(raw_ability: dict[str, Any]) -> dict[str, Any]:
    """Convert a raw ability to CardDefinition Ability format.

    Args:
        raw_ability: Ability data from scraped JSON; 'name' is required.

    Returns:
        Ability data compatible with the Ability model. 'effect_id' falls
        back to "unimplemented" when absent or empty; 'effect_description'
        and 'effect_params' are included only when the raw data has a
        non-empty value for them.
    """
    effect_id = raw_ability.get("effect_id")
    if not effect_id:
        effect_id = "unimplemented"

    result = {"name": raw_ability["name"], "effect_id": effect_id}

    # (raw key, output key) pairs copied over only when truthy
    optional_fields = (
        ("effect_text", "effect_description"),
        ("effect_params", "effect_params"),
    )
    for source_key, target_key in optional_fields:
        value = raw_ability.get(source_key)
        if value:
            result[target_key] = value
    return result
def determine_variant(raw_card: dict[str, Any]) -> str:
    """Pick the Pokemon variant for a raw card.

    Args:
        raw_card: Raw card data; only the 'is_ex' flag is consulted.

    Returns:
        Value of ``PokemonVariant.EX`` when 'is_ex' is truthy, otherwise
        the value of ``PokemonVariant.NORMAL``.
    """
    # Could add more variant detection here (V, VMAX, etc.)
    # based on card name patterns if needed
    variant = PokemonVariant.EX if raw_card.get("is_ex") else PokemonVariant.NORMAL
    return variant.value
def get_image_url(image_path: str | None) -> str | None:
"""Generate CDN URL from image path.
Args:
image_path: Local image path (e.g., "pokemon/a1/001-bulbasaur.webp").
Returns:
Full CDN URL or None if no path.
"""
if not image_path:
return None
return f"{CDN_BASE_URL}/{image_path}"
def transform_pokemon_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Convert a raw Pokemon card to CardDefinition format.

    Args:
        raw_card: Raw card data from scraped JSON. 'id', 'name', 'hp',
            'pokemon_type' and 'stage' are required; everything else is
            optional.

    Returns:
        CardDefinition-compatible dict for a Pokemon card. Optional keys are
        only present when the raw card has a non-empty value for them.
    """
    # raw image_file is like "a1/001-bulbasaur.webp";
    # the definition stores "pokemon/a1/001-bulbasaur.webp"
    image_file = raw_card.get("image_file")
    image_path = f"pokemon/{image_file}" if image_file else None

    # Missing or null rarity defaults to "common"
    rarity = raw_card.get("rarity") or "common"

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.POKEMON.value,
        "hp": raw_card["hp"],
        "pokemon_type": raw_card["pokemon_type"],
        "stage": raw_card["stage"],
        "variant": determine_variant(raw_card),
        "retreat_cost": raw_card.get("retreat_cost", 0),
        "set_id": raw_card.get("set_code", ""),
        "rarity": rarity.lower(),
    }

    evolves_from = raw_card.get("evolves_from")
    if evolves_from:
        card["evolves_from"] = evolves_from

    attacks = raw_card.get("attacks")
    if attacks:
        card["attacks"] = list(map(transform_attack, attacks))

    abilities = raw_card.get("abilities")
    if abilities:
        card["abilities"] = list(map(transform_ability, abilities))

    # Weakness and resistance share one raw shape and one transformer
    for field in ("weakness", "resistance"):
        raw_wr = raw_card.get(field)
        if raw_wr:
            card[field] = transform_weakness_resistance(raw_wr)

    # Metadata copied through unchanged
    for field in ("illustrator", "flavor_text"):
        text = raw_card.get(field)
        if text:
            card[field] = text

    if image_path:
        card["image_path"] = image_path
        card["image_url"] = get_image_url(image_path)
    return card
def transform_trainer_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Convert a raw Trainer card to CardDefinition format.

    Args:
        raw_card: Raw card data from scraped JSON. 'id' and 'name' are
            required; everything else is optional.

    Returns:
        CardDefinition-compatible dict for a Trainer card. Optional keys are
        only present when the raw card has a non-empty value for them.
    """
    # Trainer images live under the "trainer/" prefix
    image_file = raw_card.get("image_file")
    image_path = f"trainer/{image_file}" if image_file else None

    # Missing or null rarity defaults to "common"
    rarity = raw_card.get("rarity") or "common"

    card: dict[str, Any] = {
        "id": raw_card["id"],
        "name": raw_card["name"],
        "card_type": CardType.TRAINER.value,
        "trainer_type": raw_card.get("trainer_type", "item"),
        "set_id": raw_card.get("set_code", ""),
        "rarity": rarity.lower(),
    }

    # (raw key, output key) pairs copied over only when truthy
    optional_fields = (
        ("effect_text", "effect_description"),
        ("effect_id", "effect_id"),
        ("illustrator", "illustrator"),
    )
    for source_key, target_key in optional_fields:
        value = raw_card.get(source_key)
        if value:
            card[target_key] = value

    if image_path:
        card["image_path"] = image_path
        card["image_url"] = get_image_url(image_path)
    return card
def transform_card(raw_card: dict[str, Any]) -> dict[str, Any]:
    """Transform a raw card to CardDefinition format.

    Dispatches to the appropriate transformer based on card_type
    (compared case-insensitively).

    Args:
        raw_card: Raw card data from scraped JSON.

    Returns:
        CardDefinition-compatible dict.

    Raises:
        ValueError: If card type is unsupported, missing, or null.
    """
    # `or ""` covers an explicit null card_type, which would otherwise fail
    # on .lower() with AttributeError instead of the documented ValueError.
    card_type = (raw_card.get("card_type") or "").lower()
    if card_type == "pokemon":
        return transform_pokemon_card(raw_card)
    if card_type == "trainer":
        return transform_trainer_card(raw_card)
    raise ValueError(f"Unsupported card type: {card_type}")
def validate_card(card_dict: dict[str, Any]) -> CardDefinition:
    """Check a card dict against the CardDefinition model.

    Args:
        card_dict: Card data to validate.

    Returns:
        The CardDefinition built by ``CardDefinition.model_validate``.

    Raises:
        ValueError: If validation fails.
            NOTE(review): presumably CardDefinition is a pydantic v2 model,
            whose ValidationError subclasses ValueError - confirm.
    """
    validated = CardDefinition.model_validate(card_dict)
    return validated
def generate_energy_cards() -> list[dict[str, Any]]:
    """Build the basic energy card definitions.

    One card is produced per entry in ``ENERGY_TYPES``, in the same order.

    Returns:
        List of CardDefinition-compatible dicts for basic energy.
    """

    def _energy_card(energy_type: str, display_name: str) -> dict[str, Any]:
        # Every basic energy belongs to the "basic" set and provides one
        # energy of its own type.
        image_path = f"energy/basic/{energy_type}.webp"
        return {
            "id": f"energy-basic-{energy_type}",
            "name": display_name,
            "card_type": CardType.ENERGY.value,
            "energy_type": energy_type,
            "energy_provides": [energy_type],
            "rarity": "common",
            "set_id": "basic",
            "image_path": image_path,
            "image_url": get_image_url(image_path),
        }

    return [_energy_card(energy_type, name) for energy_type, name in ENERGY_TYPES]
def write_card_file(card: dict[str, Any], output_path: Path) -> None:
    """Serialize a card definition to a JSON file.

    Missing parent directories are created first; the JSON is written with
    a two-space indent.

    Args:
        card: Card definition dict.
        output_path: Path to write the JSON file.
    """
    target_dir = output_path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with output_path.open("w") as handle:
        json.dump(card, handle, indent=2)
def build_index(
all_cards: list[dict[str, Any]],
set_metadata: dict[str, dict[str, Any]],
) -> dict[str, Any]:
"""Build the master index file.
Args:
all_cards: List of all card definitions.
set_metadata: Metadata about each set.
Returns:
Index data structure.
"""
# Count cards by set and type
sets: dict[str, dict[str, Any]] = {}
for set_code, meta in set_metadata.items():
sets[set_code] = {
"name": meta.get("name", set_code),
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
}
# Add basic energy "set"
sets["basic"] = {
"name": "Basic Energy",
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
"energy_count": 0,
}
# Count cards
for card in all_cards:
set_id = card.get("set_id", "")
if set_id not in sets:
sets[set_id] = {
"name": set_id,
"card_count": 0,
"pokemon_count": 0,
"trainer_count": 0,
}
sets[set_id]["card_count"] += 1
card_type = card.get("card_type", "")
if card_type == "pokemon":
sets[set_id]["pokemon_count"] = sets[set_id].get("pokemon_count", 0) + 1
elif card_type == "trainer":
sets[set_id]["trainer_count"] = sets[set_id].get("trainer_count", 0) + 1
elif card_type == "energy":
sets[set_id]["energy_count"] = sets[set_id].get("energy_count", 0) + 1
# Build card listing
cards = [
{
"id": card["id"],
"name": card["name"],
"card_type": card["card_type"],
"set_id": card.get("set_id", ""),
}
for card in all_cards
]
return {
"generated_at": datetime.now(UTC).isoformat(),
"schema_version": "1.0",
"total_cards": len(all_cards),
"sets": sets,
"cards": cards,
}
def convert_set(
    set_code: str,
    validate_only: bool = False,
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """Convert all cards from a single set.

    Every ``*.json`` file in the set's raw directory is transformed and
    validated. A file that fails is reported on stdout and skipped, so one
    bad card does not abort the rest of the set.

    Args:
        set_code: Set code (e.g., "a1").
        validate_only: If True, validate but don't write files.
        verbose: If True, print detailed progress.

    Returns:
        List of converted card definitions (failed cards are omitted).
        Empty when the set's raw directory does not exist.
    """
    raw_set_dir = RAW_DATA_DIR / set_code
    if not raw_set_dir.exists():
        print(f"Warning: Raw data directory not found: {raw_set_dir}")
        return []

    converted = []
    errors = []
    # Sorted for a deterministic processing and output order
    for card_file in sorted(raw_set_dir.glob("*.json")):
        try:
            # Decode explicitly as UTF-8 so non-ASCII text in the raw data
            # does not depend on the platform's locale encoding.
            with open(card_file, encoding="utf-8") as f:
                raw_card = json.load(f)

            # Transform the card
            card_dict = transform_card(raw_card)

            # Validate against model
            validate_card(card_dict)

            if verbose:
                print(f" Converted: {card_dict['id']} ({card_dict['name']})")

            if not validate_only:
                # Determine output path based on card type
                card_type = card_dict["card_type"]
                output_path = DEFINITIONS_DIR / card_type / set_code / f"{card_file.stem}.json"
                write_card_file(card_dict, output_path)

            converted.append(card_dict)
        except Exception as e:
            # Deliberately broad: this is the per-file boundary. Any failure
            # (bad JSON, missing key, validation error, write error) is
            # recorded and the remaining files are still processed.
            errors.append((card_file.name, str(e)))
            print(f" ERROR: {card_file.name}: {e}")

    if errors:
        print(f" {len(errors)} errors in set {set_code}")
    return converted
def main() -> int:
    """Main entry point for the converter script.

    Parses the command line, converts the requested sets, generates the
    basic energy cards and, unless ``--validate-only`` is given, writes the
    definitions index.

    Returns:
        Exit code (0 for success, 1 for errors). Errors are a missing raw
        index or any raw card file that failed to convert.
    """
    parser = argparse.ArgumentParser(
        description="Convert raw scraped card data to CardDefinition format."
    )
    parser.add_argument(
        "--set",
        type=str,
        help="Convert only a specific set (e.g., 'a1')",
    )
    parser.add_argument(
        "--validate-only",
        action="store_true",
        help="Validate cards without writing files",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show detailed progress",
    )
    args = parser.parse_args()

    print("Card Definition Converter")
    print("=" * 40)

    # Load raw index
    try:
        raw_index = load_raw_index()
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return 1

    # Get set metadata
    global SET_METADATA
    SET_METADATA = raw_index.get("sets", {})

    # Determine which sets to convert
    sets_to_convert = [args.set] if args.set else list(SET_METADATA)

    all_cards: list[dict[str, Any]] = []
    failed_count = 0

    # Convert each set
    for set_code in sets_to_convert:
        set_name = SET_METADATA.get(set_code, {}).get("name", set_code)
        print(f"\nConverting set: {set_code} ({set_name})")
        cards = convert_set(
            set_code,
            validate_only=args.validate_only,
            verbose=args.verbose,
        )
        all_cards.extend(cards)
        print(f" Converted {len(cards)} cards")

        # convert_set() reports per-file failures on stdout only and returns
        # just the successes, so failures are derived here as raw files that
        # did not yield a card. Without this the exit code is always 0.
        raw_file_count = sum(1 for _ in (RAW_DATA_DIR / set_code).glob("*.json"))
        failed_count += max(0, raw_file_count - len(cards))

    # Generate energy cards
    print("\nGenerating basic energy cards...")
    energy_cards = generate_energy_cards()
    for card in energy_cards:
        # Validate
        validate_card(card)
        if args.verbose:
            print(f" Generated: {card['id']} ({card['name']})")
        if not args.validate_only:
            output_path = DEFINITIONS_DIR / "energy" / "basic" / f"{card['id']}.json"
            write_card_file(card, output_path)
    all_cards.extend(energy_cards)
    print(f" Generated {len(energy_cards)} energy cards")

    # Write index file
    # NOTE(review): with --set, all_cards holds only that set plus basic
    # energy, so the index written here replaces any fuller index - confirm
    # that consumers of _index.json tolerate a partial listing.
    if not args.validate_only:
        print("\nWriting index file...")
        index = build_index(all_cards, SET_METADATA)
        index_path = DEFINITIONS_DIR / "_index.json"
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2)
        print(f" Wrote {index_path}")

    # Summary
    print("\n" + "=" * 40)
    print(f"Total cards converted: {len(all_cards)}")
    if failed_count:
        print(f"Cards failed: {failed_count}")
    if args.validate_only:
        print("(Validation only - no files written)")
    return 1 if failed_count else 0


if __name__ == "__main__":
    sys.exit(main())