diff --git a/backend/data/cards/.gitkeep b/backend/data/cards/.gitkeep new file mode 100644 index 0000000..9396629 --- /dev/null +++ b/backend/data/cards/.gitkeep @@ -0,0 +1,3 @@ +# Card data directory +# Contains scraped Pokemon TCG Pocket card data in JSON format +# Structure: {set_code}/{number}-{name}.json diff --git a/backend/data/cards/_index.json b/backend/data/cards/_index.json new file mode 100644 index 0000000..37d1061 --- /dev/null +++ b/backend/data/cards/_index.json @@ -0,0 +1,52 @@ +{ + "generated_at": "2026-01-27T04:45:04.962697+00:00", + "schema_version": "1.0", + "sets": { + "a1": { + "name": "Genetic Apex", + "card_count": 5 + }, + "a1a": { + "name": "Mythical Island", + "card_count": 0 + } + }, + "cards": [ + { + "id": "a1-001-bulbasaur", + "name": "Bulbasaur", + "set_code": "a1", + "card_number": 1, + "file": "a1/001-bulbasaur.json" + }, + { + "id": "a1-002-ivysaur", + "name": "Ivysaur", + "set_code": "a1", + "card_number": 2, + "file": "a1/002-ivysaur.json" + }, + { + "id": "a1-003-venusaur", + "name": "Venusaur", + "set_code": "a1", + "card_number": 3, + "file": "a1/003-venusaur.json" + }, + { + "id": "a1-004-venusaur-ex", + "name": "Venusaur ex", + "set_code": "a1", + "card_number": 4, + "file": "a1/004-venusaur-ex.json" + }, + { + "id": "a1-005-caterpie", + "name": "Caterpie", + "set_code": "a1", + "card_number": 5, + "file": "a1/005-caterpie.json" + } + ], + "total_cards": 5 +} \ No newline at end of file diff --git a/backend/data/cards/a1/001-bulbasaur.json b/backend/data/cards/a1/001-bulbasaur.json new file mode 100644 index 0000000..99b302c --- /dev/null +++ b/backend/data/cards/a1/001-bulbasaur.json @@ -0,0 +1,38 @@ +{ + "id": "a1-001-bulbasaur", + "name": "Bulbasaur", + "set_code": "a1", + "set_name": "Genetic Apex", + "card_number": 1, + "rarity": "Common", + "card_type": "pokemon", + "image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000010_00_FUSHIGIDANE_C.webp", + "source_url": 
"https://www.pokemon-zone.com/cards/a1/1/bulbasaur/", + "hp": 70, + "pokemon_type": "grass", + "stage": "basic", + "evolves_from": null, + "is_ex": false, + "abilities": [], + "attacks": [ + { + "name": "Vine Whip", + "cost": [ + "grass", + "colorless" + ], + "damage": 40, + "damage_modifier": null, + "effect_text": null, + "effect_id": null + } + ], + "weakness": { + "type": "fire", + "value": 20 + }, + "resistance": null, + "retreat_cost": 1, + "flavor_text": null, + "illustrator": "Narumi Sato" +} \ No newline at end of file diff --git a/backend/data/cards/a1/002-ivysaur.json b/backend/data/cards/a1/002-ivysaur.json new file mode 100644 index 0000000..1f0b774 --- /dev/null +++ b/backend/data/cards/a1/002-ivysaur.json @@ -0,0 +1,39 @@ +{ + "id": "a1-002-ivysaur", + "name": "Ivysaur", + "set_code": "a1", + "set_name": "Genetic Apex", + "card_number": 2, + "rarity": "Uncommon", + "card_type": "pokemon", + "image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000020_00_FUSHIGISOU_U.webp", + "source_url": "https://www.pokemon-zone.com/cards/a1/2/ivysaur/", + "hp": 90, + "pokemon_type": "grass", + "stage": "stage_1", + "evolves_from": "Bulbasaur", + "is_ex": false, + "abilities": [], + "attacks": [ + { + "name": "Razor Leaf", + "cost": [ + "grass", + "colorless", + "colorless" + ], + "damage": 60, + "damage_modifier": null, + "effect_text": null, + "effect_id": null + } + ], + "weakness": { + "type": "fire", + "value": 20 + }, + "resistance": null, + "retreat_cost": 2, + "flavor_text": null, + "illustrator": "Kurata So" +} \ No newline at end of file diff --git a/backend/data/cards/a1/003-venusaur.json b/backend/data/cards/a1/003-venusaur.json new file mode 100644 index 0000000..5fae561 --- /dev/null +++ b/backend/data/cards/a1/003-venusaur.json @@ -0,0 +1,40 @@ +{ + "id": "a1-003-venusaur", + "name": "Venusaur", + "set_code": "a1", + "set_name": "Genetic Apex", + "card_number": 3, + "rarity": "Rare", + "card_type": "pokemon", + "image_url": 
"https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000030_00_FUSHIGIBANA_R.webp", + "source_url": "https://www.pokemon-zone.com/cards/a1/3/venusaur/", + "hp": 160, + "pokemon_type": "grass", + "stage": "stage_2", + "evolves_from": "Ivysaur", + "is_ex": false, + "abilities": [], + "attacks": [ + { + "name": "Mega Drain", + "cost": [ + "grass", + "grass", + "colorless", + "colorless" + ], + "damage": 80, + "damage_modifier": null, + "effect_text": "Heal 30 damage from this Pokémon.", + "effect_id": null + } + ], + "weakness": { + "type": "fire", + "value": 20 + }, + "resistance": null, + "retreat_cost": 3, + "flavor_text": null, + "illustrator": "Ryota Murayama" +} \ No newline at end of file diff --git a/backend/data/cards/a1/004-venusaur-ex.json b/backend/data/cards/a1/004-venusaur-ex.json new file mode 100644 index 0000000..0bf7359 --- /dev/null +++ b/backend/data/cards/a1/004-venusaur-ex.json @@ -0,0 +1,52 @@ +{ + "id": "a1-004-venusaur-ex", + "name": "Venusaur ex", + "set_code": "a1", + "set_name": "Genetic Apex", + "card_number": 4, + "rarity": "Double Rare", + "card_type": "pokemon", + "image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000040_00_FUSHIGIBANAex_RR.webp", + "source_url": "https://www.pokemon-zone.com/cards/a1/4/venusaur-ex/", + "hp": 190, + "pokemon_type": "grass", + "stage": "stage_2", + "evolves_from": "Ivysaur", + "is_ex": true, + "abilities": [], + "attacks": [ + { + "name": "Razor Leaf", + "cost": [ + "grass", + "colorless", + "colorless" + ], + "damage": 60, + "damage_modifier": null, + "effect_text": null, + "effect_id": null + }, + { + "name": "Giant Bloom", + "cost": [ + "grass", + "grass", + "colorless", + "colorless" + ], + "damage": 100, + "damage_modifier": null, + "effect_text": "Heal 30 damage from this Pokémon.", + "effect_id": null + } + ], + "weakness": { + "type": "fire", + "value": 20 + }, + "resistance": null, + "retreat_cost": 3, + "flavor_text": null, + "illustrator": "PLANETA CG 
Works" +} \ No newline at end of file diff --git a/backend/data/cards/a1/005-caterpie.json b/backend/data/cards/a1/005-caterpie.json new file mode 100644 index 0000000..8e1466d --- /dev/null +++ b/backend/data/cards/a1/005-caterpie.json @@ -0,0 +1,37 @@ +{ + "id": "a1-005-caterpie", + "name": "Caterpie", + "set_code": "a1", + "set_name": "Genetic Apex", + "card_number": 5, + "rarity": "Common", + "card_type": "pokemon", + "image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000050_00_CATERPIE_C.webp", + "source_url": "https://www.pokemon-zone.com/cards/a1/5/caterpie/", + "hp": 50, + "pokemon_type": "grass", + "stage": "basic", + "evolves_from": null, + "is_ex": false, + "abilities": [], + "attacks": [ + { + "name": "Find a Friend", + "cost": [ + "colorless" + ], + "damage": null, + "damage_modifier": null, + "effect_text": "Put 1 randomPokémon from your deck into your hand.", + "effect_id": null + } + ], + "weakness": { + "type": "fire", + "value": 20 + }, + "resistance": null, + "retreat_cost": 1, + "flavor_text": null, + "illustrator": "Miki Tanaka" +} \ No newline at end of file diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8d90de1..545bca6 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -21,12 +21,14 @@ dependencies = [ [dependency-groups] dev = [ + "beautifulsoup4>=4.12.0", "black>=26.1.0", "httpx>=0.28.1", "mypy>=1.19.1", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", "pytest-cov>=7.0.0", + "requests>=2.31.0", "ruff>=0.14.14", ] diff --git a/backend/scripts/scrape_pokemon_pocket.py b/backend/scripts/scrape_pokemon_pocket.py new file mode 100644 index 0000000..d244c5c --- /dev/null +++ b/backend/scripts/scrape_pokemon_pocket.py @@ -0,0 +1,903 @@ +#!/usr/bin/env python +"""Scrape Pokemon TCG Pocket card data from pokemon-zone.com. + +This script fetches card data from the Genetic Apex (A1) and Mythical Island (A1a) +sets and saves them as individual JSON files for use in the Mantimon TCG game engine. 
+ +Usage: + # Scrape entire set + uv run python scripts/scrape_pokemon_pocket.py --set a1 + + # Scrape with limit (for testing) + uv run python scripts/scrape_pokemon_pocket.py --set a1 --limit 5 + + # Scrape single card by ID + uv run python scripts/scrape_pokemon_pocket.py --card a1-132-gardevoir + + # Regenerate index from existing card files + uv run python scripts/scrape_pokemon_pocket.py --reindex + +Output: + - Individual card files: data/cards/{set}/{number}-{name}.json + - Combined index: data/cards/_index.json + - Error log: data/cards/_errors.log +""" + +import argparse +import json +import logging +import re +import sys +import time +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import requests +from bs4 import BeautifulSoup, Tag + +# ============================================================================= +# Configuration +# ============================================================================= + +BASE_URL = "https://www.pokemon-zone.com" +DATA_DIR = Path(__file__).parent.parent / "data" / "cards" +REQUEST_DELAY = 1.5 # seconds between requests +MAX_RETRIES = 3 +RETRY_DELAY = 5 # seconds + +# Set info for validation and metadata +SETS = { + "a1": {"name": "Genetic Apex", "expected_cards": 286}, + "a1a": {"name": "Mythical Island", "expected_cards": 86}, +} + +# Energy type mapping from CSS classes +ENERGY_TYPES = { + "energy-icon--type-grass": "grass", + "energy-icon--type-fire": "fire", + "energy-icon--type-water": "water", + "energy-icon--type-lightning": "lightning", + "energy-icon--type-psychic": "psychic", + "energy-icon--type-fighting": "fighting", + "energy-icon--type-darkness": "darkness", + "energy-icon--type-metal": "metal", + "energy-icon--type-colorless": "colorless", + "energy-icon--type-dragon": "dragon", +} + +# Rarity code mapping from CSS classes (rarity-icon--rarity-X) +RARITY_CODES = { + "C": "Common", + "U": "Uncommon", + "R": 
"Rare", + "RR": "Double Rare", + "AR": "Art Rare", + "SAR": "Special Art Rare", + "UR": "Ultra Rare", + "IM": "Immersive", + "S": "Shiny", + "CR": "Crown Rare", +} + +# ============================================================================= +# Logging Setup +# ============================================================================= + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%H:%M:%S", +) +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Data Classes +# ============================================================================= + + +@dataclass +class Attack: + """A Pokemon's attack.""" + + name: str + cost: list[str] + damage: int | None + damage_modifier: str | None # "+", "x", or None + effect_text: str | None + effect_id: str | None = None # To be mapped later + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "name": self.name, + "cost": self.cost, + "damage": self.damage, + "damage_modifier": self.damage_modifier, + "effect_text": self.effect_text, + "effect_id": self.effect_id, + } + + +@dataclass +class Ability: + """A Pokemon's ability.""" + + name: str + effect_text: str + effect_id: str | None = None # To be mapped later + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "name": self.name, + "effect_text": self.effect_text, + "effect_id": self.effect_id, + } + + +@dataclass +class Card: + """Complete card data.""" + + id: str + name: str + set_code: str + set_name: str + card_number: int + rarity: str + card_type: str # "pokemon", "trainer", "energy" + image_url: str | None = None # URL to card image for offline caching + hp: int | None = None + pokemon_type: str | None = None + stage: str | None = None # "basic", "stage_1", "stage_2" + evolves_from: str | None = None + is_ex: bool = False 
+ abilities: list[Ability] = field(default_factory=list) + attacks: list[Attack] = field(default_factory=list) + weakness_type: str | None = None + weakness_value: int | None = None + resistance_type: str | None = None + resistance_value: int | None = None + retreat_cost: int = 0 + flavor_text: str | None = None + illustrator: str | None = None + source_url: str = "" + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + data: dict[str, Any] = { + "id": self.id, + "name": self.name, + "set_code": self.set_code, + "set_name": self.set_name, + "card_number": self.card_number, + "rarity": self.rarity, + "card_type": self.card_type, + "image_url": self.image_url, + "source_url": self.source_url, + } + + if self.card_type == "pokemon": + data.update( + { + "hp": self.hp, + "pokemon_type": self.pokemon_type, + "stage": self.stage, + "evolves_from": self.evolves_from, + "is_ex": self.is_ex, + "abilities": [a.to_dict() for a in self.abilities], + "attacks": [a.to_dict() for a in self.attacks], + "weakness": ( + {"type": self.weakness_type, "value": self.weakness_value} + if self.weakness_type + else None + ), + "resistance": ( + {"type": self.resistance_type, "value": self.resistance_value} + if self.resistance_type + else None + ), + "retreat_cost": self.retreat_cost, + "flavor_text": self.flavor_text, + "illustrator": self.illustrator, + } + ) + elif self.card_type == "trainer": + data.update( + { + "trainer_type": self.stage, # Reusing stage field for trainer type + "effect_text": self.flavor_text, # Trainer effect + "illustrator": self.illustrator, + } + ) + elif self.card_type == "energy": + data.update( + { + "energy_type": self.pokemon_type, + "illustrator": self.illustrator, + } + ) + + return data + + +# ============================================================================= +# Scraper Class +# ============================================================================= + + +class PokemonPocketScraper: + """Scraper 
def fetch_page(self, url: str) -> BeautifulSoup | None:
    """Fetch a page and parse it, retrying transient failures.

    Network errors and retryable HTTP statuses (5xx, 408, 429) are retried
    up to ``MAX_RETRIES`` times with ``RETRY_DELAY`` seconds between
    attempts. Other 4xx responses (e.g. 404) are treated as permanent and
    are not retried, since repeating the request cannot succeed.

    Args:
        url: URL to fetch.

    Returns:
        BeautifulSoup object, or None if the page could not be fetched. On
        failure an entry holding the URL and the last error is appended to
        ``self.errors``.
    """
    last_error = "unknown error"
    attempts_made = 0

    for attempt in range(1, MAX_RETRIES + 1):
        attempts_made = attempt
        try:
            response = self.session.get(url, timeout=30)
            response.raise_for_status()
            return BeautifulSoup(response.text, "html.parser")
        except requests.RequestException as exc:
            last_error = str(exc)
            logger.warning(f"Attempt {attempt}/{MAX_RETRIES} failed for {url}: {exc}")

            # HTTPError carries the response; connection errors/timeouts do not.
            failed_response = getattr(exc, "response", None)
            status = getattr(failed_response, "status_code", None)
            is_permanent = (
                status is not None and 400 <= status < 500 and status not in (408, 429)
            )
            if is_permanent:
                break

            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

    # Keep the underlying reason so the error log is actionable.
    self.errors.append(
        {
            "url": url,
            "error": f"Request failed after {attempts_made} attempt(s): {last_error}",
        }
    )
    return None
+ """ + set_url = f"{BASE_URL}/sets/{set_code}/" + logger.info(f"Fetching set page: {set_url}") + + soup = self.fetch_page(set_url) + if not soup: + logger.error(f"Failed to fetch set page for {set_code}") + return [] + + cards: list[tuple[int, str, str]] = [] + # Find all card links - they follow pattern /cards/{set}/{number}/{name}/ + pattern = re.compile(rf"^/cards/{set_code}/(\d+)/([^/]+)/$") + + for link in soup.find_all("a", href=pattern): + href = link.get("href", "") + match = pattern.match(href) + if match: + card_number = int(match.group(1)) + card_name = match.group(2) + # Avoid duplicates (page may have multiple links to same card) + card_tuple = (card_number, card_name, f"{BASE_URL}{href}") + if card_tuple not in cards: + cards.append(card_tuple) + + # Sort by card number + cards.sort(key=lambda x: x[0]) + logger.info(f"Found {len(cards)} cards in set {set_code}") + + return cards + + def parse_energy_type(self, element: Tag | None) -> str | None: + """Extract energy type from an element containing an energy icon. + + Args: + element: BeautifulSoup element that may contain energy icons. + + Returns: + Energy type string or None. + """ + if not element: + return None + + for icon in element.find_all("span", class_=re.compile(r"energy-icon--type-")): + for cls in icon.get("class", []): + if cls in ENERGY_TYPES: + return ENERGY_TYPES[cls] + + return None + + def parse_attack(self, attack_row: Tag) -> Attack | None: + """Parse an attack from an attack-summary-row element. + + Args: + attack_row: BeautifulSoup element for the attack row. + + Returns: + Attack object or None if parsing failed. 
+ """ + try: + # Get attack name + name_elem = attack_row.find(class_="attack-summary-row__name") + if not name_elem: + return None + name = name_elem.get_text(strip=True) + + # Get energy cost + cost: list[str] = [] + costs_elem = attack_row.find(class_="attack-summary-row__costs") + if costs_elem: + for cost_icon in costs_elem.find_all("span", class_=re.compile(r"energy-icon")): + for cls in cost_icon.get("class", []): + if cls in ENERGY_TYPES: + cost.append(ENERGY_TYPES[cls]) + + # Get damage + damage: int | None = None + damage_modifier: str | None = None + damage_elem = attack_row.find(class_="attack-summary-row__damage") + if damage_elem: + damage_text = damage_elem.get_text(strip=True) + # Parse damage like "60", "50+", "100x" + match = re.match(r"(\d+)([+x])?", damage_text) + if match: + damage = int(match.group(1)) + damage_modifier = match.group(2) + + # Get effect text + effect_text: str | None = None + footer_elem = attack_row.find(class_="attack-summary-row__footer") + if footer_elem: + effect_text = footer_elem.get_text(strip=True) + if not effect_text: + effect_text = None + + return Attack( + name=name, + cost=cost, + damage=damage, + damage_modifier=damage_modifier, + effect_text=effect_text, + ) + + except Exception as e: + logger.warning(f"Failed to parse attack: {e}") + return None + + def parse_ability(self, ability_row: Tag) -> Ability | None: + """Parse an ability from an ability-summary-row element. + + Args: + ability_row: BeautifulSoup element for the ability row. + + Returns: + Ability object or None if parsing failed. 
+ """ + try: + # Get ability name (text after "Ability" badge) + name_elem = ability_row.find(class_="ability-summary-row__name") + if not name_elem: + return None + + # Remove the "Ability" badge text to get just the name + name_text = name_elem.get_text(strip=True) + name = re.sub(r"^Ability\s*", "", name_text) + + # Get effect text + desc_elem = ability_row.find(class_="ability-summary-row__description") + effect_text = desc_elem.get_text(strip=True) if desc_elem else "" + + return Ability(name=name, effect_text=effect_text) + + except Exception as e: + logger.warning(f"Failed to parse ability: {e}") + return None + + def parse_card_page(self, soup: BeautifulSoup, url: str, set_code: str) -> Card | None: + """Parse a card page into a Card object. + + Args: + soup: BeautifulSoup object of the card page. + url: URL of the card page (for error logging). + set_code: Set code for this card. + + Returns: + Card object or None if parsing failed. + """ + try: + # Extract card number and name from URL + match = re.search(rf"/cards/{set_code}/(\d+)/([^/]+)/", url) + if not match: + logger.error(f"Could not parse card URL: {url}") + return None + + card_number = int(match.group(1)) + url_name = match.group(2) + + # Get card name from page + name_elem = soup.find("h1") + if not name_elem: + logger.error(f"Could not find card name on page: {url}") + return None + name = name_elem.get_text(strip=True) + + # Determine card type + card_type = "pokemon" + type_info = soup.find(string=re.compile(r"Pokémon|Trainer|Energy", re.IGNORECASE)) + if type_info: + type_text = str(type_info).lower() + if "trainer" in type_text: + card_type = "trainer" + elif "energy" in type_text: + card_type = "energy" + + # Create card ID + card_id = f"{set_code}-{card_number:03d}-{url_name}" + + # Get rarity from CSS class (rarity-icon--rarity-X) + rarity = "Unknown" + rarity_icon = soup.find("span", class_="rarity-icon") + if rarity_icon: + for cls in rarity_icon.get("class", []): + if 
"rarity-icon--rarity-" in cls: + rarity_code = cls.replace("rarity-icon--rarity-", "") + rarity = RARITY_CODES.get(rarity_code, rarity_code) + break + + # Get card image URL (first image in card-detail__card section) + image_url: str | None = None + card_section = soup.find("div", class_="card-detail__card") + if card_section: + img = card_section.find("img") + if img: + image_url = img.get("src") + # Remove query params to get full resolution + if image_url and "?" in image_url: + image_url = image_url.split("?")[0] + + # Initialize card + card = Card( + id=card_id, + name=name, + set_code=set_code, + set_name=SETS.get(set_code, {}).get("name", set_code), + card_number=card_number, + rarity=rarity, + card_type=card_type, + image_url=image_url, + source_url=url, + is_ex="ex" in name.lower(), + ) + + if card_type == "pokemon": + self._parse_pokemon_details(soup, card) + elif card_type == "trainer": + self._parse_trainer_details(soup, card) + elif card_type == "energy": + self._parse_energy_details(soup, card) + + return card + + except Exception as e: + logger.error(f"Failed to parse card page {url}: {e}") + self.errors.append({"url": url, "error": str(e)}) + return None + + def _parse_pokemon_details(self, soup: BeautifulSoup, card: Card) -> None: + """Parse Pokemon-specific details from the page. + + Args: + soup: BeautifulSoup object of the card page. + card: Card object to populate. 
+ """ + # Get HP + hp_match = soup.find(string=re.compile(r"HP\s*(\d+)", re.IGNORECASE)) + if hp_match: + hp_num = re.search(r"(\d+)", str(hp_match)) + if hp_num: + card.hp = int(hp_num.group(1)) + else: + # Try finding HP in the stat display + hp_elem = soup.find("span", string="HP") + if hp_elem: + hp_value = hp_elem.find_next("span") + if hp_value: + hp_text = hp_value.get_text(strip=True) + hp_num = re.search(r"(\d+)", hp_text) + if hp_num: + card.hp = int(hp_num.group(1)) + + # Get Pokemon type from first energy icon NOT in an attack row + # The card's type icon is in the header area, not in attack-summary-row__cost + for icon in soup.find_all("span", class_=re.compile(r"energy-icon--type-")): + parent = icon.parent + parent_classes = parent.get("class", []) if parent else [] + # Skip if this is an attack cost icon + if "attack-summary-row__cost" not in parent_classes: + for cls in icon.get("class", []): + if cls in ENERGY_TYPES: + card.pokemon_type = ENERGY_TYPES[cls] + break + if card.pokemon_type: + break + + # Get stage and evolution info + stage_text = soup.find(string=re.compile(r"Basic|Stage 1|Stage 2", re.IGNORECASE)) + if stage_text: + stage_lower = str(stage_text).lower() + if "stage 2" in stage_lower: + card.stage = "stage_2" + elif "stage 1" in stage_lower: + card.stage = "stage_1" + elif "basic" in stage_lower: + card.stage = "basic" + + # Get evolves_from + evolves_match = soup.find(string=re.compile(r"Evolves from", re.IGNORECASE)) + if evolves_match: + # Try to find the Pokemon name link nearby + parent = evolves_match.parent if hasattr(evolves_match, "parent") else None + if parent: + link = parent.find("a") + if link: + card.evolves_from = link.get_text(strip=True) + + # Get abilities + for ability_row in soup.find_all(class_="ability-summary-row"): + ability = self.parse_ability(ability_row) + if ability: + card.abilities.append(ability) + + # Get attacks + for attack_row in soup.find_all(class_="attack-summary-row"): + attack = 
def _parse_trainer_details(self, soup: BeautifulSoup, card: Card) -> None:
    """Parse Trainer-specific details from the page.

    Populates ``card.stage`` with the trainer type ("item", "supporter" or
    "stadium"), ``card.flavor_text`` with the effect text and
    ``card.illustrator``; ``Card.to_dict`` exports the first two as
    ``trainer_type`` and ``effect_text``.

    Args:
        soup: BeautifulSoup object of the card page.
        card: Card object to populate.
    """
    # Get trainer type (Item, Supporter, Stadium). Store only the matched
    # keyword: the surrounding text node may hold more than the type itself.
    trainer_kinds = re.compile(r"\b(Item|Supporter|Stadium)\b", re.IGNORECASE)
    type_node = soup.find(string=trainer_kinds)
    if type_node:
        kind = trainer_kinds.search(str(type_node))
        if kind:
            card.stage = kind.group(1).lower()

    # Get effect text (main card text for trainers)
    # NOTE(review): heuristic - takes the first <p>/<div> in document order
    # with more than 20 characters that does not mention "Illustrated";
    # verify it selects the effect box rather than page chrome.
    for elem in soup.find_all(["p", "div"]):
        # Collapse whitespace instead of stripping each text node, so words
        # in adjacent inline elements are not glued together.
        text = " ".join(elem.get_text().split())
        if len(text) > 20 and "Illustrated" not in text:
            card.flavor_text = text
            break

    # Get illustrator; the label is removed case-insensitively to match the
    # case-insensitive search.
    credit = soup.find(string=re.compile(r"Illustrated by", re.IGNORECASE))
    if credit:
        card.illustrator = re.sub(
            r"illustrated by", "", str(credit), flags=re.IGNORECASE
        ).strip()
+ """ + # Parse card ID + match = re.match(r"([a-z0-9]+)-(\d+)-(.+)", card_id) + if not match: + logger.error(f"Invalid card ID format: {card_id}") + return None + + set_code = match.group(1) + card_number = int(match.group(2)) + card_name = match.group(3) + + url = f"{BASE_URL}/cards/{set_code}/{card_number}/{card_name}/" + logger.info(f"Scraping card: {url}") + + soup = self.fetch_page(url) + if not soup: + return None + + return self.parse_card_page(soup, url, set_code) + + def scrape_set(self, set_code: str, limit: int | None = None) -> list[Card]: + """Scrape all cards from a set. + + Args: + set_code: Set code (e.g., "a1", "a1a"). + limit: Maximum number of cards to scrape (for testing). + + Returns: + List of Card objects. + """ + if set_code not in SETS: + logger.error(f"Unknown set code: {set_code}") + return [] + + card_urls = self.get_card_urls_for_set(set_code) + if limit: + card_urls = card_urls[:limit] + + cards: list[Card] = [] + total = len(card_urls) + + for i, (card_number, card_name, url) in enumerate(card_urls, 1): + logger.info(f"[{i}/{total}] Scraping: {card_name} (#{card_number})") + + soup = self.fetch_page(url) + if soup: + card = self.parse_card_page(soup, url, set_code) + if card: + cards.append(card) + self.save_card(card) + + # Rate limiting + if i < total: + time.sleep(REQUEST_DELAY) + + return cards + + def save_card(self, card: Card) -> Path: + """Save a card to a JSON file. + + Args: + card: Card object to save. + + Returns: + Path to the saved file. 
+ """ + set_dir = self.data_dir / card.set_code + set_dir.mkdir(parents=True, exist_ok=True) + + # Generate filename: {number:03d}-{name}.json + url_name = card.id.split("-", 2)[2] # Get name part from ID + filename = f"{card.card_number:03d}-{url_name}.json" + filepath = set_dir / filename + + with open(filepath, "w", encoding="utf-8") as f: + json.dump(card.to_dict(), f, indent=2, ensure_ascii=False) + + logger.debug(f"Saved: {filepath}") + return filepath + + def generate_index(self) -> Path: + """Generate the combined index file from existing card files. + + Returns: + Path to the index file. + """ + logger.info("Generating index...") + + index: dict[str, Any] = { + "generated_at": datetime.now(timezone.utc).isoformat(), + "schema_version": "1.0", + "sets": {}, + "cards": [], + "total_cards": 0, + } + + for set_code in SETS: + set_dir = self.data_dir / set_code + if not set_dir.exists(): + continue + + card_files = sorted(set_dir.glob("*.json")) + index["sets"][set_code] = { + "name": SETS[set_code]["name"], + "card_count": len(card_files), + } + + for card_file in card_files: + relative_path = f"{set_code}/{card_file.name}" + with open(card_file, encoding="utf-8") as f: + card_data = json.load(f) + + index["cards"].append( + { + "id": card_data["id"], + "name": card_data["name"], + "set_code": set_code, + "card_number": card_data["card_number"], + "file": relative_path, + } + ) + + index["total_cards"] = len(index["cards"]) + + # Sort cards by set and number + index["cards"].sort(key=lambda x: (x["set_code"], x["card_number"])) + + index_path = self.data_dir / "_index.json" + with open(index_path, "w", encoding="utf-8") as f: + json.dump(index, f, indent=2, ensure_ascii=False) + + logger.info(f"Index generated: {index_path} ({index['total_cards']} cards)") + return index_path + + def save_errors(self) -> Path | None: + """Save error log if there were any errors. + + Returns: + Path to the error log file, or None if no errors. 
+ """ + if not self.errors: + return None + + error_log = self.data_dir / "_errors.log" + with open(error_log, "w", encoding="utf-8") as f: + f.write(f"Scraping errors - {datetime.now(timezone.utc).isoformat()}\n") + f.write("=" * 60 + "\n\n") + for error in self.errors: + f.write(f"URL: {error['url']}\n") + f.write(f"Error: {error['error']}\n\n") + + logger.warning(f"Errors logged to: {error_log}") + return error_log + + +# ============================================================================= +# CLI +# ============================================================================= + + +def main() -> int: + """Main entry point for the scraper CLI.""" + parser = argparse.ArgumentParser( + description="Scrape Pokemon TCG Pocket card data from pokemon-zone.com", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Scrape entire set + uv run python scripts/scrape_pokemon_pocket.py --set a1 + + # Scrape with limit (for testing) + uv run python scripts/scrape_pokemon_pocket.py --set a1 --limit 5 + + # Scrape single card by ID + uv run python scripts/scrape_pokemon_pocket.py --card a1-132-gardevoir + + # Regenerate index from existing card files + uv run python scripts/scrape_pokemon_pocket.py --reindex + """, + ) + + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--set", + choices=list(SETS.keys()), + help="Scrape all cards from a set", + ) + group.add_argument( + "--card", + type=str, + help="Scrape a single card by ID (e.g., a1-132-gardevoir)", + ) + group.add_argument( + "--reindex", + action="store_true", + help="Regenerate index from existing card files", + ) + + parser.add_argument( + "--limit", + type=int, + help="Maximum number of cards to scrape (for testing)", + ) + + args = parser.parse_args() + + scraper = PokemonPocketScraper() + + if args.reindex: + scraper.generate_index() + return 0 + + if args.card: + card = scraper.scrape_card(args.card) + if card: + scraper.save_card(card) + 
scraper.generate_index() + logger.info(f"Successfully scraped: {card.name}") + return 0 + else: + logger.error(f"Failed to scrape card: {args.card}") + return 1 + + if args.set: + cards = scraper.scrape_set(args.set, limit=args.limit) + scraper.generate_index() + scraper.save_errors() + + success_count = len(cards) + error_count = len(scraper.errors) + total = success_count + error_count + + logger.info(f"Scraping complete: {success_count}/{total} cards succeeded") + if error_count > 0: + logger.warning(f"{error_count} errors occurred (see _errors.log)") + return 1 + return 0 + + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/uv.lock b/backend/uv.lock index 5dc976d..130f9ed 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -153,6 +153,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, +] + [[package]] name = "bidict" version = "0.23.1" @@ -203,6 +216,63 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = 
"sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 
107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, 
upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, 
+ { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -508,12 +578,14 @@ dependencies = [ [package.dev-dependencies] dev = [ 
+ { name = "beautifulsoup4" }, { name = "black" }, { name = "httpx" }, { name = "mypy" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "requests" }, { name = "ruff" }, ] @@ -535,12 +607,14 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "beautifulsoup4", specifier = ">=4.12.0" }, { name = "black", specifier = ">=26.1.0" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "mypy", specifier = ">=1.19.1" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "pytest-asyncio", specifier = ">=1.3.0" }, { name = "pytest-cov", specifier = ">=7.0.0" }, + { name = "requests", specifier = ">=2.31.0" }, { name = "ruff", specifier = ">=0.14.14" }, ] @@ -941,6 +1015,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, ] +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + [[package]] name = "rsa" version = "4.9.1" @@ -1000,6 +1089,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, +] + [[package]] name = "sqlalchemy" version = "2.0.46" @@ -1076,6 +1174,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, ] +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = 
"sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + [[package]] name = "uvicorn" version = "0.40.0"