Add Pokemon Pocket card scraper for offline card data

- Add scrape_pokemon_pocket.py script to fetch card data from pokemon-zone.com
- Scrapes Pokemon, Trainer, and Energy cards with full metadata
- Includes image URLs for offline caching support
- Supports --set, --card, --limit, and --reindex CLI options
- Add beautifulsoup4 and requests as dev dependencies
- Create data/cards/ directory structure for card JSON files
This commit is contained in:
Cal Corum 2026-01-26 22:52:20 -06:00
parent 3ed67ea16b
commit 2517d241ac
10 changed files with 1273 additions and 0 deletions

View File

@ -0,0 +1,3 @@
# Card data directory
# Contains scraped Pokemon TCG Pocket card data in JSON format
# Structure: {set_code}/{number}-{name}.json

View File

@ -0,0 +1,52 @@
{
"generated_at": "2026-01-27T04:45:04.962697+00:00",
"schema_version": "1.0",
"sets": {
"a1": {
"name": "Genetic Apex",
"card_count": 5
},
"a1a": {
"name": "Mythical Island",
"card_count": 0
}
},
"cards": [
{
"id": "a1-001-bulbasaur",
"name": "Bulbasaur",
"set_code": "a1",
"card_number": 1,
"file": "a1/001-bulbasaur.json"
},
{
"id": "a1-002-ivysaur",
"name": "Ivysaur",
"set_code": "a1",
"card_number": 2,
"file": "a1/002-ivysaur.json"
},
{
"id": "a1-003-venusaur",
"name": "Venusaur",
"set_code": "a1",
"card_number": 3,
"file": "a1/003-venusaur.json"
},
{
"id": "a1-004-venusaur-ex",
"name": "Venusaur ex",
"set_code": "a1",
"card_number": 4,
"file": "a1/004-venusaur-ex.json"
},
{
"id": "a1-005-caterpie",
"name": "Caterpie",
"set_code": "a1",
"card_number": 5,
"file": "a1/005-caterpie.json"
}
],
"total_cards": 5
}

View File

@ -0,0 +1,38 @@
{
"id": "a1-001-bulbasaur",
"name": "Bulbasaur",
"set_code": "a1",
"set_name": "Genetic Apex",
"card_number": 1,
"rarity": "Common",
"card_type": "pokemon",
"image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000010_00_FUSHIGIDANE_C.webp",
"source_url": "https://www.pokemon-zone.com/cards/a1/1/bulbasaur/",
"hp": 70,
"pokemon_type": "grass",
"stage": "basic",
"evolves_from": null,
"is_ex": false,
"abilities": [],
"attacks": [
{
"name": "Vine Whip",
"cost": [
"grass",
"colorless"
],
"damage": 40,
"damage_modifier": null,
"effect_text": null,
"effect_id": null
}
],
"weakness": {
"type": "fire",
"value": 20
},
"resistance": null,
"retreat_cost": 1,
"flavor_text": null,
"illustrator": "Narumi Sato"
}

View File

@ -0,0 +1,39 @@
{
"id": "a1-002-ivysaur",
"name": "Ivysaur",
"set_code": "a1",
"set_name": "Genetic Apex",
"card_number": 2,
"rarity": "Uncommon",
"card_type": "pokemon",
"image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000020_00_FUSHIGISOU_U.webp",
"source_url": "https://www.pokemon-zone.com/cards/a1/2/ivysaur/",
"hp": 90,
"pokemon_type": "grass",
"stage": "stage_1",
"evolves_from": "Bulbasaur",
"is_ex": false,
"abilities": [],
"attacks": [
{
"name": "Razor Leaf",
"cost": [
"grass",
"colorless",
"colorless"
],
"damage": 60,
"damage_modifier": null,
"effect_text": null,
"effect_id": null
}
],
"weakness": {
"type": "fire",
"value": 20
},
"resistance": null,
"retreat_cost": 2,
"flavor_text": null,
"illustrator": "Kurata So"
}

View File

@ -0,0 +1,40 @@
{
"id": "a1-003-venusaur",
"name": "Venusaur",
"set_code": "a1",
"set_name": "Genetic Apex",
"card_number": 3,
"rarity": "Rare",
"card_type": "pokemon",
"image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000030_00_FUSHIGIBANA_R.webp",
"source_url": "https://www.pokemon-zone.com/cards/a1/3/venusaur/",
"hp": 160,
"pokemon_type": "grass",
"stage": "stage_2",
"evolves_from": "Ivysaur",
"is_ex": false,
"abilities": [],
"attacks": [
{
"name": "Mega Drain",
"cost": [
"grass",
"grass",
"colorless",
"colorless"
],
"damage": 80,
"damage_modifier": null,
"effect_text": "Heal 30 damage from this Pokémon.",
"effect_id": null
}
],
"weakness": {
"type": "fire",
"value": 20
},
"resistance": null,
"retreat_cost": 3,
"flavor_text": null,
"illustrator": "Ryota Murayama"
}

View File

@ -0,0 +1,52 @@
{
"id": "a1-004-venusaur-ex",
"name": "Venusaur ex",
"set_code": "a1",
"set_name": "Genetic Apex",
"card_number": 4,
"rarity": "Double Rare",
"card_type": "pokemon",
"image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000040_00_FUSHIGIBANAex_RR.webp",
"source_url": "https://www.pokemon-zone.com/cards/a1/4/venusaur-ex/",
"hp": 190,
"pokemon_type": "grass",
"stage": "stage_2",
"evolves_from": "Ivysaur",
"is_ex": true,
"abilities": [],
"attacks": [
{
"name": "Razor Leaf",
"cost": [
"grass",
"colorless",
"colorless"
],
"damage": 60,
"damage_modifier": null,
"effect_text": null,
"effect_id": null
},
{
"name": "Giant Bloom",
"cost": [
"grass",
"grass",
"colorless",
"colorless"
],
"damage": 100,
"damage_modifier": null,
"effect_text": "Heal 30 damage from this Pokémon.",
"effect_id": null
}
],
"weakness": {
"type": "fire",
"value": 20
},
"resistance": null,
"retreat_cost": 3,
"flavor_text": null,
"illustrator": "PLANETA CG Works"
}

View File

@ -0,0 +1,37 @@
{
"id": "a1-005-caterpie",
"name": "Caterpie",
"set_code": "a1",
"set_name": "Genetic Apex",
"card_number": 5,
"rarity": "Common",
"card_type": "pokemon",
"image_url": "https://assets.pokemon-zone.com/game-assets/CardPreviews/cPK_10_000050_00_CATERPIE_C.webp",
"source_url": "https://www.pokemon-zone.com/cards/a1/5/caterpie/",
"hp": 50,
"pokemon_type": "grass",
"stage": "basic",
"evolves_from": null,
"is_ex": false,
"abilities": [],
"attacks": [
{
"name": "Find a Friend",
"cost": [
"colorless"
],
"damage": null,
"damage_modifier": null,
"effect_text": "Put 1 random Pokémon from your deck into your hand.",
"effect_id": null
}
],
"weakness": {
"type": "fire",
"value": 20
},
"resistance": null,
"retreat_cost": 1,
"flavor_text": null,
"illustrator": "Miki Tanaka"
}

View File

@ -21,12 +21,14 @@ dependencies = [
[dependency-groups]
dev = [
"beautifulsoup4>=4.12.0",
"black>=26.1.0",
"httpx>=0.28.1",
"mypy>=1.19.1",
"pytest>=9.0.2",
"pytest-asyncio>=1.3.0",
"pytest-cov>=7.0.0",
"requests>=2.31.0",
"ruff>=0.14.14",
]

View File

@ -0,0 +1,903 @@
#!/usr/bin/env python
"""Scrape Pokemon TCG Pocket card data from pokemon-zone.com.
This script fetches card data from the Genetic Apex (A1) and Mythical Island (A1a)
sets and saves them as individual JSON files for use in the Mantimon TCG game engine.
Usage:
# Scrape entire set
uv run python scripts/scrape_pokemon_pocket.py --set a1
# Scrape with limit (for testing)
uv run python scripts/scrape_pokemon_pocket.py --set a1 --limit 5
# Scrape single card by ID
uv run python scripts/scrape_pokemon_pocket.py --card a1-132-gardevoir
# Regenerate index from existing card files
uv run python scripts/scrape_pokemon_pocket.py --reindex
Output:
- Individual card files: data/cards/{set}/{number}-{name}.json
- Combined index: data/cards/_index.json
- Error log: data/cards/_errors.log
"""
import argparse
import json
import logging
import re
import sys
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import requests
from bs4 import BeautifulSoup, Tag
# =============================================================================
# Configuration
# =============================================================================
BASE_URL = "https://www.pokemon-zone.com"
# Card JSON output root: <repo>/data/cards relative to this script's directory.
DATA_DIR = Path(__file__).parent.parent / "data" / "cards"
REQUEST_DELAY = 1.5  # seconds between requests (polite rate limiting)
MAX_RETRIES = 3  # fetch attempts per URL before giving up
RETRY_DELAY = 5  # seconds to wait between retry attempts

# Set info for validation and metadata.
# "expected_cards" is informational; it is not enforced anywhere in this script.
SETS = {
    "a1": {"name": "Genetic Apex", "expected_cards": 286},
    "a1a": {"name": "Mythical Island", "expected_cards": 86},
}

# Energy type mapping from CSS classes used on pokemon-zone.com icon spans.
ENERGY_TYPES = {
    "energy-icon--type-grass": "grass",
    "energy-icon--type-fire": "fire",
    "energy-icon--type-water": "water",
    "energy-icon--type-lightning": "lightning",
    "energy-icon--type-psychic": "psychic",
    "energy-icon--type-fighting": "fighting",
    "energy-icon--type-darkness": "darkness",
    "energy-icon--type-metal": "metal",
    "energy-icon--type-colorless": "colorless",
    "energy-icon--type-dragon": "dragon",
}

# Rarity code mapping from CSS classes (rarity-icon--rarity-X).
RARITY_CODES = {
    "C": "Common",
    "U": "Uncommon",
    "R": "Rare",
    "RR": "Double Rare",
    "AR": "Art Rare",
    "SAR": "Special Art Rare",
    "UR": "Ultra Rare",
    "IM": "Immersive",
    "S": "Shiny",
    "CR": "Crown Rare",
}

# =============================================================================
# Logging Setup
# =============================================================================
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)
# =============================================================================
# Data Classes
# =============================================================================
@dataclass
class Attack:
"""A Pokemon's attack."""
name: str
cost: list[str]
damage: int | None
damage_modifier: str | None # "+", "x", or None
effect_text: str | None
effect_id: str | None = None # To be mapped later
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"name": self.name,
"cost": self.cost,
"damage": self.damage,
"damage_modifier": self.damage_modifier,
"effect_text": self.effect_text,
"effect_id": self.effect_id,
}
@dataclass
class Ability:
"""A Pokemon's ability."""
name: str
effect_text: str
effect_id: str | None = None # To be mapped later
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
return {
"name": self.name,
"effect_text": self.effect_text,
"effect_id": self.effect_id,
}
@dataclass
class Card:
    """Complete card data for a Pokemon, Trainer, or Energy card."""

    id: str
    name: str
    set_code: str
    set_name: str
    card_number: int
    rarity: str
    card_type: str  # "pokemon", "trainer", "energy"
    image_url: str | None = None  # URL to card image for offline caching
    hp: int | None = None
    pokemon_type: str | None = None
    stage: str | None = None  # "basic", "stage_1", "stage_2"
    evolves_from: str | None = None
    is_ex: bool = False
    abilities: list[Ability] = field(default_factory=list)
    attacks: list[Attack] = field(default_factory=list)
    weakness_type: str | None = None
    weakness_value: int | None = None
    resistance_type: str | None = None
    resistance_value: int | None = None
    retreat_cost: int = 0
    flavor_text: str | None = None
    illustrator: str | None = None
    source_url: str = ""

    def _type_pair(self, type_value: str | None, amount: int | None) -> dict[str, Any] | None:
        """Build a {type, value} mapping, or None when no type is set."""
        if not type_value:
            return None
        return {"type": type_value, "value": amount}

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        The common header fields are always emitted; type-specific fields are
        appended depending on card_type ("pokemon", "trainer", or "energy").
        """
        payload: dict[str, Any] = {
            "id": self.id,
            "name": self.name,
            "set_code": self.set_code,
            "set_name": self.set_name,
            "card_number": self.card_number,
            "rarity": self.rarity,
            "card_type": self.card_type,
            "image_url": self.image_url,
            "source_url": self.source_url,
        }
        if self.card_type == "pokemon":
            payload["hp"] = self.hp
            payload["pokemon_type"] = self.pokemon_type
            payload["stage"] = self.stage
            payload["evolves_from"] = self.evolves_from
            payload["is_ex"] = self.is_ex
            payload["abilities"] = [ability.to_dict() for ability in self.abilities]
            payload["attacks"] = [attack.to_dict() for attack in self.attacks]
            payload["weakness"] = self._type_pair(self.weakness_type, self.weakness_value)
            payload["resistance"] = self._type_pair(self.resistance_type, self.resistance_value)
            payload["retreat_cost"] = self.retreat_cost
            payload["flavor_text"] = self.flavor_text
            payload["illustrator"] = self.illustrator
        elif self.card_type == "trainer":
            payload["trainer_type"] = self.stage  # Reusing stage field for trainer type
            payload["effect_text"] = self.flavor_text  # Trainer effect
            payload["illustrator"] = self.illustrator
        elif self.card_type == "energy":
            payload["energy_type"] = self.pokemon_type
            payload["illustrator"] = self.illustrator
        return payload
# =============================================================================
# Scraper Class
# =============================================================================
class PokemonPocketScraper:
    """Scraper for Pokemon TCG Pocket card data from pokemon-zone.com."""

    def __init__(self, data_dir: Path = DATA_DIR):
        """Initialize the scraper.

        Args:
            data_dir: Directory to save card data files.
        """
        self.data_dir = data_dir
        # Shared session so connection pooling and the headers below apply to
        # every request made by this scraper.
        self.session = requests.Session()
        self.session.headers.update(
            {
                "User-Agent": "MantimonTCG-CardScraper/1.0 (https://github.com/mantimon-tcg)",
                "Accept": "text/html,application/xhtml+xml",
            }
        )
        # Per-URL failures accumulated during a run; written out by save_errors().
        self.errors: list[dict[str, Any]] = []
    def fetch_page(self, url: str) -> BeautifulSoup | None:
        """Fetch a page with retry logic.

        Args:
            url: URL to fetch.

        Returns:
            BeautifulSoup object or None if all retries failed.
        """
        for attempt in range(MAX_RETRIES):
            try:
                response = self.session.get(url, timeout=30)
                response.raise_for_status()
                return BeautifulSoup(response.text, "html.parser")
            except requests.RequestException as e:
                logger.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} failed for {url}: {e}")
                # Back off before retrying, but not after the final attempt.
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)
        # All attempts failed; record the failure for the error log.
        self.errors.append({"url": url, "error": "Max retries exceeded"})
        return None
def get_card_urls_for_set(self, set_code: str) -> list[tuple[int, str, str]]:
"""Get all card URLs for a set.
Args:
set_code: Set code (e.g., "a1", "a1a").
Returns:
List of (card_number, card_name, url) tuples.
"""
set_url = f"{BASE_URL}/sets/{set_code}/"
logger.info(f"Fetching set page: {set_url}")
soup = self.fetch_page(set_url)
if not soup:
logger.error(f"Failed to fetch set page for {set_code}")
return []
cards: list[tuple[int, str, str]] = []
# Find all card links - they follow pattern /cards/{set}/{number}/{name}/
pattern = re.compile(rf"^/cards/{set_code}/(\d+)/([^/]+)/$")
for link in soup.find_all("a", href=pattern):
href = link.get("href", "")
match = pattern.match(href)
if match:
card_number = int(match.group(1))
card_name = match.group(2)
# Avoid duplicates (page may have multiple links to same card)
card_tuple = (card_number, card_name, f"{BASE_URL}{href}")
if card_tuple not in cards:
cards.append(card_tuple)
# Sort by card number
cards.sort(key=lambda x: x[0])
logger.info(f"Found {len(cards)} cards in set {set_code}")
return cards
    def parse_energy_type(self, element: Tag | None) -> str | None:
        """Extract energy type from an element containing an energy icon.

        Args:
            element: BeautifulSoup element that may contain energy icons.

        Returns:
            Energy type string (first recognized icon wins) or None.
        """
        if not element:
            return None
        for icon in element.find_all("span", class_=re.compile(r"energy-icon--type-")):
            for cls in icon.get("class", []):
                if cls in ENERGY_TYPES:
                    return ENERGY_TYPES[cls]
        return None
    def parse_attack(self, attack_row: Tag) -> Attack | None:
        """Parse an attack from an attack-summary-row element.

        Args:
            attack_row: BeautifulSoup element for the attack row.

        Returns:
            Attack object, or None if the row has no name or parsing raised.
        """
        try:
            # Get attack name
            name_elem = attack_row.find(class_="attack-summary-row__name")
            if not name_elem:
                return None
            name = name_elem.get_text(strip=True)
            # Get energy cost: one entry per icon, in display order.
            cost: list[str] = []
            costs_elem = attack_row.find(class_="attack-summary-row__costs")
            if costs_elem:
                for cost_icon in costs_elem.find_all("span", class_=re.compile(r"energy-icon")):
                    for cls in cost_icon.get("class", []):
                        if cls in ENERGY_TYPES:
                            cost.append(ENERGY_TYPES[cls])
            # Get damage
            damage: int | None = None
            damage_modifier: str | None = None
            damage_elem = attack_row.find(class_="attack-summary-row__damage")
            if damage_elem:
                damage_text = damage_elem.get_text(strip=True)
                # Parse damage like "60", "50+", "100x"
                match = re.match(r"(\d+)([+x])?", damage_text)
                if match:
                    damage = int(match.group(1))
                    damage_modifier = match.group(2)
            # Get effect text; an empty footer is normalized to None.
            effect_text: str | None = None
            footer_elem = attack_row.find(class_="attack-summary-row__footer")
            if footer_elem:
                effect_text = footer_elem.get_text(strip=True)
                if not effect_text:
                    effect_text = None
            return Attack(
                name=name,
                cost=cost,
                damage=damage,
                damage_modifier=damage_modifier,
                effect_text=effect_text,
            )
        except Exception as e:
            # Best-effort parsing: a malformed row is logged and skipped.
            logger.warning(f"Failed to parse attack: {e}")
            return None
    def parse_ability(self, ability_row: Tag) -> Ability | None:
        """Parse an ability from an ability-summary-row element.

        Args:
            ability_row: BeautifulSoup element for the ability row.

        Returns:
            Ability object, or None if the row has no name or parsing raised.
        """
        try:
            # Get ability name (text after "Ability" badge)
            name_elem = ability_row.find(class_="ability-summary-row__name")
            if not name_elem:
                return None
            # Remove the "Ability" badge text to get just the name
            name_text = name_elem.get_text(strip=True)
            name = re.sub(r"^Ability\s*", "", name_text)
            # Get effect text; empty string if no description element is present.
            desc_elem = ability_row.find(class_="ability-summary-row__description")
            effect_text = desc_elem.get_text(strip=True) if desc_elem else ""
            return Ability(name=name, effect_text=effect_text)
        except Exception as e:
            # Best-effort parsing: a malformed row is logged and skipped.
            logger.warning(f"Failed to parse ability: {e}")
            return None
def parse_card_page(self, soup: BeautifulSoup, url: str, set_code: str) -> Card | None:
"""Parse a card page into a Card object.
Args:
soup: BeautifulSoup object of the card page.
url: URL of the card page (for error logging).
set_code: Set code for this card.
Returns:
Card object or None if parsing failed.
"""
try:
# Extract card number and name from URL
match = re.search(rf"/cards/{set_code}/(\d+)/([^/]+)/", url)
if not match:
logger.error(f"Could not parse card URL: {url}")
return None
card_number = int(match.group(1))
url_name = match.group(2)
# Get card name from page
name_elem = soup.find("h1")
if not name_elem:
logger.error(f"Could not find card name on page: {url}")
return None
name = name_elem.get_text(strip=True)
# Determine card type
card_type = "pokemon"
type_info = soup.find(string=re.compile(r"Pokémon|Trainer|Energy", re.IGNORECASE))
if type_info:
type_text = str(type_info).lower()
if "trainer" in type_text:
card_type = "trainer"
elif "energy" in type_text:
card_type = "energy"
# Create card ID
card_id = f"{set_code}-{card_number:03d}-{url_name}"
# Get rarity from CSS class (rarity-icon--rarity-X)
rarity = "Unknown"
rarity_icon = soup.find("span", class_="rarity-icon")
if rarity_icon:
for cls in rarity_icon.get("class", []):
if "rarity-icon--rarity-" in cls:
rarity_code = cls.replace("rarity-icon--rarity-", "")
rarity = RARITY_CODES.get(rarity_code, rarity_code)
break
# Get card image URL (first image in card-detail__card section)
image_url: str | None = None
card_section = soup.find("div", class_="card-detail__card")
if card_section:
img = card_section.find("img")
if img:
image_url = img.get("src")
# Remove query params to get full resolution
if image_url and "?" in image_url:
image_url = image_url.split("?")[0]
# Initialize card
card = Card(
id=card_id,
name=name,
set_code=set_code,
set_name=SETS.get(set_code, {}).get("name", set_code),
card_number=card_number,
rarity=rarity,
card_type=card_type,
image_url=image_url,
source_url=url,
is_ex="ex" in name.lower(),
)
if card_type == "pokemon":
self._parse_pokemon_details(soup, card)
elif card_type == "trainer":
self._parse_trainer_details(soup, card)
elif card_type == "energy":
self._parse_energy_details(soup, card)
return card
except Exception as e:
logger.error(f"Failed to parse card page {url}: {e}")
self.errors.append({"url": url, "error": str(e)})
return None
    def _parse_pokemon_details(self, soup: BeautifulSoup, card: Card) -> None:
        """Parse Pokemon-specific details from the page.

        Mutates `card` in place: hp, pokemon_type, stage, evolves_from,
        abilities, attacks, weakness, retreat_cost, illustrator, flavor_text.
        NOTE(review): resistance is never parsed here, so Card.resistance stays
        None in the output — confirm whether the site exposes resistance at all.

        Args:
            soup: BeautifulSoup object of the card page.
            card: Card object to populate.
        """
        # Get HP: first try any text node matching "HP <digits>" ...
        hp_match = soup.find(string=re.compile(r"HP\s*(\d+)", re.IGNORECASE))
        if hp_match:
            hp_num = re.search(r"(\d+)", str(hp_match))
            if hp_num:
                card.hp = int(hp_num.group(1))
        else:
            # ... then fall back to a <span>HP</span> label followed by a value span.
            hp_elem = soup.find("span", string="HP")
            if hp_elem:
                hp_value = hp_elem.find_next("span")
                if hp_value:
                    hp_text = hp_value.get_text(strip=True)
                    hp_num = re.search(r"(\d+)", hp_text)
                    if hp_num:
                        card.hp = int(hp_num.group(1))
        # Get Pokemon type from first energy icon NOT in an attack row
        # The card's type icon is in the header area, not in attack-summary-row__cost
        # NOTE(review): parse_attack reads costs under "attack-summary-row__costs"
        # (plural) while this skip checks the singular "attack-summary-row__cost";
        # confirm the per-icon wrapper class in the site markup, otherwise the skip
        # may never trigger and this relies purely on document order.
        for icon in soup.find_all("span", class_=re.compile(r"energy-icon--type-")):
            parent = icon.parent
            parent_classes = parent.get("class", []) if parent else []
            # Skip if this is an attack cost icon
            if "attack-summary-row__cost" not in parent_classes:
                for cls in icon.get("class", []):
                    if cls in ENERGY_TYPES:
                        card.pokemon_type = ENERGY_TYPES[cls]
                        break
                if card.pokemon_type:
                    break
        # Get stage and evolution info ("Stage 2" checked before "Stage 1" so the
        # substring match cannot misclassify).
        stage_text = soup.find(string=re.compile(r"Basic|Stage 1|Stage 2", re.IGNORECASE))
        if stage_text:
            stage_lower = str(stage_text).lower()
            if "stage 2" in stage_lower:
                card.stage = "stage_2"
            elif "stage 1" in stage_lower:
                card.stage = "stage_1"
            elif "basic" in stage_lower:
                card.stage = "basic"
        # Get evolves_from
        evolves_match = soup.find(string=re.compile(r"Evolves from", re.IGNORECASE))
        if evolves_match:
            # Try to find the Pokemon name link nearby
            parent = evolves_match.parent if hasattr(evolves_match, "parent") else None
            if parent:
                link = parent.find("a")
                if link:
                    card.evolves_from = link.get_text(strip=True)
        # Get abilities
        for ability_row in soup.find_all(class_="ability-summary-row"):
            ability = self.parse_ability(ability_row)
            if ability:
                card.abilities.append(ability)
        # Get attacks
        for attack_row in soup.find_all(class_="attack-summary-row"):
            attack = self.parse_attack(attack_row)
            if attack:
                card.attacks.append(attack)
        # Get weakness: the type icon and the "+N" value live near the label node.
        weakness_section = soup.find(string=re.compile(r"Weakness", re.IGNORECASE))
        if weakness_section:
            parent = weakness_section.parent
            if parent:
                card.weakness_type = self.parse_energy_type(parent.parent)
                # Look for +20 pattern
                value_match = re.search(
                    r"\+(\d+)", parent.parent.get_text() if parent.parent else ""
                )
                if value_match:
                    card.weakness_value = int(value_match.group(1))
        # Get retreat cost (count colorless energy icons in retreat section)
        retreat_section = soup.find(string=re.compile(r"Retreat", re.IGNORECASE))
        if retreat_section:
            parent = retreat_section.parent
            if parent and parent.parent:
                retreat_icons = parent.parent.find_all(
                    "span", class_=re.compile(r"energy-icon--type-colorless")
                )
                card.retreat_cost = len(retreat_icons)
        # Get illustrator
        illustrator_match = soup.find(string=re.compile(r"Illustrated by", re.IGNORECASE))
        if illustrator_match:
            card.illustrator = str(illustrator_match).replace("Illustrated by", "").strip()
        # Get flavor text (Pokemon description)
        # This is usually in a paragraph after the attacks section; the length and
        # keyword filters are heuristics to skip credit lines.
        for p in soup.find_all("p"):
            text = p.get_text(strip=True)
            if text and len(text) > 50 and "Illustrated" not in text and "Artwork" not in text:
                card.flavor_text = text
                break
    def _parse_trainer_details(self, soup: BeautifulSoup, card: Card) -> None:
        """Parse Trainer-specific details from the page.

        Mutates `card` in place. Note the field reuse: the trainer type is stored
        in card.stage and the effect text in card.flavor_text (to_dict() renames
        them to "trainer_type" and "effect_text" on output).

        Args:
            soup: BeautifulSoup object of the card page.
            card: Card object to populate.
        """
        # Get trainer type (Item, Supporter, Stadium)
        type_match = soup.find(string=re.compile(r"Item|Supporter|Stadium", re.IGNORECASE))
        if type_match:
            card.stage = str(type_match).strip().lower()
        # Get effect text (main card text for trainers)
        # Usually in a description section; first sufficiently long non-credit
        # block wins — heuristic, may pick up unrelated page text.
        for elem in soup.find_all(["p", "div"]):
            text = elem.get_text(strip=True)
            if text and len(text) > 20 and "Illustrated" not in text:
                card.flavor_text = text
                break
        # Get illustrator
        illustrator_match = soup.find(string=re.compile(r"Illustrated by", re.IGNORECASE))
        if illustrator_match:
            card.illustrator = str(illustrator_match).replace("Illustrated by", "").strip()
    def _parse_energy_details(self, soup: BeautifulSoup, card: Card) -> None:
        """Parse Energy-specific details from the page.

        Mutates `card` in place. The energy type is stored in card.pokemon_type
        (to_dict() renames it to "energy_type" on output).

        Args:
            soup: BeautifulSoup object of the card page.
            card: Card object to populate.
        """
        # Get energy type from the first recognized energy icon on the page.
        card.pokemon_type = self.parse_energy_type(soup)
        # Get illustrator
        illustrator_match = soup.find(string=re.compile(r"Illustrated by", re.IGNORECASE))
        if illustrator_match:
            card.illustrator = str(illustrator_match).replace("Illustrated by", "").strip()
    def scrape_card(self, card_id: str) -> Card | None:
        """Scrape a single card by ID.

        Args:
            card_id: Card ID in format "{set}-{number}-{name}" (e.g., "a1-132-gardevoir").

        Returns:
            Card object or None if the ID is malformed, the fetch failed, or
            the page could not be parsed.
        """
        # Parse card ID: set code (lowercase alphanumeric), number, name slug.
        match = re.match(r"([a-z0-9]+)-(\d+)-(.+)", card_id)
        if not match:
            logger.error(f"Invalid card ID format: {card_id}")
            return None
        set_code = match.group(1)
        card_number = int(match.group(2))
        card_name = match.group(3)
        # Rebuild the site URL; note the number is NOT zero-padded in the URL.
        url = f"{BASE_URL}/cards/{set_code}/{card_number}/{card_name}/"
        logger.info(f"Scraping card: {url}")
        soup = self.fetch_page(url)
        if not soup:
            return None
        return self.parse_card_page(soup, url, set_code)
    def scrape_set(self, set_code: str, limit: int | None = None) -> list[Card]:
        """Scrape all cards from a set, saving each card file as it is parsed.

        Args:
            set_code: Set code (e.g., "a1", "a1a"); must be a key of SETS.
            limit: Maximum number of cards to scrape (for testing).

        Returns:
            List of successfully scraped Card objects (failures are recorded
            in self.errors by the fetch/parse helpers).
        """
        if set_code not in SETS:
            logger.error(f"Unknown set code: {set_code}")
            return []
        card_urls = self.get_card_urls_for_set(set_code)
        if limit:
            card_urls = card_urls[:limit]
        cards: list[Card] = []
        total = len(card_urls)
        for i, (card_number, card_name, url) in enumerate(card_urls, 1):
            logger.info(f"[{i}/{total}] Scraping: {card_name} (#{card_number})")
            soup = self.fetch_page(url)
            if soup:
                card = self.parse_card_page(soup, url, set_code)
                if card:
                    cards.append(card)
                    # Save immediately so a partial run still leaves files on disk.
                    self.save_card(card)
            # Rate limiting between requests (skipped after the last card).
            if i < total:
                time.sleep(REQUEST_DELAY)
        return cards
def save_card(self, card: Card) -> Path:
"""Save a card to a JSON file.
Args:
card: Card object to save.
Returns:
Path to the saved file.
"""
set_dir = self.data_dir / card.set_code
set_dir.mkdir(parents=True, exist_ok=True)
# Generate filename: {number:03d}-{name}.json
url_name = card.id.split("-", 2)[2] # Get name part from ID
filename = f"{card.card_number:03d}-{url_name}.json"
filepath = set_dir / filename
with open(filepath, "w", encoding="utf-8") as f:
json.dump(card.to_dict(), f, indent=2, ensure_ascii=False)
logger.debug(f"Saved: {filepath}")
return filepath
    def generate_index(self) -> Path:
        """Generate the combined index file from existing card files.

        Scans {data_dir}/{set_code}/*.json for every known set and writes
        data_dir/_index.json with per-set counts and a sorted card list.

        Returns:
            Path to the index file.
        """
        logger.info("Generating index...")
        index: dict[str, Any] = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "schema_version": "1.0",
            "sets": {},
            "cards": [],
            "total_cards": 0,
        }
        for set_code in SETS:
            set_dir = self.data_dir / set_code
            # Sets that were never scraped simply have no directory yet.
            if not set_dir.exists():
                continue
            card_files = sorted(set_dir.glob("*.json"))
            index["sets"][set_code] = {
                "name": SETS[set_code]["name"],
                "card_count": len(card_files),
            }
            for card_file in card_files:
                # Index entries store paths relative to data_dir.
                relative_path = f"{set_code}/{card_file.name}"
                with open(card_file, encoding="utf-8") as f:
                    card_data = json.load(f)
                index["cards"].append(
                    {
                        "id": card_data["id"],
                        "name": card_data["name"],
                        "set_code": set_code,
                        "card_number": card_data["card_number"],
                        "file": relative_path,
                    }
                )
        index["total_cards"] = len(index["cards"])
        # Sort cards by set and number
        index["cards"].sort(key=lambda x: (x["set_code"], x["card_number"]))
        index_path = self.data_dir / "_index.json"
        with open(index_path, "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, ensure_ascii=False)
        logger.info(f"Index generated: {index_path} ({index['total_cards']} cards)")
        return index_path
    def save_errors(self) -> Path | None:
        """Save error log if there were any errors.

        Overwrites data_dir/_errors.log with one URL/Error pair per failure
        collected in self.errors during this run.

        Returns:
            Path to the error log file, or None if no errors.
        """
        if not self.errors:
            return None
        error_log = self.data_dir / "_errors.log"
        with open(error_log, "w", encoding="utf-8") as f:
            f.write(f"Scraping errors - {datetime.now(timezone.utc).isoformat()}\n")
            f.write("=" * 60 + "\n\n")
            for error in self.errors:
                f.write(f"URL: {error['url']}\n")
                f.write(f"Error: {error['error']}\n\n")
        logger.warning(f"Errors logged to: {error_log}")
        return error_log
# =============================================================================
# CLI
# =============================================================================
def main() -> int:
    """Command-line entry point: dispatch to reindex, single-card, or set scraping.

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Scrape Pokemon TCG Pocket card data from pokemon-zone.com",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Scrape entire set
    uv run python scripts/scrape_pokemon_pocket.py --set a1

    # Scrape with limit (for testing)
    uv run python scripts/scrape_pokemon_pocket.py --set a1 --limit 5

    # Scrape single card by ID
    uv run python scripts/scrape_pokemon_pocket.py --card a1-132-gardevoir

    # Regenerate index from existing card files
    uv run python scripts/scrape_pokemon_pocket.py --reindex
""",
    )
    # Exactly one mode flag is required.
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "--set",
        choices=list(SETS.keys()),
        help="Scrape all cards from a set",
    )
    mode.add_argument(
        "--card",
        type=str,
        help="Scrape a single card by ID (e.g., a1-132-gardevoir)",
    )
    mode.add_argument(
        "--reindex",
        action="store_true",
        help="Regenerate index from existing card files",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="Maximum number of cards to scrape (for testing)",
    )
    opts = parser.parse_args()
    scraper = PokemonPocketScraper()

    # Mode 1: rebuild the index from files already on disk (no network).
    if opts.reindex:
        scraper.generate_index()
        return 0

    # Mode 2: fetch a single card, save it, and refresh the index.
    if opts.card:
        card = scraper.scrape_card(opts.card)
        if card is None:
            logger.error(f"Failed to scrape card: {opts.card}")
            return 1
        scraper.save_card(card)
        scraper.generate_index()
        logger.info(f"Successfully scraped: {card.name}")
        return 0

    # Mode 3: fetch a whole set (optionally truncated by --limit).
    if opts.set:
        cards = scraper.scrape_set(opts.set, limit=opts.limit)
        scraper.generate_index()
        scraper.save_errors()
        succeeded = len(cards)
        failed = len(scraper.errors)
        logger.info(f"Scraping complete: {succeeded}/{succeeded + failed} cards succeeded")
        if failed > 0:
            logger.warning(f"{failed} errors occurred (see _errors.log)")
            return 1
        return 0

    # Unreachable: the required mutually exclusive group guarantees one mode.
    return 1


if __name__ == "__main__":
    sys.exit(main())

107
backend/uv.lock generated
View File

@ -153,6 +153,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
]
[[package]]
name = "beautifulsoup4"
version = "4.14.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "soupsieve" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
]
[[package]]
name = "bidict"
version = "0.23.1"
@ -203,6 +216,63 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
{ url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
{ url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
{ url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
{ url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
{ url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
{ url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
{ url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
{ url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
{ url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
{ url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
{ url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
{ url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
{ url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
{ url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
{ url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
{ url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
{ url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
{ url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
{ url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
{ url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
{ url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
{ url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
{ url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
{ url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
{ url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
{ url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
{ url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
{ url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
{ url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
{ url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
{ url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
{ url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
{ url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
{ url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
{ url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
{ url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
{ url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
{ url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
{ url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
{ url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
{ url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
{ url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
{ url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
{ url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
{ url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
{ url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
{ url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
{ url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
]
[[package]]
name = "click"
version = "8.3.1"
@ -508,12 +578,14 @@ dependencies = [
[package.dev-dependencies]
dev = [
{ name = "beautifulsoup4" },
{ name = "black" },
{ name = "httpx" },
{ name = "mypy" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "pytest-cov" },
{ name = "requests" },
{ name = "ruff" },
]
@ -535,12 +607,14 @@ requires-dist = [
[package.metadata.requires-dev]
dev = [
{ name = "beautifulsoup4", specifier = ">=4.12.0" },
{ name = "black", specifier = ">=26.1.0" },
{ name = "httpx", specifier = ">=0.28.1" },
{ name = "mypy", specifier = ">=1.19.1" },
{ name = "pytest", specifier = ">=9.0.2" },
{ name = "pytest-asyncio", specifier = ">=1.3.0" },
{ name = "pytest-cov", specifier = ">=7.0.0" },
{ name = "requests", specifier = ">=2.31.0" },
{ name = "ruff", specifier = ">=0.14.14" },
]
@ -941,6 +1015,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" },
]
[[package]]
name = "requests"
version = "2.32.5"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
]
[[package]]
name = "rsa"
version = "4.9.1"
@ -1000,6 +1089,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
]
[[package]]
name = "soupsieve"
version = "2.8.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" },
]
[[package]]
name = "sqlalchemy"
version = "2.0.46"
@ -1076,6 +1174,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
]
[[package]]
name = "urllib3"
version = "2.6.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
]
[[package]]
name = "uvicorn"
version = "0.40.0"