Replace silent error hiding with explicit failures

Three changes to fail fast instead of silently degrading:

1. GameService.create_game: Raise GameCreationError when energy card
   definition not found instead of logging warning and continuing.
   A deck with missing energy cards is fundamentally broken.

2. CardService.load_all: Collect all card file load failures and raise
   CardServiceLoadError at end with comprehensive error report. Prevents
   startup with partial card data that causes cryptic runtime errors.
   New exceptions: CardLoadError, CardServiceLoadError

3. GameStateManager.recover_active_games: Return RecoveryResult dataclass
   with recovered count, failed game IDs with error messages, and total.
   Enables proper monitoring and alerting for corrupted game state.

Tests added for energy card error case. Existing tests updated for
new RecoveryResult return type.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-01-29 18:48:06 -06:00
parent cd3cc892f4
commit 55e02ceb21
5 changed files with 183 additions and 42 deletions

View File

@ -58,6 +58,36 @@ class SetInfo(BaseModel):
card_count: int card_count: int
class CardLoadError(Exception):
"""Raised when a card definition file fails to load.
Attributes:
file_path: Path to the file that failed to load.
reason: Description of why loading failed.
"""
def __init__(self, file_path: str, reason: str) -> None:
self.file_path = file_path
self.reason = reason
super().__init__(f"Failed to load card from {file_path}: {reason}")
class CardServiceLoadError(Exception):
"""Raised when CardService fails to load one or more card files.
Collects all individual load failures for reporting.
Attributes:
failures: List of (file_path, error_message) tuples.
"""
def __init__(self, failures: list[tuple[str, str]]) -> None:
self.failures = failures
file_list = "\n ".join(f"{path}: {err}" for path, err in failures[:5])
more = f"\n ... and {len(failures) - 5} more" if len(failures) > 5 else ""
super().__init__(f"Failed to load {len(failures)} card definition(s):\n {file_list}{more}")
class CardService: class CardService:
"""Load and serve card definitions from JSON files. """Load and serve card definitions from JSON files.
@ -109,7 +139,7 @@ class CardService:
Raises: Raises:
FileNotFoundError: If the definitions directory doesn't exist. FileNotFoundError: If the definitions directory doesn't exist.
ValueError: If any card definition is invalid. CardServiceLoadError: If any card definition files fail to load.
""" """
if self._loaded: if self._loaded:
logger.warning("CardService.load_all() called but cards already loaded") logger.warning("CardService.load_all() called but cards already loaded")
@ -132,14 +162,21 @@ class CardService:
) )
) )
# Collect all failures across card types
all_failures: list[tuple[str, str]] = []
# Load Pokemon cards # Load Pokemon cards
await self._load_card_type("pokemon", CardType.POKEMON) all_failures.extend(await self._load_card_type("pokemon", CardType.POKEMON))
# Load Trainer cards # Load Trainer cards
await self._load_card_type("trainer", CardType.TRAINER) all_failures.extend(await self._load_card_type("trainer", CardType.TRAINER))
# Load Energy cards # Load Energy cards
await self._load_energy_cards() all_failures.extend(await self._load_energy_cards())
# Fail fast if any card files couldn't be loaded
if all_failures:
raise CardServiceLoadError(all_failures)
self._loaded = True self._loaded = True
logger.info( logger.info(
@ -149,17 +186,21 @@ class CardService:
f"{len(self._by_type[CardType.ENERGY])} Energy)" f"{len(self._by_type[CardType.ENERGY])} Energy)"
) )
async def _load_card_type(self, subdir: str, card_type: CardType) -> None: async def _load_card_type(self, subdir: str, card_type: CardType) -> list[tuple[str, str]]:
"""Load all cards of a specific type from a subdirectory. """Load all cards of a specific type from a subdirectory.
Args: Args:
subdir: Subdirectory name (e.g., "pokemon", "trainer"). subdir: Subdirectory name (e.g., "pokemon", "trainer").
card_type: The CardType these cards should be. card_type: The CardType these cards should be.
Returns:
List of (file_path, error_message) tuples for any files that failed.
""" """
failures: list[tuple[str, str]] = []
type_dir = self._definitions_dir / subdir type_dir = self._definitions_dir / subdir
if not type_dir.exists(): if not type_dir.exists():
logger.debug(f"Directory {type_dir} does not exist, skipping") logger.debug(f"Directory {type_dir} does not exist, skipping")
return return failures
for set_dir in type_dir.iterdir(): for set_dir in type_dir.iterdir():
if not set_dir.is_dir(): if not set_dir.is_dir():
@ -170,45 +211,62 @@ class CardService:
self._by_set[set_code] = [] self._by_set[set_code] = []
for card_file in set_dir.glob("*.json"): for card_file in set_dir.glob("*.json"):
card = await self._load_card_file(card_file) try:
if card: card = await self._load_card_file(card_file)
self._index_card(card) self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
async def _load_energy_cards(self) -> None: return failures
"""Load energy cards from the energy subdirectory."""
async def _load_energy_cards(self) -> list[tuple[str, str]]:
"""Load energy cards from the energy subdirectory.
Returns:
List of (file_path, error_message) tuples for any files that failed.
"""
failures: list[tuple[str, str]] = []
energy_dir = self._definitions_dir / "energy" energy_dir = self._definitions_dir / "energy"
if not energy_dir.exists(): if not energy_dir.exists():
logger.debug(f"Directory {energy_dir} does not exist, skipping") logger.debug(f"Directory {energy_dir} does not exist, skipping")
return return failures
# Energy can be in subdirectories (e.g., energy/basic/) or directly in energy/ # Energy can be in subdirectories (e.g., energy/basic/) or directly in energy/
for item in energy_dir.iterdir(): for item in energy_dir.iterdir():
if item.is_file() and item.suffix == ".json": if item.is_file() and item.suffix == ".json":
card = await self._load_card_file(item) try:
if card: card = await self._load_card_file(item)
self._index_card(card) self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
elif item.is_dir(): elif item.is_dir():
for card_file in item.glob("*.json"): for card_file in item.glob("*.json"):
card = await self._load_card_file(card_file) try:
if card: card = await self._load_card_file(card_file)
self._index_card(card) self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
async def _load_card_file(self, file_path: Path) -> CardDefinition | None: return failures
async def _load_card_file(self, file_path: Path) -> CardDefinition:
"""Load and validate a single card definition file. """Load and validate a single card definition file.
Args: Args:
file_path: Path to the JSON file. file_path: Path to the JSON file.
Returns: Returns:
CardDefinition if valid, None if loading failed. CardDefinition if valid.
Raises:
CardLoadError: If loading or validation fails.
""" """
try: try:
with open(file_path) as f: with open(file_path) as f:
card_data = json.load(f) card_data = json.load(f)
return CardDefinition.model_validate(card_data) return CardDefinition.model_validate(card_data)
except Exception as e: except Exception as e:
logger.error(f"Failed to load card from {file_path}: {e}") raise CardLoadError(str(file_path), str(e)) from e
return None
def _index_card(self, card: CardDefinition) -> None: def _index_card(self, card: CardDefinition) -> None:
"""Add a card to all indexes. """Add a card to all indexes.

View File

@ -771,19 +771,23 @@ class GameService:
# Energy card IDs follow pattern: energy-basic-{type} # Energy card IDs follow pattern: energy-basic-{type}
energy_id = f"energy-basic-{energy_type}" energy_id = f"energy-basic-{energy_type}"
energy_def = self._card_service.get_card(energy_id) energy_def = self._card_service.get_card(energy_id)
if energy_def: if energy_def is None:
energy1_cards.extend([energy_def] * qty) raise GameCreationError(
else: f"Energy card definition not found: {energy_id}. "
logger.warning(f"Energy card not found: {energy_id}") "Check that card data is loaded correctly."
)
energy1_cards.extend([energy_def] * qty)
energy2_cards = [] energy2_cards = []
for energy_type, qty in deck2_entry.energy_cards.items(): for energy_type, qty in deck2_entry.energy_cards.items():
energy_id = f"energy-basic-{energy_type}" energy_id = f"energy-basic-{energy_type}"
energy_def = self._card_service.get_card(energy_id) energy_def = self._card_service.get_card(energy_id)
if energy_def: if energy_def is None:
energy2_cards.extend([energy_def] * qty) raise GameCreationError(
else: f"Energy card definition not found: {energy_id}. "
logger.warning(f"Energy card not found: {energy_id}") "Check that card data is loaded correctly."
)
energy2_cards.extend([energy_def] * qty)
# Convert CardDefinitions to CardInstances with unique IDs # Convert CardDefinitions to CardInstances with unique IDs
deck1_instances = self._cards_to_instances(deck1_cards, p1_str, "main") deck1_instances = self._cards_to_instances(deck1_cards, p1_str, "main")

View File

@ -43,6 +43,7 @@ Example:
""" """
import logging import logging
from dataclasses import dataclass, field
from datetime import UTC, datetime from datetime import UTC, datetime
from uuid import UUID from uuid import UUID
@ -57,6 +58,22 @@ from app.db.session import get_session
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class RecoveryResult:
"""Result of game recovery operation.
Attributes:
recovered: Number of games successfully recovered.
failed: List of (game_id, error_message) tuples for failed recoveries.
total: Total number of games attempted.
"""
recovered: int
failed: tuple[tuple[str, str], ...] = field(default_factory=tuple)
total: int = 0
# Redis key patterns # Redis key patterns
GAME_KEY_PREFIX = "game:" GAME_KEY_PREFIX = "game:"
@ -376,7 +393,7 @@ class GameStateManager:
async def recover_active_games( async def recover_active_games(
self, self,
session: AsyncSession | None = None, session: AsyncSession | None = None,
) -> int: ) -> RecoveryResult:
"""Recover all active games from Postgres to Redis. """Recover all active games from Postgres to Redis.
Called on server startup to restore game state. Loads all games Called on server startup to restore game state. Loads all games
@ -386,31 +403,49 @@ class GameStateManager:
session: Optional existing session. session: Optional existing session.
Returns: Returns:
Number of games recovered. RecoveryResult with counts and any failures.
Example: Example:
@app.on_event("startup") @app.on_event("startup")
async def startup(): async def startup():
count = await game_state_manager.recover_active_games() result = await game_state_manager.recover_active_games()
logger.info(f"Recovered {count} active games") logger.info(f"Recovered {result.recovered}/{result.total} games")
if result.failed:
logger.error(f"Failed to recover {len(result.failed)} games")
""" """
async def _recover(db: AsyncSession) -> int: async def _recover(db: AsyncSession) -> RecoveryResult:
count = 0 recovered = 0
failures: list[tuple[str, str]] = []
result = await db.execute(select(ActiveGame)) result = await db.execute(select(ActiveGame))
active_games = result.scalars().all() active_games = result.scalars().all()
total = len(active_games)
for active_game in active_games: for active_game in active_games:
game_id = str(active_game.id)
try: try:
state = GameState.model_validate(active_game.game_state) state = GameState.model_validate(active_game.game_state)
await self.save_to_cache(state) await self.save_to_cache(state)
count += 1 recovered += 1
logger.debug(f"Recovered game: {active_game.id}") logger.debug(f"Recovered game: {game_id}")
except Exception as e: except Exception as e:
logger.error(f"Failed to recover game {active_game.id}: {e}") error_msg = str(e)
failures.append((game_id, error_msg))
logger.error(f"Failed to recover game {game_id}: {error_msg}")
logger.info(f"Recovered {count} active games from database") logger.info(f"Recovered {recovered}/{total} active games from database")
return count if failures:
logger.warning(
f"Failed to recover {len(failures)} games: "
f"{[gid for gid, _ in failures[:5]]}"
+ (f" and {len(failures) - 5} more" if len(failures) > 5 else "")
)
return RecoveryResult(
recovered=recovered,
failed=tuple(failures),
total=total,
)
if session: if session:
return await _recover(session) return await _recover(session)

View File

@ -455,8 +455,10 @@ class TestHighLevelOperations:
helper.store.clear() helper.store.clear()
# Recover (pass session) # Recover (pass session)
count = await manager.recover_active_games(session=db_session) result = await manager.recover_active_games(session=db_session)
assert count == 2 assert result.recovered == 2
assert result.total == 2
assert len(result.failed) == 0
# Both should be in cache now # Both should be in cache now
assert await manager.cache_exists(game1.game_id) assert await manager.cache_exists(game1.game_id)
@ -672,10 +674,15 @@ class TestAdditionalCoverage:
helper.store.clear() helper.store.clear()
# Recovery should handle the error and still recover the valid game # Recovery should handle the error and still recover the valid game
count = await manager.recover_active_games(session=db_session) result = await manager.recover_active_games(session=db_session)
# Only the valid game should be recovered (invalid one fails validation) # Only the valid game should be recovered (invalid one fails validation)
assert count == 1 assert result.recovered == 1
assert result.total == 2
assert len(result.failed) == 1
# Verify the failed game ID is reported
failed_ids = [gid for gid, _ in result.failed]
assert invalid_game_id in failed_ids
assert await manager.cache_exists(valid_game.game_id) assert await manager.cache_exists(valid_game.game_id)
assert not await manager.cache_exists(invalid_game_id) assert not await manager.cache_exists(invalid_game_id)

View File

@ -1200,6 +1200,43 @@ class TestCreateGame:
assert "No basic Pokemon" in str(exc_info.value) assert "No basic Pokemon" in str(exc_info.value)
@pytest.mark.asyncio
async def test_create_game_missing_energy_card_raises_error(
self,
mock_state_manager: AsyncMock,
mock_deck_service: AsyncMock,
) -> None:
"""Test that missing energy card definition raises GameCreationError.
If an energy card definition cannot be found (e.g., 'energy-basic-fire'
is not in the card registry), game creation should fail immediately
with a clear error rather than silently creating a broken game.
"""
from app.core.config import RulesConfig
from app.services.game_service import GameService
# CardService that returns None for energy cards (simulating missing data)
mock_card_service = MagicMock()
mock_card_service.get_card = MagicMock(return_value=None)
service = GameService(
state_manager=mock_state_manager,
card_service=mock_card_service,
)
with pytest.raises(GameCreationError) as exc_info:
await service.create_game(
player1_id=uuid4(),
player2_id=uuid4(),
deck1_id=uuid4(),
deck2_id=uuid4(),
rules_config=RulesConfig(),
deck_service=mock_deck_service,
)
assert "Energy card definition not found" in str(exc_info.value)
assert "energy-basic-" in str(exc_info.value)
class TestDefaultEngineFactory: class TestDefaultEngineFactory:
"""Tests for the _default_engine_factory method. """Tests for the _default_engine_factory method.