Replace silent error hiding with explicit failures

Three changes to fail fast instead of silently degrading:

1. GameService.create_game: Raise GameCreationError when energy card
   definition not found instead of logging warning and continuing.
   A deck with missing energy cards is fundamentally broken.

2. CardService.load_all: Collect all card file load failures and raise
   CardServiceLoadError at end with comprehensive error report. Prevents
   startup with partial card data that causes cryptic runtime errors.
   New exceptions: CardLoadError, CardServiceLoadError

3. GameStateManager.recover_active_games: Return RecoveryResult dataclass
   with recovered count, failed game IDs with error messages, and total.
   Enables proper monitoring and alerting for corrupted game state.

Tests added for energy card error case. Existing tests updated for
new RecoveryResult return type.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-01-29 18:48:06 -06:00
parent cd3cc892f4
commit 55e02ceb21
5 changed files with 183 additions and 42 deletions

View File

@ -58,6 +58,36 @@ class SetInfo(BaseModel):
card_count: int
class CardLoadError(Exception):
"""Raised when a card definition file fails to load.
Attributes:
file_path: Path to the file that failed to load.
reason: Description of why loading failed.
"""
def __init__(self, file_path: str, reason: str) -> None:
self.file_path = file_path
self.reason = reason
super().__init__(f"Failed to load card from {file_path}: {reason}")
class CardServiceLoadError(Exception):
"""Raised when CardService fails to load one or more card files.
Collects all individual load failures for reporting.
Attributes:
failures: List of (file_path, error_message) tuples.
"""
def __init__(self, failures: list[tuple[str, str]]) -> None:
self.failures = failures
file_list = "\n ".join(f"{path}: {err}" for path, err in failures[:5])
more = f"\n ... and {len(failures) - 5} more" if len(failures) > 5 else ""
super().__init__(f"Failed to load {len(failures)} card definition(s):\n {file_list}{more}")
class CardService:
"""Load and serve card definitions from JSON files.
@ -109,7 +139,7 @@ class CardService:
Raises:
FileNotFoundError: If the definitions directory doesn't exist.
ValueError: If any card definition is invalid.
CardServiceLoadError: If any card definition files fail to load.
"""
if self._loaded:
logger.warning("CardService.load_all() called but cards already loaded")
@ -132,14 +162,21 @@ class CardService:
)
)
# Collect all failures across card types
all_failures: list[tuple[str, str]] = []
# Load Pokemon cards
await self._load_card_type("pokemon", CardType.POKEMON)
all_failures.extend(await self._load_card_type("pokemon", CardType.POKEMON))
# Load Trainer cards
await self._load_card_type("trainer", CardType.TRAINER)
all_failures.extend(await self._load_card_type("trainer", CardType.TRAINER))
# Load Energy cards
await self._load_energy_cards()
all_failures.extend(await self._load_energy_cards())
# Fail fast if any card files couldn't be loaded
if all_failures:
raise CardServiceLoadError(all_failures)
self._loaded = True
logger.info(
@ -149,17 +186,21 @@ class CardService:
f"{len(self._by_type[CardType.ENERGY])} Energy)"
)
async def _load_card_type(self, subdir: str, card_type: CardType) -> None:
async def _load_card_type(self, subdir: str, card_type: CardType) -> list[tuple[str, str]]:
"""Load all cards of a specific type from a subdirectory.
Args:
subdir: Subdirectory name (e.g., "pokemon", "trainer").
card_type: The CardType these cards should be.
Returns:
List of (file_path, error_message) tuples for any files that failed.
"""
failures: list[tuple[str, str]] = []
type_dir = self._definitions_dir / subdir
if not type_dir.exists():
logger.debug(f"Directory {type_dir} does not exist, skipping")
return
return failures
for set_dir in type_dir.iterdir():
if not set_dir.is_dir():
@ -170,45 +211,62 @@ class CardService:
self._by_set[set_code] = []
for card_file in set_dir.glob("*.json"):
card = await self._load_card_file(card_file)
if card:
try:
card = await self._load_card_file(card_file)
self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
async def _load_energy_cards(self) -> None:
"""Load energy cards from the energy subdirectory."""
return failures
async def _load_energy_cards(self) -> list[tuple[str, str]]:
"""Load energy cards from the energy subdirectory.
Returns:
List of (file_path, error_message) tuples for any files that failed.
"""
failures: list[tuple[str, str]] = []
energy_dir = self._definitions_dir / "energy"
if not energy_dir.exists():
logger.debug(f"Directory {energy_dir} does not exist, skipping")
return
return failures
# Energy can be in subdirectories (e.g., energy/basic/) or directly in energy/
for item in energy_dir.iterdir():
if item.is_file() and item.suffix == ".json":
card = await self._load_card_file(item)
if card:
try:
card = await self._load_card_file(item)
self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
elif item.is_dir():
for card_file in item.glob("*.json"):
card = await self._load_card_file(card_file)
if card:
try:
card = await self._load_card_file(card_file)
self._index_card(card)
except CardLoadError as e:
failures.append((e.file_path, e.reason))
async def _load_card_file(self, file_path: Path) -> CardDefinition | None:
return failures
async def _load_card_file(self, file_path: Path) -> CardDefinition:
"""Load and validate a single card definition file.
Args:
file_path: Path to the JSON file.
Returns:
CardDefinition if valid, None if loading failed.
CardDefinition if valid.
Raises:
CardLoadError: If loading or validation fails.
"""
try:
with open(file_path) as f:
card_data = json.load(f)
return CardDefinition.model_validate(card_data)
except Exception as e:
logger.error(f"Failed to load card from {file_path}: {e}")
return None
raise CardLoadError(str(file_path), str(e)) from e
def _index_card(self, card: CardDefinition) -> None:
"""Add a card to all indexes.

View File

@ -771,19 +771,23 @@ class GameService:
# Energy card IDs follow pattern: energy-basic-{type}
energy_id = f"energy-basic-{energy_type}"
energy_def = self._card_service.get_card(energy_id)
if energy_def:
energy1_cards.extend([energy_def] * qty)
else:
logger.warning(f"Energy card not found: {energy_id}")
if energy_def is None:
raise GameCreationError(
f"Energy card definition not found: {energy_id}. "
"Check that card data is loaded correctly."
)
energy1_cards.extend([energy_def] * qty)
energy2_cards = []
for energy_type, qty in deck2_entry.energy_cards.items():
energy_id = f"energy-basic-{energy_type}"
energy_def = self._card_service.get_card(energy_id)
if energy_def:
energy2_cards.extend([energy_def] * qty)
else:
logger.warning(f"Energy card not found: {energy_id}")
if energy_def is None:
raise GameCreationError(
f"Energy card definition not found: {energy_id}. "
"Check that card data is loaded correctly."
)
energy2_cards.extend([energy_def] * qty)
# Convert CardDefinitions to CardInstances with unique IDs
deck1_instances = self._cards_to_instances(deck1_cards, p1_str, "main")

View File

@ -43,6 +43,7 @@ Example:
"""
import logging
from dataclasses import dataclass, field
from datetime import UTC, datetime
from uuid import UUID
@ -57,6 +58,22 @@ from app.db.session import get_session
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class RecoveryResult:
"""Result of game recovery operation.
Attributes:
recovered: Number of games successfully recovered.
failed: List of (game_id, error_message) tuples for failed recoveries.
total: Total number of games attempted.
"""
recovered: int
failed: tuple[tuple[str, str], ...] = field(default_factory=tuple)
total: int = 0
# Redis key patterns
GAME_KEY_PREFIX = "game:"
@ -376,7 +393,7 @@ class GameStateManager:
async def recover_active_games(
self,
session: AsyncSession | None = None,
) -> int:
) -> RecoveryResult:
"""Recover all active games from Postgres to Redis.
Called on server startup to restore game state. Loads all games
@ -386,31 +403,49 @@ class GameStateManager:
session: Optional existing session.
Returns:
Number of games recovered.
RecoveryResult with counts and any failures.
Example:
@app.on_event("startup")
async def startup():
count = await game_state_manager.recover_active_games()
logger.info(f"Recovered {count} active games")
result = await game_state_manager.recover_active_games()
logger.info(f"Recovered {result.recovered}/{result.total} games")
if result.failed:
logger.error(f"Failed to recover {len(result.failed)} games")
"""
async def _recover(db: AsyncSession) -> int:
count = 0
async def _recover(db: AsyncSession) -> RecoveryResult:
recovered = 0
failures: list[tuple[str, str]] = []
result = await db.execute(select(ActiveGame))
active_games = result.scalars().all()
total = len(active_games)
for active_game in active_games:
game_id = str(active_game.id)
try:
state = GameState.model_validate(active_game.game_state)
await self.save_to_cache(state)
count += 1
logger.debug(f"Recovered game: {active_game.id}")
recovered += 1
logger.debug(f"Recovered game: {game_id}")
except Exception as e:
logger.error(f"Failed to recover game {active_game.id}: {e}")
error_msg = str(e)
failures.append((game_id, error_msg))
logger.error(f"Failed to recover game {game_id}: {error_msg}")
logger.info(f"Recovered {count} active games from database")
return count
logger.info(f"Recovered {recovered}/{total} active games from database")
if failures:
logger.warning(
f"Failed to recover {len(failures)} games: "
f"{[gid for gid, _ in failures[:5]]}"
+ (f" and {len(failures) - 5} more" if len(failures) > 5 else "")
)
return RecoveryResult(
recovered=recovered,
failed=tuple(failures),
total=total,
)
if session:
return await _recover(session)

View File

@ -455,8 +455,10 @@ class TestHighLevelOperations:
helper.store.clear()
# Recover (pass session)
count = await manager.recover_active_games(session=db_session)
assert count == 2
result = await manager.recover_active_games(session=db_session)
assert result.recovered == 2
assert result.total == 2
assert len(result.failed) == 0
# Both should be in cache now
assert await manager.cache_exists(game1.game_id)
@ -672,10 +674,15 @@ class TestAdditionalCoverage:
helper.store.clear()
# Recovery should handle the error and still recover the valid game
count = await manager.recover_active_games(session=db_session)
result = await manager.recover_active_games(session=db_session)
# Only the valid game should be recovered (invalid one fails validation)
assert count == 1
assert result.recovered == 1
assert result.total == 2
assert len(result.failed) == 1
# Verify the failed game ID is reported
failed_ids = [gid for gid, _ in result.failed]
assert invalid_game_id in failed_ids
assert await manager.cache_exists(valid_game.game_id)
assert not await manager.cache_exists(invalid_game_id)

View File

@ -1200,6 +1200,43 @@ class TestCreateGame:
assert "No basic Pokemon" in str(exc_info.value)
@pytest.mark.asyncio
async def test_create_game_missing_energy_card_raises_error(
self,
mock_state_manager: AsyncMock,
mock_deck_service: AsyncMock,
) -> None:
"""Test that missing energy card definition raises GameCreationError.
If an energy card definition cannot be found (e.g., 'energy-basic-fire'
is not in the card registry), game creation should fail immediately
with a clear error rather than silently creating a broken game.
"""
from app.core.config import RulesConfig
from app.services.game_service import GameService
# CardService that returns None for energy cards (simulating missing data)
mock_card_service = MagicMock()
mock_card_service.get_card = MagicMock(return_value=None)
service = GameService(
state_manager=mock_state_manager,
card_service=mock_card_service,
)
with pytest.raises(GameCreationError) as exc_info:
await service.create_game(
player1_id=uuid4(),
player2_id=uuid4(),
deck1_id=uuid4(),
deck2_id=uuid4(),
rules_config=RulesConfig(),
deck_service=mock_deck_service,
)
assert "Energy card definition not found" in str(exc_info.value)
assert "energy-basic-" in str(exc_info.value)
class TestDefaultEngineFactory:
"""Tests for the _default_engine_factory method.