paper-dynasty-database/tests/test_postgame_refractor.py

"""Integration tests for WP-13: Post-Game Callback Integration.

Tests cover both post-game callback endpoints:
  POST /api/v2/season-stats/update-game/{game_id}
  POST /api/v2/refractor/evaluate-game/{game_id}

All tests run against a named shared-memory SQLite database so that Peewee
model queries inside the route handlers (which execute in the TestClient's
thread) and test fixture setup/assertions (which execute in the pytest thread)
use the same underlying database connection.  This is necessary because
SQLite :memory: databases are per-connection — a new thread gets a new empty
database unless a shared-cache URI is used.

The WP-13 tests therefore manage their own database fixture (_wp13_db) and do
not use the conftest autouse setup_test_db.  The module-level setup_wp13_db
fixture creates tables before each test and drops them after.

The season_stats service 'db' reference is patched at module level so that
db.atomic() inside update_season_stats() operates on _wp13_db.

Test matrix:
  test_update_game_creates_season_stats_rows
      POST to update-game, assert player_season_stats rows are created.
  test_update_game_response_shape
      Response contains {"updated": N, "skipped": false}.
  test_update_game_idempotent
      Second POST to same game_id returns skipped=true, stats unchanged.
  test_evaluate_game_increases_current_value
      After update-game, POST to evaluate-game, assert current_value > 0.
  test_evaluate_game_tier_advancement
      Set up card near tier threshold, game pushes past it, assert tier advanced.
  test_evaluate_game_no_tier_advancement
      Player accumulates too few stats — tier stays at 0.
  test_evaluate_game_tier_ups_in_response
      Tier-up appears in tier_ups list with correct fields.
  test_evaluate_game_skips_players_without_state
      Players in game but without RefractorCardState are silently skipped.
  test_auth_required_update_game
      Missing bearer token returns 401 on update-game.
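  test_auth_required_evaluate_game
      Missing bearer token returns 401 on evaluate-game.
  test_evaluate_game_nonexistent_game_id
      Unknown game_id returns 200 with evaluated=0 and an empty tier_ups list.
  test_evaluate_game_zero_plays
      Game with no plays returns 200 with evaluated=0 and an empty tier_ups list.
  test_evaluate_game_error_isolation
      One player's evaluation error does not abort the rest of the batch.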
"""

import os

# Set API_TOKEN before any app imports so that app.dependencies.AUTH_TOKEN
# is initialised to the same value as our test bearer token.
os.environ.setdefault("API_TOKEN", "test-token")

import app.services.season_stats as _season_stats_module
import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from peewee import SqliteDatabase

from app.db_engine import (
    Cardset,
    RefractorCardState,
    RefractorCosmetic,
    RefractorTierBoost,
    RefractorTrack,
    MlbPlayer,
    Pack,
    PackType,
    Player,
    BattingSeasonStats,
    PitchingSeasonStats,
    ProcessedGame,
    Rarity,
    Roster,
    RosterSlot,
    ScoutClaim,
    ScoutOpportunity,
    StratGame,
    StratPlay,
    Decision,
    Team,
    Card,
    Event,
)

# ---------------------------------------------------------------------------
# Shared-memory SQLite database for WP-13 tests.
# A named shared-memory URI allows multiple connections (and therefore
# multiple threads) to share the same in-memory database, which is required
# because TestClient routes run in a different thread than pytest fixtures.
# ---------------------------------------------------------------------------
# The URI names an in-memory database opened in shared-cache mode; uri=True
# asks for the path to be interpreted as a URI rather than a file name.
_WP13_DB_URI = "file:wp13test?mode=memory&cache=shared"
_wp13_db = SqliteDatabase(_WP13_DB_URI, uri=True, pragmas={"foreign_keys": 1})

# Every model the WP-13 tests bind to _wp13_db.  The setup_wp13_db fixture
# creates tables for this list and drops them in the reverse of this order.
# NOTE(review): presumably ordered so that referenced tables come before the
# tables that reference them — verify against app.db_engine before reordering.
_WP13_MODELS = [
    Rarity,
    Event,
    Cardset,
    MlbPlayer,
    Player,
    Team,
    PackType,
    Pack,
    Card,
    Roster,
    RosterSlot,
    StratGame,
    StratPlay,
    Decision,
    ScoutOpportunity,
    ScoutClaim,
    BattingSeasonStats,
    PitchingSeasonStats,
    ProcessedGame,
    RefractorTrack,
    RefractorCardState,
    RefractorTierBoost,
    RefractorCosmetic,
]

# Point the season_stats service module's `db` attribute at the shared test
# database so that db.atomic() in update_season_stats() operates on _wp13_db.
# NOTE(review): this assignment runs at import time and nothing in this file
# restores the previous value — confirm that other test modules using
# app.services.season_stats in the same pytest session are not affected.
_season_stats_module.db = _wp13_db

# ---------------------------------------------------------------------------
# Auth header used by every authenticated request
# ---------------------------------------------------------------------------
# Derive the token from the environment instead of hard-coding it: the
# os.environ.setdefault() call near the top of this module leaves an already
# exported API_TOKEN untouched, and the bearer token sent by the tests must
# match whatever value is actually in effect or every request would get a 401.
AUTH_HEADER = {"Authorization": f"Bearer {os.environ.get('API_TOKEN', 'test-token')}"}


# ---------------------------------------------------------------------------
# Database fixture — binds all models to _wp13_db and creates/drops tables
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def setup_wp13_db():
    """Give every test in this module a freshly created WP-13 schema.

    Setup binds each model in ``_WP13_MODELS`` to ``_wp13_db``, opens the
    connection (reusing one that is already open) and creates the tables.
    The database handle is then yielded to the test.  Teardown drops the
    tables in the reverse of their listing order.

    The fixture is autouse, so tests receive it without naming it.  It uses
    the shared-cache URI database rather than a plain :memory: one so that
    code running in the TestClient's thread sees the same data.

    NOTE(review): this is intended to stand in for the conftest autouse
    setup_test_db fixture; whether that fixture is actually bypassed for this
    module cannot be confirmed from this file — verify against conftest.py.
    """
    database = _wp13_db
    database.bind(_WP13_MODELS)
    database.connect(reuse_if_open=True)
    database.create_tables(_WP13_MODELS)

    yield database

    database.drop_tables(_WP13_MODELS[::-1], safe=True)


# ---------------------------------------------------------------------------
# Slim test app — only mounts the two routers under test.
# A db_middleware ensures the shared-cache connection is open for each request.
# ---------------------------------------------------------------------------


def _build_test_app() -> FastAPI:
    """Assemble a minimal FastAPI app exposing only the two WP-13 routers.

    An HTTP middleware is registered that calls
    ``_wp13_db.connect(reuse_if_open=True)`` ahead of every request, so the
    code serving the request has an open connection to the shared-memory
    SQLite database even when it runs in a different thread from the fixture
    that created the tables.
    """
    from app.routers_v2.season_stats import router as ss_router
    from app.routers_v2.refractor import router as evo_router

    api = FastAPI()

    async def db_middleware(request: Request, call_next):
        # Open (or reuse) the connection before handing off to the route.
        _wp13_db.connect(reuse_if_open=True)
        response = await call_next(request)
        return response

    # Equivalent to decorating db_middleware with @api.middleware("http").
    api.middleware("http")(db_middleware)

    for router in (ss_router, evo_router):
        api.include_router(router)
    return api


# ---------------------------------------------------------------------------
# TestClient fixture — function-scoped so it uses the per-test db binding.
# ---------------------------------------------------------------------------


@pytest.fixture
def client(setup_wp13_db):
    """Yield a TestClient wrapping the slim WP-13 app.

    Requesting ``setup_wp13_db`` guarantees the tables exist before the
    client is built.  The client is used as a context manager so it is
    closed once the test finishes.
    """
    app_under_test = _build_test_app()
    with TestClient(app_under_test) as test_client:
        yield test_client


# ---------------------------------------------------------------------------
# Shared helper factories (mirrors test_season_stats_update.py style)
# ---------------------------------------------------------------------------


def _make_cardset():
    """Return the "WP13 Test Set" cardset, creating it if it does not exist."""
    cardset, _created = Cardset.get_or_create(
        name="WP13 Test Set",
        defaults=dict(description="wp13 cardset", total_cards=100),
    )
    return cardset


def _make_rarity():
    """Return the "Common" rarity (value 1), creating it if it does not exist."""
    lookup = {"value": 1, "name": "Common"}
    rarity, _created = Rarity.get_or_create(defaults={"color": "#ffffff"}, **lookup)
    return rarity


def _make_player(name: str, pos: str = "1B") -> Player:
    """Insert and return a Player called *name* with *pos* as its first position.

    The player is attached to the shared test rarity and cardset; the other
    columns set here receive fixed placeholder values.
    """
    columns = {
        "p_name": name,
        "rarity": _make_rarity(),
        "cardset": _make_cardset(),
        "set_num": 1,
        "pos_1": pos,
        "image": "https://example.com/img.png",
        "mlbclub": "TST",
        "franchise": "TST",
        "description": f"wp13 test: {name}",
    }
    return Player.create(**columns)


def _make_team(abbrev: str, gmid: int) -> Team:
    """Insert and return a non-AI season-11 Team identified by *abbrev*.

    *abbrev* doubles as the short name and seeds the long name and GM name;
    *gmid* is stored as given.  Wallet and value columns get fixed amounts.
    """
    columns = {
        "abbrev": abbrev,
        "sname": abbrev,
        "lname": f"Team {abbrev}",
        "gmid": gmid,
        "gmname": f"gm_{abbrev.lower()}",
        "gsheet": "https://docs.google.com/spreadsheets/wp13",
        "wallet": 500,
        "team_value": 1000,
        "collection_value": 1000,
        "season": 11,
        "is_ai": False,
    }
    return Team.create(**columns)


def _make_game(team_a, team_b) -> StratGame:
    """Insert and return a season-11 "ranked" StratGame.

    *team_a* is stored as the away team and *team_b* as the home team.
    """
    matchup = {"away_team": team_a, "home_team": team_b}
    return StratGame.create(season=11, game_type="ranked", **matchup)


def _make_play(game, play_num, batter, batter_team, pitcher, pitcher_team, **stats):
    """Insert and return a StratPlay, filling in any column not given in *stats*.

    The baseline is on_base_code "000", inning_half "top", inning 1, batting
    order 1, and 0 for every counting column listed below.  Keyword arguments
    in *stats* override the baseline values of the same name.
    """
    zeroed_columns = (
        "starting_outs",
        "away_score",
        "home_score",
        "pa",
        "ab",
        "hit",
        "run",
        "double",
        "triple",
        "homerun",
        "bb",
        "so",
        "hbp",
        "rbi",
        "sb",
        "cs",
        "outs",
        "sac",
        "ibb",
        "gidp",
        "bphr",
        "bpfo",
        "bp1b",
        "bplo",
    )
    columns = {
        "on_base_code": "000",
        "inning_half": "top",
        "inning_num": 1,
        "batting_order": 1,
        **dict.fromkeys(zeroed_columns, 0),
        # Caller-supplied values win over the baseline.
        **stats,
    }
    return StratPlay.create(
        game=game,
        play_num=play_num,
        batter=batter,
        batter_team=batter_team,
        pitcher=pitcher,
        pitcher_team=pitcher_team,
        **columns,
    )


def _make_track(
    name: str = "WP13 Batter Track", card_type: str = "batter"
) -> RefractorTrack:
    """Return the RefractorTrack called *name*, creating it if it does not exist.

    A newly created track uses the formula "pa + tb * 2" with tier thresholds
    37 / 149 / 448 / 896.  An existing track with the same name is returned
    as-is, so *card_type* only takes effect on creation.
    """
    new_row_values = {
        "card_type": card_type,
        "formula": "pa + tb * 2",
        "t1_threshold": 37,
        "t2_threshold": 149,
        "t3_threshold": 448,
        "t4_threshold": 896,
    }
    return RefractorTrack.get_or_create(name=name, defaults=new_row_values)[0]


def _make_state(
    player, team, track, current_tier=0, current_value=0.0
) -> RefractorCardState:
    """Insert and return a RefractorCardState for *player* on *team*.

    The state starts on *track* at the given tier and value, is not fully
    evolved, and has no last-evaluated timestamp.
    """
    columns = {
        "player": player,
        "team": team,
        "track": track,
        "current_tier": current_tier,
        "current_value": current_value,
        "fully_evolved": False,
        "last_evaluated_at": None,
    }
    return RefractorCardState.create(**columns)


# ---------------------------------------------------------------------------
# Tests: POST /api/v2/season-stats/update-game/{game_id}
# ---------------------------------------------------------------------------


def test_update_game_creates_season_stats_rows(client):
    """POST update-game writes a BattingSeasonStats row for a batter in the game.

    What: One batter faces one pitcher for three plays of one PA each.  After
    the endpoint call a season-11 BattingSeasonStats row for that batter and
    team must exist with pa=3.

    Why: This is the core write path.  Without the row, the evaluator that
    runs afterwards would only ever see zero career stats.
    """
    away = _make_team("WU1", gmid=20001)
    home = _make_team("WU2", gmid=20002)
    hitter = _make_player("WP13 Batter A")
    hurler = _make_player("WP13 Pitcher A", pos="SP")
    game = _make_game(away, home)

    for play_num in range(1, 4):
        _make_play(game, play_num, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    url = f"/api/v2/season-stats/update-game/{game.id}"
    response = client.post(url, headers=AUTH_HEADER)
    assert response.status_code == 200

    row = BattingSeasonStats.get_or_none(
        (BattingSeasonStats.player == hitter)
        & (BattingSeasonStats.team == away)
        & (BattingSeasonStats.season == 11)
    )
    assert row is not None
    assert row.pa == 3


def test_update_game_response_shape(client):
    """POST update-game answers with an "updated" count and skipped=false.

    What: A single play between one batter and one pitcher is processed for
    the first time.  The JSON body must carry "updated" >= 1 and "skipped"
    equal to false.

    Why: The bot logs "updated" as the number of rows touched and reads
    "skipped" to recognise a re-delivered game.
    """
    away = _make_team("WS1", gmid=20011)
    home = _make_team("WS2", gmid=20012)
    hitter = _make_player("WP13 Batter S")
    hurler = _make_player("WP13 Pitcher S", pos="SP")
    game = _make_game(away, home)

    _make_play(game, 1, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    url = f"/api/v2/season-stats/update-game/{game.id}"
    response = client.post(url, headers=AUTH_HEADER)
    assert response.status_code == 200

    payload = response.json()
    assert "updated" in payload
    assert payload["updated"] >= 1
    assert payload["skipped"] is False


def test_update_game_idempotent(client):
    """A second update-game call for the same game is reported as skipped.

    What: Process a three-PA game, then POST the same game_id again.  The
    first response has skipped=false; the second has skipped=true and
    updated=0, and the stored pa is still 3 rather than 6.

    Why: Game-complete events may be delivered more than once.  Counting a
    game twice would corrupt every stat derived from it.
    """
    away = _make_team("WI1", gmid=20021)
    home = _make_team("WI2", gmid=20022)
    hitter = _make_player("WP13 Batter I")
    hurler = _make_player("WP13 Pitcher I", pos="SP")
    game = _make_game(away, home)

    for play_num in range(1, 4):
        _make_play(game, play_num, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    url = f"/api/v2/season-stats/update-game/{game.id}"

    first = client.post(url, headers=AUTH_HEADER)
    assert first.status_code == 200
    assert first.json()["skipped"] is False

    second = client.post(url, headers=AUTH_HEADER)
    assert second.status_code == 200
    replay = second.json()
    assert replay["skipped"] is True
    assert replay["updated"] == 0

    row = BattingSeasonStats.get(
        (BattingSeasonStats.player == hitter) & (BattingSeasonStats.team == away)
    )
    # Three PA from the single processed delivery, not six.
    assert row.pa == 3


# ---------------------------------------------------------------------------
# Tests: POST /api/v2/refractor/evaluate-game/{game_id}
# ---------------------------------------------------------------------------


def test_evaluate_game_increases_current_value(client):
    """update-game followed by evaluate-game lifts current_value above 0.

    What: A batter holding a RefractorCardState records three plays, each
    with pa=1 and hit=1.  update-game is called to write the stats, then
    evaluate-game to recompute the card.  The stored current_value must end
    up greater than 0.

    Why: This is the end-to-end path (stats in, evaluate, value updated).  A
    value stuck at 0 would mean the card can never advance.
    """
    away = _make_team("WE1", gmid=20031)
    home = _make_team("WE2", gmid=20032)
    hitter = _make_player("WP13 Batter E")
    hurler = _make_player("WP13 Pitcher E", pos="SP")
    game = _make_game(away, home)
    _make_state(hitter, away, _make_track())

    for play_num in range(1, 4):
        _make_play(
            game, play_num, hitter, away, hurler, home, pa=1, ab=1, hit=1, outs=0
        )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    evaluate_url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(evaluate_url, headers=AUTH_HEADER)
    assert response.status_code == 200

    card_state = RefractorCardState.get(
        (RefractorCardState.player == hitter) & (RefractorCardState.team == away)
    )
    assert card_state.current_value > 0


def test_evaluate_game_tier_advancement(client):
    """A game that carries a card past a tier threshold advances its tier.

    What: The batter is seeded with a season-10 BattingSeasonStats row of
    pa=34, just under the track's T1 threshold of 37.  The game adds four
    more PA.  After update-game and evaluate-game, current_tier must be at
    least 1.

    Why: Tier advancement is the core deliverable.  An off-by-one in the
    threshold comparison, or a tier that is never persisted, would leave the
    card permanently un-evolved.
    """
    away = _make_team("WT1", gmid=20041)
    home = _make_team("WT2", gmid=20042)
    hitter = _make_player("WP13 Batter T")
    hurler = _make_player("WP13 Pitcher T", pos="SP")
    game = _make_game(away, home)
    track = _make_track(name="WP13 Tier Adv Track")
    _make_state(hitter, away, track, current_tier=0, current_value=34.0)

    # Prior-season stats: 34 PA, three short of the T1 threshold.
    BattingSeasonStats.create(player=hitter, team=away, season=10, pa=34)

    # Four more PA from this game take the total to 38.
    for play_num in range(1, 5):
        _make_play(game, play_num, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    evaluate_url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(evaluate_url, headers=AUTH_HEADER)
    assert response.status_code == 200

    card_state = RefractorCardState.get(
        (RefractorCardState.player == hitter) & (RefractorCardState.team == away)
    )
    assert card_state.current_tier >= 1


def test_evaluate_game_no_tier_advancement(client):
    """A game with insufficient stats does not advance the tier.

    What: A batter starts at tier=0 with current_value=0.  The game adds only
    2 PA, far below the T1 threshold of 37.  After update-game succeeds and
    evaluate-game reports at least one evaluated card, tier_ups must be empty
    and the stored tier must still be 0.

    Why: We need to confirm the threshold guard works correctly — cards should
    not advance prematurely before earning the required stats.  The extra
    checks on the update-game status and the evaluated count stop the test
    from passing vacuously when the stats were never written or the card was
    never evaluated.
    """
    team_a = _make_team("WN1", gmid=20051)
    team_b = _make_team("WN2", gmid=20052)
    batter = _make_player("WP13 Batter N")
    pitcher = _make_player("WP13 Pitcher N", pos="SP")
    game = _make_game(team_a, team_b)
    track = _make_track(name="WP13 No-Adv Track")
    _make_state(batter, team_a, track, current_tier=0)

    # Only 2 PA — far below T1=37
    for i in range(2):
        _make_play(game, i + 1, batter, team_a, pitcher, team_b, pa=1, ab=1, outs=1)

    update_resp = client.post(
        f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER
    )
    # A failed stats write would also yield "no tier-up", so rule it out.
    assert update_resp.status_code == 200

    resp = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert resp.status_code == 200
    data = resp.json()

    # The card must actually have been evaluated for "no tier-up" to mean
    # anything; a skipped card would leave tier_ups empty as well.
    assert data["evaluated"] >= 1
    assert data["tier_ups"] == []

    state = RefractorCardState.get(
        (RefractorCardState.player == batter) & (RefractorCardState.team == team_a)
    )
    assert state.current_tier == 0


def test_evaluate_game_tier_ups_in_response(client):
    """evaluate-game lists a tier_ups entry for a player who advances.

    What: A tier-0 batter is seeded with a season-10 row of pa=34 (below the
    T1 threshold of 37) and the game adds four PA.  The response must report
    evaluated >= 1 and exactly one tier_ups entry carrying player_id,
    team_id, player_name, old_tier, new_tier, current_value and track_name.

    Why: The bot drives notifications and card-upgrade animations from
    tier_ups.  A missing or malformed entry would silently drop the
    announcement.
    """
    away = _make_team("WR1", gmid=20061)
    home = _make_team("WR2", gmid=20062)
    hitter = _make_player("WP13 Batter R")
    hurler = _make_player("WP13 Pitcher R", pos="SP")
    game = _make_game(away, home)
    track = _make_track(name="WP13 Tier-Ups Track")
    _make_state(hitter, away, track, current_tier=0)

    # Prior-season stats sit just under the threshold.
    BattingSeasonStats.create(player=hitter, team=away, season=10, pa=34)

    # This game's four PA push the total past T1.
    for play_num in range(1, 5):
        _make_play(game, play_num, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    evaluate_url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(evaluate_url, headers=AUTH_HEADER)
    assert response.status_code == 200
    payload = response.json()

    assert payload["evaluated"] >= 1
    assert len(payload["tier_ups"]) == 1

    entry = payload["tier_ups"][0]
    assert entry["player_id"] == hitter.player_id
    assert entry["team_id"] == away.id
    assert entry["player_name"] == "WP13 Batter R"
    assert entry["old_tier"] == 0
    assert entry["new_tier"] >= 1
    assert entry["current_value"] > 0
    assert entry["track_name"] == "WP13 Tier-Ups Track"


def test_evaluate_game_skips_players_without_state(client):
    """Players lacking a RefractorCardState are skipped without an error.

    What: Two batters appear in the game but only one has a card state.
    evaluate-game must return 200 and report evaluated == 1.

    Why: Not every player in a game has a card state.  Failing the request
    on a missing state would break evaluation for the whole batch.
    """
    away = _make_team("WK1", gmid=20071)
    home = _make_team("WK2", gmid=20072)
    tracked_hitter = _make_player("WP13 Batter WithState")
    untracked_hitter = _make_player("WP13 Batter NoState")
    hurler = _make_player("WP13 Pitcher K", pos="SP")
    game = _make_game(away, home)
    track = _make_track(name="WP13 Skip Track")

    # A card state exists for the first batter only.
    _make_state(tracked_hitter, away, track)

    for play_num, hitter in enumerate((tracked_hitter, untracked_hitter), start=1):
        _make_play(game, play_num, hitter, away, hurler, home, pa=1, ab=1, outs=1)

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    evaluate_url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(evaluate_url, headers=AUTH_HEADER)
    assert response.status_code == 200
    payload = response.json()

    # Exactly one evaluation: the batter that has a state.
    assert payload["evaluated"] == 1


# ---------------------------------------------------------------------------
# Tests: Auth required on both endpoints
# ---------------------------------------------------------------------------


def test_auth_required_update_game(client):
    """update-game answers 401 when no bearer token is sent.

    What: POST to update-game for an existing game with no Authorization
    header at all.

    Why: The endpoint is a callback that must not be reachable without a
    valid bearer token.
    """
    away = _make_team("WA1", gmid=20081)
    home = _make_team("WA2", gmid=20082)
    game = _make_game(away, home)

    url = f"/api/v2/season-stats/update-game/{game.id}"
    response = client.post(url)
    assert response.status_code == 401


def test_auth_required_evaluate_game(client):
    """evaluate-game answers 401 when no bearer token is sent.

    What: POST to evaluate-game for an existing game with no Authorization
    header at all.

    Why: Same requirement as update-game: the callback must be authenticated
    so that unauthorised callers cannot trigger evaluations.
    """
    away = _make_team("WB1", gmid=20091)
    home = _make_team("WB2", gmid=20092)
    game = _make_game(away, home)

    url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(url)
    assert response.status_code == 401


# ---------------------------------------------------------------------------
# T1-3: evaluate-game with non-existent game_id
# ---------------------------------------------------------------------------


def test_evaluate_game_nonexistent_game_id(client):
    """evaluate-game for an unknown game_id returns an empty 200 result.

    What: No StratGame row is created, so id 99999 does not exist.  The
    request is sent with valid auth.

    Why: Pins the current contract: a missing game is not an error, it simply
    yields {"evaluated": 0, "tier_ups": []} because there are no plays and
    therefore nobody to evaluate.  If the endpoint is later changed to answer
    404 for unknown games, this test fails and flags the contract change.
    """
    response = client.post(
        "/api/v2/refractor/evaluate-game/99999", headers=AUTH_HEADER
    )
    assert response.status_code == 200

    payload = response.json()
    assert payload["evaluated"] == 0
    assert payload["tier_ups"] == []


# ---------------------------------------------------------------------------
# T2-3: evaluate-game with zero plays
# ---------------------------------------------------------------------------


def test_evaluate_game_zero_plays(client):
    """evaluate-game for a game without any plays returns an empty result.

    What: A StratGame is created but no StratPlay rows are added to it.
    evaluate-game is then called for that game.

    Why: With no plays there are no players to evaluate.  The endpoint must
    still answer 200 with the usual keys (evaluated == 0, tier_ups == [])
    instead of crashing or changing shape.
    """
    away = _make_team("ZP1", gmid=20101)
    home = _make_team("ZP2", gmid=20102)
    # No plays are created for this game on purpose.
    game = _make_game(away, home)

    url = f"/api/v2/refractor/evaluate-game/{game.id}"
    response = client.post(url, headers=AUTH_HEADER)
    assert response.status_code == 200

    payload = response.json()
    assert payload["evaluated"] == 0
    assert payload["tier_ups"] == []


# ---------------------------------------------------------------------------
# T2-9: Per-player error isolation in evaluate_game
# ---------------------------------------------------------------------------


def test_evaluate_game_error_isolation(client, monkeypatch):
    """An exception raised for one player does not abort the rest of the batch.

    What: Create two batters in the same game, both with RefractorCardState
    rows.  Patch evaluate_card so that it raises RuntimeError for one specific
    player and delegates to the real implementation for everyone else.  The
    failure is keyed on player_id rather than on call order, so the outcome
    does not depend on the order in which the endpoint visits players.
    Verify the endpoint returns 200, evaluated==1 (not 0 or 2), the failing
    player is absent from tier_ups, and the simulated failure really fired.

    Why: The evaluate-game loop catches per-player exceptions and logs them.
    If the isolation breaks, a single bad card would silently drop all
    evaluations for the rest of the game.  The 'evaluated' count is the
    observable signal that error isolation is functioning.

    Implementation note: evaluate_card is patched on its source module
    (app.services.refractor_evaluator).  The wrapper records every player_id
    it is asked to evaluate so the test can prove the failing branch ran.
    """
    from app.services import refractor_evaluator

    team_a = _make_team("EI1", gmid=20111)
    team_b = _make_team("EI2", gmid=20112)

    batter_fail = _make_player("WP13 Fail Batter", pos="1B")
    batter_ok = _make_player("WP13 Ok Batter", pos="1B")
    pitcher = _make_player("WP13 EI Pitcher", pos="SP")

    game = _make_game(team_a, team_b)

    # Both batters need a track/state so they are not skipped by the
    # "no state" guard before evaluate_card is called.
    track = _make_track(name="EI Batter Track")
    _make_state(batter_fail, team_a, track)
    _make_state(batter_ok, team_a, track)

    _make_play(game, 1, batter_fail, team_a, pitcher, team_b, pa=1, ab=1, outs=1)
    _make_play(game, 2, batter_ok, team_a, pitcher, team_b, pa=1, ab=1, outs=1)

    # Keep a handle on the real evaluate_card so non-failing players still
    # go through the genuine evaluation path.
    real_evaluate = refractor_evaluator.evaluate_card

    fail_player_id = batter_fail.player_id
    attempted_ids = []

    def patched_evaluate(player_id, team_id, **kwargs):
        attempted_ids.append(player_id)
        if player_id == fail_player_id:
            raise RuntimeError("simulated per-player error")
        return real_evaluate(player_id, team_id, **kwargs)

    # The router does `from ..services.refractor_evaluator import evaluate_card`
    # inside the async function body, so the local import re-resolves on each
    # call.  Patching the function on its source module ensures the local `from`
    # import picks up our patched version when the route handler executes.
    monkeypatch.setattr(
        "app.services.refractor_evaluator.evaluate_card", patched_evaluate
    )

    resp = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert resp.status_code == 200
    data = resp.json()

    # The simulated failure must actually have been triggered; otherwise the
    # assertions below would not be exercising error isolation at all.
    assert fail_player_id in attempted_ids

    # One player succeeded; one was caught by the exception handler
    assert data["evaluated"] == 1
    # The failing player must not appear in tier_ups
    tier_up_ids = [tu["player_id"] for tu in data["tier_ups"]]
    assert fail_player_id not in tier_up_ids