Merge pull request 'test: refractor system Tier 3 test coverage' (#171) from test/refractor-tier3 into main

2026-03-25 04:13:17 +00:00 · 2026-03-25 04:13:17 +00:00 · de9b511ae9
commit de9b511ae9
parent 74284fe5a3 906d6e575a
3 changed files with 477 additions and 3 deletions
--- a/tests/test_refractor_evaluator.py
+++ b/tests/test_refractor_evaluator.py
@ -412,3 +412,212 @@ class TestReturnShape:
        assert isinstance(ts, str) and len(ts) > 0
        # Must be parseable as a datetime
        datetime.fromisoformat(ts)
+
+
+class TestFullyEvolvedFlagCorrection:
+    """T3-7: fully_evolved/tier mismatch is corrected by evaluate_card.
+
+    A database corruption where fully_evolved=True but current_tier < 4 can
+    occur if the flag was set incorrectly by a migration or external script.
+    evaluate_card must re-derive fully_evolved from the freshly-computed tier
+    (after the no-regression max() is applied), not trust the stored flag.
+    """
+
+    def test_fully_evolved_flag_corrected_when_tier_below_4(self, batter_track):
+        """fully_evolved=True with current_tier=3 is corrected to False after evaluation.
+
+        What: Manually set database state to fully_evolved=True, current_tier=3
+        (a corruption scenario — tier 3 cannot be "fully evolved" since T4 is
+        the maximum tier).  Provide stats that compute to a value in the T3
+        range (value=500, which is >= T3=448 but < T4=896).
+
+        After evaluate_card:
+          - computed value = 500 → new_tier = 3
+          - no-regression: max(current_tier=3, new_tier=3) = 3  → tier stays 3
+          - fully_evolved = (3 >= 4) = False  → flag is corrected
+
+        Why: The evaluator always recomputes fully_evolved from the final
+        current_tier rather than preserving the stored flag.  This ensures
+        that a corrupted fully_evolved=True at tier<4 is silently repaired
+        on the next evaluation without requiring a separate migration.
+        """
+        # Inject corruption: fully_evolved=True but tier=3
+        state = CardStateStub.create(
+            player_id=1,
+            team_id=1,
+            track=batter_track,
+            current_tier=3,
+            current_value=500.0,
+            fully_evolved=True,  # intentionally wrong
+            last_evaluated_at=None,
+        )
+        # Stats that compute to value=500: pa=500, no hits → value=500+0=500
+        # T3 threshold=448, T4 threshold=896 → tier=3, NOT 4
+        _make_stats(1, 1, 1, pa=500)
+
+        result = _eval(1, 1)
+
+        assert result["current_tier"] == 3, (
+            f"Expected tier=3 after evaluation with value=500, got {result['current_tier']}"
+        )
+        assert result["fully_evolved"] is False, (
+            "fully_evolved should have been corrected to False for tier=3, "
+            f"got {result['fully_evolved']}"
+        )
+
+        # Confirm the database row was updated (not just the return dict)
+        state_reloaded = CardStateStub.get_by_id(state.id)
+        assert state_reloaded.fully_evolved is False, (
+            "fully_evolved was not persisted as False after correction"
+        )
+
+    def test_fully_evolved_flag_preserved_when_tier_reaches_4(self, batter_track):
+        """fully_evolved=True with current_tier=3 stays True when new stats push to T4.
+
+        What: Same corruption setup as above (fully_evolved=True, tier=3),
+        but now provide stats with value=900 (>= T4=896).
+
+        After evaluate_card:
+          - computed value = 900 → new_tier = 4
+          - no-regression: max(current_tier=3, new_tier=4) = 4  → advances to 4
+          - fully_evolved = (4 >= 4) = True  → flag stays True (correctly)
+
+        Why: Confirms the evaluator correctly sets fully_evolved=True when
+        the re-computed tier legitimately reaches T4 regardless of whether
+        the stored flag was already True before evaluation.
+        """
+        CardStateStub.create(
+            player_id=1,
+            team_id=1,
+            track=batter_track,
+            current_tier=3,
+            current_value=500.0,
+            fully_evolved=True,  # stored flag (will be re-derived)
+            last_evaluated_at=None,
+        )
+        # pa=900 → value=900 >= T4=896 → new_tier=4
+        _make_stats(1, 1, 1, pa=900)
+
+        result = _eval(1, 1)
+
+        assert result["current_tier"] == 4, (
+            f"Expected tier=4 for value=900, got {result['current_tier']}"
+        )
+        assert result["fully_evolved"] is True, (
+            f"Expected fully_evolved=True for tier=4, got {result['fully_evolved']}"
+        )
+
+
+class TestMultiTeamStatIsolation:
+    """T3-8: A player's refractor value is isolated to a specific team's stats.
+
+    The evaluator queries BattingSeasonStats WHERE player_id=? AND team_id=?.
+    When a player has stats on two different teams in the same season, each
+    team's RefractorCardState must reflect only that team's stats — not a
+    combined total.
+    """
+
+    def test_multi_team_same_season_stats_isolated(self, batter_track):
+        """Each team's refractor value reflects only that team's stats, not combined.
+
+        What: Create one player with BattingSeasonStats on team_id=1 (pa=80)
+        and team_id=2 (pa=120) in the same season.  Create a RefractorCardState
+        for each team.  Evaluate each team's card separately and verify:
+          - Team 1 state: value = 80 → tier = T1 (80 >= T1=37, < T2=149)
+          - Team 2 state: value = 120 → tier = T1 (120 >= T1=37, < T2=149)
+          - Neither value equals the combined total (80+120=200 → would be T2)
+
+        Why: Confirms the `WHERE player_id=? AND team_id=?` filter in the
+        evaluator is correctly applied.  Without proper team isolation, the
+        combined total of 200 would cross the T2 threshold (149) and both
+        states would be incorrectly assigned to T2.  This is a critical
+        correctness requirement: a player traded between teams should have
+        separate refractor progressions for their time with each franchise.
+        """
+        # Stats on team 1: pa=80 → value=80 (T1: 37<=80<149)
+        _make_stats(player_id=1, team_id=1, season=11, pa=80)
+        # Stats on team 2: pa=120 → value=120 (T1: 37<=120<149)
+        _make_stats(player_id=1, team_id=2, season=11, pa=120)
+
+        # combined pa would be 200 → value=200 → T2 (149<=200<448)
+        # Each team must see only its own stats, not 200
+
+        _make_state(player_id=1, team_id=1, track=batter_track)
+        _make_state(player_id=1, team_id=2, track=batter_track)
+
+        result_team1 = _eval(player_id=1, team_id=1)
+        result_team2 = _eval(player_id=1, team_id=2)
+
+        # Team 1: only pa=80 counted → value=80 → T1
+        assert result_team1["current_value"] == 80.0, (
+            f"Team 1 value should be 80.0 (its own stats only), "
+            f"got {result_team1['current_value']}"
+        )
+        assert result_team1["current_tier"] == 1, (
+            f"Team 1 tier should be T1 for value=80, got {result_team1['current_tier']}"
+        )
+
+        # Team 2: only pa=120 counted → value=120 → T1
+        assert result_team2["current_value"] == 120.0, (
+            f"Team 2 value should be 120.0 (its own stats only), "
+            f"got {result_team2['current_value']}"
+        )
+        assert result_team2["current_tier"] == 1, (
+            f"Team 2 tier should be T1 for value=120, got {result_team2['current_tier']}"
+        )
+
+        # Sanity: neither team crossed T2 (which would happen if stats were combined)
+        assert (
+            result_team1["current_tier"] != 2 and result_team2["current_tier"] != 2
+        ), (
+            "At least one team was incorrectly assigned T2 — stats may have been combined"
+        )
+
+    def test_multi_team_different_seasons_isolated(self, batter_track):
+        """Stats for the same player across multiple seasons remain per-team isolated.
+
+        What: Same player with two seasons of stats for each of two teams:
+          - team_id=1: season 10 pa=90, season 11 pa=70 → combined=160
+          - team_id=2: season 10 pa=100, season 11 pa=80 → combined=180
+
+        After evaluation:
+          - Team 1: value=160 → T2 (149<=160<448)
+          - Team 2: value=180 → T2 (149<=180<448)
+
+        The test confirms that cross-team season aggregation does not bleed
+        stats from team 2 into team 1's calculation or vice versa.
+
+        Why: Multi-season aggregation and multi-team isolation must work
+        together.  A bug that incorrectly sums all player stats regardless
+        of team would produce combined values of 340 → T2, which coincidentally
+        passes, but the per-team values and tiers would be wrong.
+        This test uses values where cross-contamination would produce a
+        materially different value (340 vs 160/180), catching that class of bug.
+        """
+        # Team 1 stats: total pa=160 → value=160 → T2
+        _make_stats(player_id=1, team_id=1, season=10, pa=90)
+        _make_stats(player_id=1, team_id=1, season=11, pa=70)
+
+        # Team 2 stats: total pa=180 → value=180 → T2
+        _make_stats(player_id=1, team_id=2, season=10, pa=100)
+        _make_stats(player_id=1, team_id=2, season=11, pa=80)
+
+        _make_state(player_id=1, team_id=1, track=batter_track)
+        _make_state(player_id=1, team_id=2, track=batter_track)
+
+        result_team1 = _eval(player_id=1, team_id=1)
+        result_team2 = _eval(player_id=1, team_id=2)
+
+        assert result_team1["current_value"] == 160.0, (
+            f"Team 1 multi-season value should be 160.0, got {result_team1['current_value']}"
+        )
+        assert result_team1["current_tier"] == 2, (
+            f"Team 1 tier should be T2 for value=160, got {result_team1['current_tier']}"
+        )
+
+        assert result_team2["current_value"] == 180.0, (
+            f"Team 2 multi-season value should be 180.0, got {result_team2['current_value']}"
+        )
+        assert result_team2["current_tier"] == 2, (
+            f"Team 2 tier should be T2 for value=180, got {result_team2['current_tier']}"
+        )
--- a/tests/test_refractor_state_api.py
+++ b/tests/test_refractor_state_api.py
@ -34,6 +34,11 @@ Test matrix
  test_get_card_404_no_state          -- card with no RefractorCardState returns 404
  test_duplicate_cards_share_state    -- two cards same player+team return the same state row
  test_auth_required                  -- missing token returns 401 on both endpoints
+
+Tier 3 tests (T3-6) use a SQLite-backed TestClient and run without a PostgreSQL
+connection.  They test GET /api/v2/refractor/cards/{card_id} when the state row
+has last_evaluated_at=None (card initialised but never evaluated).
+  test_get_card_state_last_evaluated_at_null -- last_evaluated_at: null in response
 """

 import os
@ -640,12 +645,12 @@ def test_auth_required(client, seeded_data):


 # ===========================================================================
-# SQLite-backed tests for T2-4, T2-5, T2-6
+# SQLite-backed tests for T2-4, T2-5, T2-6, T3-6
 #
 # These tests use the same shared-memory SQLite pattern as test_postgame_refractor
 # so they run without a PostgreSQL connection.  They test the
-# GET /api/v2/teams/{team_id}/refractors and POST /refractor/cards/{card_id}/evaluate
-# endpoints in isolation.
+# GET /api/v2/teams/{team_id}/refractors, POST /refractor/cards/{card_id}/evaluate,
+# and GET /api/v2/refractor/cards/{card_id} endpoints in isolation.
 # ===========================================================================

 _state_api_db = SqliteDatabase(
@ -895,3 +900,59 @@ def test_evaluate_card_zero_stats_stays_t0(setup_state_api_db, state_api_client)
    data = resp.json()
    assert data["current_tier"] == 0
    assert data["current_value"] == 0.0
+
+
+# ---------------------------------------------------------------------------
+# T3-6: GET /refractor/cards/{card_id} — last_evaluated_at is None
+# ---------------------------------------------------------------------------
+
+
+def test_get_card_state_last_evaluated_at_null(setup_state_api_db, state_api_client):
+    """GET /refractor/cards/{card_id} returns last_evaluated_at: null for un-evaluated card.
+
+    What: Create a Player, Team, Card, and RefractorCardState where
+    last_evaluated_at is explicitly None (the state was initialised via a
+    pack-open hook but has never been through the evaluator).  Call
+    GET /api/v2/refractor/cards/{card_id} and verify:
+      - The response status is 200 (not a 500 crash from calling .isoformat() on None).
+      - The response body contains the key 'last_evaluated_at'.
+      - The value of 'last_evaluated_at' is JSON null (Python None after parsing).
+
+    Why: The _build_card_state_response helper serialises last_evaluated_at
+    with `state.last_evaluated_at.isoformat() if state.last_evaluated_at else None`.
+    This test confirms that the None branch is exercised and the field is always
+    present in the response envelope.  Callers must be able to distinguish
+    "never evaluated" (null) from a real ISO-8601 timestamp, and the API must
+    not crash on a newly-created card that has not yet been evaluated.
+    """
+    team = _sa_make_team("SA_T36", gmid=30360)
+    player = _sa_make_player("T36 Batter", pos="1B")
+    track = _sa_make_track("batter")
+    card = _sa_make_card(player, team)
+
+    # Create state with last_evaluated_at=None — simulates a freshly initialised
+    # card that has not yet been through the evaluator
+    RefractorCardState.create(
+        player=player,
+        team=team,
+        track=track,
+        current_tier=0,
+        current_value=0.0,
+        fully_evolved=False,
+        last_evaluated_at=None,
+    )
+
+    resp = state_api_client.get(
+        f"/api/v2/refractor/cards/{card.id}", headers=AUTH_HEADER
+    )
+    assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
+    data = resp.json()
+
+    # 'last_evaluated_at' must be present as a key even when the value is null
+    assert "last_evaluated_at" in data, (
+        "Response is missing the 'last_evaluated_at' key"
+    )
+    assert data["last_evaluated_at"] is None, (
+        f"Expected last_evaluated_at=null for un-evaluated card, "
+        f"got {data['last_evaluated_at']!r}"
+    )
--- a/tests/test_refractor_track_api.py
+++ b/tests/test_refractor_track_api.py
@ -11,12 +11,47 @@ Tests auto-skip when POSTGRES_HOST is not set.
 Test data is inserted via psycopg2 before the test module runs and deleted
 afterwards so the tests are repeatable.  ON CONFLICT keeps the table clean
 even if a previous run did not complete teardown.
+
+Tier 3 tests (T3-1) in this file use a SQLite-backed TestClient so they run
+without a PostgreSQL connection.  They test the card_type filter edge cases:
+an unrecognised card_type string and an empty string should both return an
+empty list (200 with count=0) rather than an error.
 """

 import os

 import pytest
+from fastapi import FastAPI, Request
 from fastapi.testclient import TestClient
+from peewee import SqliteDatabase
+
+os.environ.setdefault("API_TOKEN", "test-token")
+
+from app.db_engine import (  # noqa: E402
+    BattingSeasonStats,
+    Card,
+    Cardset,
+    Decision,
+    Event,
+    MlbPlayer,
+    Pack,
+    PackType,
+    PitchingSeasonStats,
+    Player,
+    ProcessedGame,
+    Rarity,
+    RefractorCardState,
+    RefractorCosmetic,
+    RefractorTierBoost,
+    RefractorTrack,
+    Roster,
+    RosterSlot,
+    ScoutClaim,
+    ScoutOpportunity,
+    StratGame,
+    StratPlay,
+    Team,
+)

 POSTGRES_HOST = os.environ.get("POSTGRES_HOST")
 _skip_no_pg = pytest.mark.skipif(
@ -130,3 +165,172 @@ def test_auth_required(client, seeded_tracks):
    track_id = seeded_tracks[0]
    resp_single = client.get(f"/api/v2/refractor/tracks/{track_id}")
    assert resp_single.status_code == 401
+
+
+# ===========================================================================
+# SQLite-backed tests for T3-1: invalid card_type query parameter
+#
+# These tests run without a PostgreSQL connection.  They verify that the
+# card_type filter on GET /api/v2/refractor/tracks handles values that match
+# no known track (an unrecognised string, an empty string) gracefully: the
+# endpoint must return 200 with {"count": 0, "items": []}, not a 4xx/5xx.
+# ===========================================================================
+
+_track_api_db = SqliteDatabase(
+    "file:trackapitest?mode=memory&cache=shared",
+    uri=True,
+    pragmas={"foreign_keys": 1},
+)
+
+_TRACK_API_MODELS = [
+    Rarity,
+    Event,
+    Cardset,
+    MlbPlayer,
+    Player,
+    Team,
+    PackType,
+    Pack,
+    Card,
+    Roster,
+    RosterSlot,
+    StratGame,
+    StratPlay,
+    Decision,
+    ScoutOpportunity,
+    ScoutClaim,
+    BattingSeasonStats,
+    PitchingSeasonStats,
+    ProcessedGame,
+    RefractorTrack,
+    RefractorCardState,
+    RefractorTierBoost,
+    RefractorCosmetic,
+]
+
+
+@pytest.fixture(autouse=False)
+def setup_track_api_db():
+    """Bind track-API test models to shared-memory SQLite and create tables.
+
+    Inserts exactly two tracks (batter, sp) so the filter tests have a
+    non-empty table to query against — confirming that the WHERE predicate
+    excludes them rather than the table simply being empty.
+    """
+    _track_api_db.bind(_TRACK_API_MODELS)
+    _track_api_db.connect(reuse_if_open=True)
+    _track_api_db.create_tables(_TRACK_API_MODELS)
+
+    # Seed two real tracks so the table is not empty
+    RefractorTrack.get_or_create(
+        name="T3-1 Batter Track",
+        defaults=dict(
+            card_type="batter",
+            formula="pa + tb * 2",
+            t1_threshold=37,
+            t2_threshold=149,
+            t3_threshold=448,
+            t4_threshold=896,
+        ),
+    )
+    RefractorTrack.get_or_create(
+        name="T3-1 SP Track",
+        defaults=dict(
+            card_type="sp",
+            formula="ip + k",
+            t1_threshold=10,
+            t2_threshold=40,
+            t3_threshold=120,
+            t4_threshold=240,
+        ),
+    )
+
+    yield _track_api_db
+    _track_api_db.drop_tables(list(reversed(_TRACK_API_MODELS)), safe=True)
+
+
+def _build_track_api_app() -> FastAPI:
+    """Minimal FastAPI app containing only the refractor router for T3-1 tests."""
+    from app.routers_v2.refractor import router as refractor_router
+
+    app = FastAPI()
+
+    @app.middleware("http")
+    async def db_middleware(request: Request, call_next):
+        _track_api_db.connect(reuse_if_open=True)
+        return await call_next(request)
+
+    app.include_router(refractor_router)
+    return app
+
+
+@pytest.fixture
+def track_api_client(setup_track_api_db):
+    """FastAPI TestClient for the SQLite-backed T3-1 track filter tests."""
+    with TestClient(_build_track_api_app()) as c:
+        yield c
+
+
+# ---------------------------------------------------------------------------
+# T3-1a: card_type=foo (unrecognised value) returns empty list
+# ---------------------------------------------------------------------------
+
+
+def test_invalid_card_type_returns_empty_list(setup_track_api_db, track_api_client):
+    """GET /tracks?card_type=foo returns 200 with count=0, not a 4xx/5xx.
+
+    What: Query the track list with a card_type value ('foo') that matches
+    no row in refractor_track.  The table contains batter and sp tracks so
+    the result must be an empty list rather than a full list (which would
+    indicate the filter was ignored).
+
+    Why: The endpoint applies `WHERE card_type == card_type` when the
+    parameter is not None.  An unrecognised value is a valid no-match query
+    — the contract is an empty list, not a validation error.  Returning
+    a 422 Unprocessable Entity or 500 here would break clients that probe
+    for tracks by card type before knowing which types are registered.
+    """
+    resp = track_api_client.get(
+        "/api/v2/refractor/tracks?card_type=foo", headers=AUTH_HEADER
+    )
+    assert resp.status_code == 200
+    data = resp.json()
+    assert data["count"] == 0, (
+        f"Expected count=0 for unknown card_type 'foo', got {data['count']}"
+    )
+    assert data["items"] == [], (
+        f"Expected empty items list for unknown card_type 'foo', got {data['items']}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# T3-1b: card_type= (empty string) returns empty list
+# ---------------------------------------------------------------------------
+
+
+def test_empty_string_card_type_returns_empty_list(
+    setup_track_api_db, track_api_client
+):
+    """GET /tracks?card_type= (empty string) returns 200 with count=0.
+
+    What: Pass an empty string as the card_type query parameter.  No track
+    has card_type='' so the response must be an empty list with count=0.
+
+    Why: An empty string is not None — FastAPI will pass it through as ''
+    rather than treating it as an absent parameter.  The WHERE predicate
+    `card_type == ''` produces no matches, which is the correct silent
+    no-results behaviour.  This guards against regressions where an empty
+    string might be mishandled as a None/absent value and accidentally return
+    all tracks, or raise a server error.
+    """
+    resp = track_api_client.get(
+        "/api/v2/refractor/tracks?card_type=", headers=AUTH_HEADER
+    )
+    assert resp.status_code == 200
+    data = resp.json()
+    assert data["count"] == 0, (
+        f"Expected count=0 for empty card_type string, got {data['count']}"
+    )
+    assert data["items"] == [], (
+        f"Expected empty items list for empty card_type string, got {data['items']}"
+    )