Merge pull request 'test: refractor system Tier 3 test coverage' (#171) from test/refractor-tier3 into main
All checks were successful
Build Docker Image / build (push) Successful in 8m11s

This commit is contained in:
cal 2026-03-25 04:13:17 +00:00
commit de9b511ae9
3 changed files with 477 additions and 3 deletions

View File

@ -412,3 +412,212 @@ class TestReturnShape:
assert isinstance(ts, str) and len(ts) > 0
# Must be parseable as a datetime
datetime.fromisoformat(ts)
class TestFullyEvolvedFlagCorrection:
"""T3-7: fully_evolved/tier mismatch is corrected by evaluate_card.
A database corruption where fully_evolved=True but current_tier < 4 can
occur if the flag was set incorrectly by a migration or external script.
evaluate_card must re-derive fully_evolved from the freshly-computed tier
(after the no-regression max() is applied), not trust the stored flag.
"""
def test_fully_evolved_flag_corrected_when_tier_below_4(self, batter_track):
"""fully_evolved=True with current_tier=3 is corrected to False after evaluation.
What: Manually set database state to fully_evolved=True, current_tier=3
(a corruption scenario tier 3 cannot be "fully evolved" since T4 is
the maximum tier). Provide stats that compute to a value in the T3
range (value=500, which is >= T3=448 but < T4=896).
After evaluate_card:
- computed value = 500 new_tier = 3
- no-regression: max(current_tier=3, new_tier=3) = 3 tier stays 3
- fully_evolved = (3 >= 4) = False flag is corrected
Why: The evaluator always recomputes fully_evolved from the final
current_tier rather than preserving the stored flag. This ensures
that a corrupted fully_evolved=True at tier<4 is silently repaired
on the next evaluation without requiring a separate migration.
"""
# Inject corruption: fully_evolved=True but tier=3
state = CardStateStub.create(
player_id=1,
team_id=1,
track=batter_track,
current_tier=3,
current_value=500.0,
fully_evolved=True, # intentionally wrong
last_evaluated_at=None,
)
# Stats that compute to value=500: pa=500, no hits → value=500+0=500
# T3 threshold=448, T4 threshold=896 → tier=3, NOT 4
_make_stats(1, 1, 1, pa=500)
result = _eval(1, 1)
assert result["current_tier"] == 3, (
f"Expected tier=3 after evaluation with value=500, got {result['current_tier']}"
)
assert result["fully_evolved"] is False, (
"fully_evolved should have been corrected to False for tier=3, "
f"got {result['fully_evolved']}"
)
# Confirm the database row was updated (not just the return dict)
state_reloaded = CardStateStub.get_by_id(state.id)
assert state_reloaded.fully_evolved is False, (
"fully_evolved was not persisted as False after correction"
)
def test_fully_evolved_flag_preserved_when_tier_reaches_4(self, batter_track):
"""fully_evolved=True with current_tier=3 stays True when new stats push to T4.
What: Same corruption setup as above (fully_evolved=True, tier=3),
but now provide stats with value=900 (>= T4=896).
After evaluate_card:
- computed value = 900 new_tier = 4
- no-regression: max(current_tier=3, new_tier=4) = 4 advances to 4
- fully_evolved = (4 >= 4) = True flag stays True (correctly)
Why: Confirms the evaluator correctly sets fully_evolved=True when
the re-computed tier legitimately reaches T4 regardless of whether
the stored flag was already True before evaluation.
"""
CardStateStub.create(
player_id=1,
team_id=1,
track=batter_track,
current_tier=3,
current_value=500.0,
fully_evolved=True, # stored flag (will be re-derived)
last_evaluated_at=None,
)
# pa=900 → value=900 >= T4=896 → new_tier=4
_make_stats(1, 1, 1, pa=900)
result = _eval(1, 1)
assert result["current_tier"] == 4, (
f"Expected tier=4 for value=900, got {result['current_tier']}"
)
assert result["fully_evolved"] is True, (
f"Expected fully_evolved=True for tier=4, got {result['fully_evolved']}"
)
class TestMultiTeamStatIsolation:
"""T3-8: A player's refractor value is isolated to a specific team's stats.
The evaluator queries BattingSeasonStats WHERE player_id=? AND team_id=?.
When a player has stats on two different teams in the same season, each
team's RefractorCardState must reflect only that team's stats not a
combined total.
"""
def test_multi_team_same_season_stats_isolated(self, batter_track):
"""Each team's refractor value reflects only that team's stats, not combined.
What: Create one player with BattingSeasonStats on team_id=1 (pa=80)
and team_id=2 (pa=120) in the same season. Create a RefractorCardState
for each team. Evaluate each team's card separately and verify:
- Team 1 state: value = 80 tier = T1 (80 >= T1=37, < T2=149)
- Team 2 state: value = 120 tier = T1 (120 >= T1=37, < T2=149)
- Neither value equals the combined total (80+120=200 would be T2)
Why: Confirms the `WHERE player_id=? AND team_id=?` filter in the
evaluator is correctly applied. Without proper team isolation, the
combined total of 200 would cross the T2 threshold (149) and both
states would be incorrectly assigned to T2. This is a critical
correctness requirement: a player traded between teams should have
separate refractor progressions for their time with each franchise.
"""
# Stats on team 1: pa=80 → value=80 (T1: 37<=80<149)
_make_stats(player_id=1, team_id=1, season=11, pa=80)
# Stats on team 2: pa=120 → value=120 (T1: 37<=120<149)
_make_stats(player_id=1, team_id=2, season=11, pa=120)
# combined pa would be 200 → value=200 → T2 (149<=200<448)
# Each team must see only its own stats, not 200
_make_state(player_id=1, team_id=1, track=batter_track)
_make_state(player_id=1, team_id=2, track=batter_track)
result_team1 = _eval(player_id=1, team_id=1)
result_team2 = _eval(player_id=1, team_id=2)
# Team 1: only pa=80 counted → value=80 → T1
assert result_team1["current_value"] == 80.0, (
f"Team 1 value should be 80.0 (its own stats only), "
f"got {result_team1['current_value']}"
)
assert result_team1["current_tier"] == 1, (
f"Team 1 tier should be T1 for value=80, got {result_team1['current_tier']}"
)
# Team 2: only pa=120 counted → value=120 → T1
assert result_team2["current_value"] == 120.0, (
f"Team 2 value should be 120.0 (its own stats only), "
f"got {result_team2['current_value']}"
)
assert result_team2["current_tier"] == 1, (
f"Team 2 tier should be T1 for value=120, got {result_team2['current_tier']}"
)
# Sanity: neither team crossed T2 (which would happen if stats were combined)
assert (
result_team1["current_tier"] != 2 and result_team2["current_tier"] != 2
), (
"At least one team was incorrectly assigned T2 — stats may have been combined"
)
def test_multi_team_different_seasons_isolated(self, batter_track):
"""Stats for the same player across multiple seasons remain per-team isolated.
What: Same player with two seasons of stats for each of two teams:
- team_id=1: season 10 pa=90, season 11 pa=70 combined=160
- team_id=2: season 10 pa=100, season 11 pa=80 combined=180
After evaluation:
- Team 1: value=160 T2 (149<=160<448)
- Team 2: value=180 T2 (149<=180<448)
The test confirms that cross-team season aggregation does not bleed
stats from team 2 into team 1's calculation or vice versa.
Why: Multi-season aggregation and multi-team isolation must work
together. A bug that incorrectly sums all player stats regardless
of team would produce combined values of 340 T2, which coincidentally
passes, but the per-team values and tiers would be wrong.
This test uses values where cross-contamination would produce a
materially different value (340 vs 160/180), catching that class of bug.
"""
# Team 1 stats: total pa=160 → value=160 → T2
_make_stats(player_id=1, team_id=1, season=10, pa=90)
_make_stats(player_id=1, team_id=1, season=11, pa=70)
# Team 2 stats: total pa=180 → value=180 → T2
_make_stats(player_id=1, team_id=2, season=10, pa=100)
_make_stats(player_id=1, team_id=2, season=11, pa=80)
_make_state(player_id=1, team_id=1, track=batter_track)
_make_state(player_id=1, team_id=2, track=batter_track)
result_team1 = _eval(player_id=1, team_id=1)
result_team2 = _eval(player_id=1, team_id=2)
assert result_team1["current_value"] == 160.0, (
f"Team 1 multi-season value should be 160.0, got {result_team1['current_value']}"
)
assert result_team1["current_tier"] == 2, (
f"Team 1 tier should be T2 for value=160, got {result_team1['current_tier']}"
)
assert result_team2["current_value"] == 180.0, (
f"Team 2 multi-season value should be 180.0, got {result_team2['current_value']}"
)
assert result_team2["current_tier"] == 2, (
f"Team 2 tier should be T2 for value=180, got {result_team2['current_tier']}"
)

View File

@ -34,6 +34,11 @@ Test matrix
test_get_card_404_no_state -- card with no RefractorCardState returns 404
test_duplicate_cards_share_state -- two cards same player+team return the same state row
test_auth_required -- missing token returns 401 on both endpoints
Tier 3 tests (T3-6) use a SQLite-backed TestClient and run without a PostgreSQL
connection. They test GET /api/v2/refractor/cards/{card_id} when the state row
has last_evaluated_at=None (card initialised but never evaluated).
test_get_card_state_last_evaluated_at_null -- last_evaluated_at: null in response
"""
import os
@ -640,12 +645,12 @@ def test_auth_required(client, seeded_data):
# ===========================================================================
# SQLite-backed tests for T2-4, T2-5, T2-6
# SQLite-backed tests for T2-4, T2-5, T2-6, T3-6
#
# These tests use the same shared-memory SQLite pattern as test_postgame_refractor
# so they run without a PostgreSQL connection. They test the
# GET /api/v2/teams/{team_id}/refractors and POST /refractor/cards/{card_id}/evaluate
# endpoints in isolation.
# GET /api/v2/teams/{team_id}/refractors, POST /refractor/cards/{card_id}/evaluate,
# and GET /api/v2/refractor/cards/{card_id} endpoints in isolation.
# ===========================================================================
_state_api_db = SqliteDatabase(
@ -895,3 +900,59 @@ def test_evaluate_card_zero_stats_stays_t0(setup_state_api_db, state_api_client)
data = resp.json()
assert data["current_tier"] == 0
assert data["current_value"] == 0.0
# ---------------------------------------------------------------------------
# T3-6: GET /refractor/cards/{card_id} — last_evaluated_at is None
# ---------------------------------------------------------------------------
def test_get_card_state_last_evaluated_at_null(setup_state_api_db, state_api_client):
"""GET /refractor/cards/{card_id} returns last_evaluated_at: null for un-evaluated card.
What: Create a Player, Team, Card, and RefractorCardState where
last_evaluated_at is explicitly None (the state was initialised via a
pack-open hook but has never been through the evaluator). Call
GET /api/v2/refractor/cards/{card_id} and verify:
- The response status is 200 (not a 500 crash from calling .isoformat() on None).
- The response body contains the key 'last_evaluated_at'.
- The value of 'last_evaluated_at' is JSON null (Python None after parsing).
Why: The _build_card_state_response helper serialises last_evaluated_at
with `state.last_evaluated_at.isoformat() if state.last_evaluated_at else None`.
This test confirms that the None branch is exercised and the field is always
present in the response envelope. Callers must be able to distinguish
"never evaluated" (null) from a real ISO-8601 timestamp, and the API must
not crash on a newly-created card that has not yet been evaluated.
"""
team = _sa_make_team("SA_T36", gmid=30360)
player = _sa_make_player("T36 Batter", pos="1B")
track = _sa_make_track("batter")
card = _sa_make_card(player, team)
# Create state with last_evaluated_at=None — simulates a freshly initialised
# card that has not yet been through the evaluator
RefractorCardState.create(
player=player,
team=team,
track=track,
current_tier=0,
current_value=0.0,
fully_evolved=False,
last_evaluated_at=None,
)
resp = state_api_client.get(
f"/api/v2/refractor/cards/{card.id}", headers=AUTH_HEADER
)
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}: {resp.text}"
data = resp.json()
# 'last_evaluated_at' must be present as a key even when the value is null
assert "last_evaluated_at" in data, (
"Response is missing the 'last_evaluated_at' key"
)
assert data["last_evaluated_at"] is None, (
f"Expected last_evaluated_at=null for un-evaluated card, "
f"got {data['last_evaluated_at']!r}"
)

View File

@ -11,12 +11,47 @@ Tests auto-skip when POSTGRES_HOST is not set.
Test data is inserted via psycopg2 before the test module runs and deleted
afterwards so the tests are repeatable. ON CONFLICT keeps the table clean
even if a previous run did not complete teardown.
Tier 3 tests (T3-1) in this file use a SQLite-backed TestClient so they run
without a PostgreSQL connection. They test the card_type filter edge cases:
an unrecognised card_type string and an empty string should both return an
empty list (200 with count=0) rather than an error.
"""
import os
import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from peewee import SqliteDatabase
os.environ.setdefault("API_TOKEN", "test-token")
from app.db_engine import ( # noqa: E402
BattingSeasonStats,
Card,
Cardset,
Decision,
Event,
MlbPlayer,
Pack,
PackType,
PitchingSeasonStats,
Player,
ProcessedGame,
Rarity,
RefractorCardState,
RefractorCosmetic,
RefractorTierBoost,
RefractorTrack,
Roster,
RosterSlot,
ScoutClaim,
ScoutOpportunity,
StratGame,
StratPlay,
Team,
)
POSTGRES_HOST = os.environ.get("POSTGRES_HOST")
_skip_no_pg = pytest.mark.skipif(
@ -130,3 +165,172 @@ def test_auth_required(client, seeded_tracks):
track_id = seeded_tracks[0]
resp_single = client.get(f"/api/v2/refractor/tracks/{track_id}")
assert resp_single.status_code == 401
# ===========================================================================
# SQLite-backed tests for T3-1: invalid card_type query parameter
#
# These tests run without a PostgreSQL connection. They verify that the
# card_type filter on GET /api/v2/refractor/tracks handles values that match
# no known track (an unrecognised string, an empty string) gracefully: the
# endpoint must return 200 with {"count": 0, "items": []}, not a 4xx/5xx.
# ===========================================================================
_track_api_db = SqliteDatabase(
"file:trackapitest?mode=memory&cache=shared",
uri=True,
pragmas={"foreign_keys": 1},
)
_TRACK_API_MODELS = [
Rarity,
Event,
Cardset,
MlbPlayer,
Player,
Team,
PackType,
Pack,
Card,
Roster,
RosterSlot,
StratGame,
StratPlay,
Decision,
ScoutOpportunity,
ScoutClaim,
BattingSeasonStats,
PitchingSeasonStats,
ProcessedGame,
RefractorTrack,
RefractorCardState,
RefractorTierBoost,
RefractorCosmetic,
]
@pytest.fixture(autouse=False)
def setup_track_api_db():
"""Bind track-API test models to shared-memory SQLite and create tables.
Inserts exactly two tracks (batter, sp) so the filter tests have a
non-empty table to query against confirming that the WHERE predicate
excludes them rather than the table simply being empty.
"""
_track_api_db.bind(_TRACK_API_MODELS)
_track_api_db.connect(reuse_if_open=True)
_track_api_db.create_tables(_TRACK_API_MODELS)
# Seed two real tracks so the table is not empty
RefractorTrack.get_or_create(
name="T3-1 Batter Track",
defaults=dict(
card_type="batter",
formula="pa + tb * 2",
t1_threshold=37,
t2_threshold=149,
t3_threshold=448,
t4_threshold=896,
),
)
RefractorTrack.get_or_create(
name="T3-1 SP Track",
defaults=dict(
card_type="sp",
formula="ip + k",
t1_threshold=10,
t2_threshold=40,
t3_threshold=120,
t4_threshold=240,
),
)
yield _track_api_db
_track_api_db.drop_tables(list(reversed(_TRACK_API_MODELS)), safe=True)
def _build_track_api_app() -> FastAPI:
"""Minimal FastAPI app containing only the refractor router for T3-1 tests."""
from app.routers_v2.refractor import router as refractor_router
app = FastAPI()
@app.middleware("http")
async def db_middleware(request: Request, call_next):
_track_api_db.connect(reuse_if_open=True)
return await call_next(request)
app.include_router(refractor_router)
return app
@pytest.fixture
def track_api_client(setup_track_api_db):
"""FastAPI TestClient for the SQLite-backed T3-1 track filter tests."""
with TestClient(_build_track_api_app()) as c:
yield c
# ---------------------------------------------------------------------------
# T3-1a: card_type=foo (unrecognised value) returns empty list
# ---------------------------------------------------------------------------
def test_invalid_card_type_returns_empty_list(setup_track_api_db, track_api_client):
"""GET /tracks?card_type=foo returns 200 with count=0, not a 4xx/5xx.
What: Query the track list with a card_type value ('foo') that matches
no row in refractor_track. The table contains batter and sp tracks so
the result must be an empty list rather than a full list (which would
indicate the filter was ignored).
Why: The endpoint applies `WHERE card_type == card_type` when the
parameter is not None. An unrecognised value is a valid no-match query
the contract is an empty list, not a validation error. Returning
a 422 Unprocessable Entity or 500 here would break clients that probe
for tracks by card type before knowing which types are registered.
"""
resp = track_api_client.get(
"/api/v2/refractor/tracks?card_type=foo", headers=AUTH_HEADER
)
assert resp.status_code == 200
data = resp.json()
assert data["count"] == 0, (
f"Expected count=0 for unknown card_type 'foo', got {data['count']}"
)
assert data["items"] == [], (
f"Expected empty items list for unknown card_type 'foo', got {data['items']}"
)
# ---------------------------------------------------------------------------
# T3-1b: card_type= (empty string) returns empty list
# ---------------------------------------------------------------------------
def test_empty_string_card_type_returns_empty_list(
setup_track_api_db, track_api_client
):
"""GET /tracks?card_type= (empty string) returns 200 with count=0.
What: Pass an empty string as the card_type query parameter. No track
has card_type='' so the response must be an empty list with count=0.
Why: An empty string is not None FastAPI will pass it through as ''
rather than treating it as an absent parameter. The WHERE predicate
`card_type == ''` produces no matches, which is the correct silent
no-results behaviour. This guards against regressions where an empty
string might be mishandled as a None/absent value and accidentally return
all tracks, or raise a server error.
"""
resp = track_api_client.get(
"/api/v2/refractor/tracks?card_type=", headers=AUTH_HEADER
)
assert resp.status_code == 200
data = resp.json()
assert data["count"] == 0, (
f"Expected count=0 for empty card_type string, got {data['count']}"
)
assert data["items"] == [], (
f"Expected empty items list for empty card_type string, got {data['items']}"
)