Implements all gap tests identified in the PO review for the refractor
card progression system (Phase 1 foundation).
TIER 1 (critical):
- T1-1: Negative singles guard in compute_batter_value — documents that
hits=1, doubles=1, triples=1 produces singles=-1 and flows through
unclamped (value=8.0, not 10.0)
- T1-2: SP tier boundary precision with floats — outs=29 (IP=9.666) stays
T0, outs=30 (IP=10.0) promotes to T1; also covers T2 float boundary
- T1-3: evaluate-game with non-existent game_id returns 200 with empty results
- T1-4: Seed threshold ordering + positivity invariant (t1<t2<t3<t4, all >0)
TIER 2 (high):
- T2-1: fully_evolved=True persists when stats are zeroed or drop below
previous tier — no-regression applies to both tier and fully_evolved flag
- T2-2: Parametrized edge cases for _determine_card_type: DH, C, 2B, empty
string, None, and compound "SP/RP" (resolves to "sp", SP checked first)
- T2-3: evaluate-game with zero StratPlay rows returns empty batch result
- T2-4: GET /teams/{id}/refractors with valid team and zero states is empty
- T2-5: GET /teams/99999/refractors documents 200+empty (no team existence check)
- T2-6: POST /cards/{id}/evaluate with zero season stats stays at T0 value=0.0
- T2-9: Per-player error isolation — patches source module so router's local
from-import picks up the patched version; one failure, one success = evaluated=1
- T2-10: Each card_type has exactly one RefractorTrack after seeding
All 101 tests pass (15 PostgreSQL-only tests skip without POSTGRES_HOST).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
415 lines
14 KiB
Python
415 lines
14 KiB
Python
"""Tests for the refractor evaluator service (WP-08).
|
|
|
|
Unit tests verify tier assignment, advancement, partial progress, idempotency,
|
|
full refractor tier, and no-regression behaviour without touching the real database,
|
|
using stub Peewee models bound to an in-memory SQLite database.
|
|
|
|
The formula engine (WP-09) and Peewee models (WP-05/WP-07) are not imported
|
|
from db_engine/formula_engine; instead the tests supply minimal stubs and
|
|
inject them via the _stats_model, _state_model, _compute_value_fn, and
|
|
_tier_from_value_fn overrides on evaluate_card().
|
|
|
|
Stub track thresholds (batter):
|
|
T1: 37 T2: 149 T3: 448 T4: 896
|
|
|
|
Useful reference values:
|
|
value=30 → T0 (below T1=37)
|
|
value=50 → T1 (37 <= 50 < 149)
|
|
value=100 → T1 (stays T1; T2 threshold is 149)
|
|
value=160 → T2 (149 <= 160 < 448)
|
|
value=900 → T4 (>= 896) → fully_evolved
|
|
"""
|
|
|
|
import pytest
|
|
from datetime import datetime
|
|
from peewee import (
|
|
BooleanField,
|
|
CharField,
|
|
DateTimeField,
|
|
FloatField,
|
|
ForeignKeyField,
|
|
IntegerField,
|
|
Model,
|
|
SqliteDatabase,
|
|
)
|
|
|
|
from app.services.refractor_evaluator import evaluate_card
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Stub models — mirror WP-01/WP-04/WP-07 schema without importing db_engine
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_test_db = SqliteDatabase(":memory:")
|
|
|
|
|
|
class TrackStub(Model):
    """Stand-in for RefractorTrack used only by the evaluator tests.

    Holds one row per card type together with the four integer thresholds
    that the tier stub compares a computed value against.
    """

    # One track per card type (enforced by the unique constraint).
    card_type = CharField(unique=True)

    # Minimum value needed to reach tiers 1 through 4 respectively.
    t1_threshold = IntegerField()
    t2_threshold = IntegerField()
    t3_threshold = IntegerField()
    t4_threshold = IntegerField()

    class Meta:
        # Bound to the module-level in-memory SQLite database.
        table_name = "refractor_track"
        database = _test_db
|
|
|
|
|
|
class CardStateStub(Model):
    """Stand-in for RefractorCardState used only by the evaluator tests.

    Stores the progression state of one (player_id, team_id) pair and a
    foreign key to the TrackStub row whose thresholds apply to it.
    """

    # Identity of the card: plain integers, no foreign keys in the stub.
    player_id = IntegerField()
    team_id = IntegerField()

    # Track supplying the tier thresholds for this card.
    track = ForeignKeyField(TrackStub)

    # Progression fields; a new row starts at tier 0 / value 0.0 / not evolved.
    current_tier = IntegerField(default=0)
    current_value = FloatField(default=0.0)
    fully_evolved = BooleanField(default=False)

    # Nullable so a row can exist before its first evaluation.
    last_evaluated_at = DateTimeField(null=True)

    class Meta:
        table_name = "refractor_card_state"
        database = _test_db
        # Composite index on (player_id, team_id); the True flag marks it unique.
        indexes = (
            (("player_id", "team_id"), True),
        )
|
|
|
|
|
|
class StatsStub(Model):
    """Stand-in for PlayerSeasonStats used only by the evaluator tests.

    One row represents one player/team/season; every counting stat defaults
    to 0 so tests only need to pass the stats they care about.
    """

    # Row identity.
    player_id = IntegerField()
    team_id = IntegerField()
    season = IntegerField()

    # Batting counters read by the batter branch of _compute_value.
    pa = IntegerField(default=0)
    hits = IntegerField(default=0)
    doubles = IntegerField(default=0)
    triples = IntegerField(default=0)
    hr = IntegerField(default=0)

    # Pitching counters read by the non-batter branch of _compute_value.
    outs = IntegerField(default=0)
    strikeouts = IntegerField(default=0)

    class Meta:
        table_name = "player_season_stats"
        database = _test_db
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Formula stubs — avoid importing app.services.formula_engine before WP-09
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _compute_value(card_type: str, stats) -> float:
|
|
"""Stub compute_value_for_track: returns pa for batter, outs/3+k for pitchers."""
|
|
if card_type == "batter":
|
|
singles = stats.hits - stats.doubles - stats.triples - stats.hr
|
|
tb = singles + 2 * stats.doubles + 3 * stats.triples + 4 * stats.hr
|
|
return float(stats.pa + tb * 2)
|
|
return stats.outs / 3 + stats.strikeouts
|
|
|
|
|
|
def _tier_from_value(value: float, track) -> int:
|
|
"""Stub tier_from_value using TrackStub fields t1_threshold/t2_threshold/etc."""
|
|
if isinstance(track, dict):
|
|
t1, t2, t3, t4 = (
|
|
track["t1_threshold"],
|
|
track["t2_threshold"],
|
|
track["t3_threshold"],
|
|
track["t4_threshold"],
|
|
)
|
|
else:
|
|
t1, t2, t3, t4 = (
|
|
track.t1_threshold,
|
|
track.t2_threshold,
|
|
track.t3_threshold,
|
|
track.t4_threshold,
|
|
)
|
|
if value >= t4:
|
|
return 4
|
|
if value >= t3:
|
|
return 3
|
|
if value >= t2:
|
|
return 2
|
|
if value >= t1:
|
|
return 1
|
|
return 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _db():
    """Give every test a fresh set of stub tables.

    The tables are created before the test body runs and dropped again once
    it finishes, so rows never leak between tests.
    """
    stub_models = [TrackStub, CardStateStub, StatsStub]

    _test_db.connect(reuse_if_open=True)
    _test_db.create_tables(stub_models)

    yield

    # Drop in the opposite order of creation (dependents first).
    _test_db.drop_tables(list(reversed(stub_models)))
|
|
|
|
|
|
@pytest.fixture()
def batter_track():
    """Insert and return the batter track (T1=37, T2=149, T3=448, T4=896)."""
    thresholds = {
        "t1_threshold": 37,
        "t2_threshold": 149,
        "t3_threshold": 448,
        "t4_threshold": 896,
    }
    return TrackStub.create(card_type="batter", **thresholds)
|
|
|
|
|
|
@pytest.fixture()
def sp_track():
    """Insert and return the starting-pitcher track (T1=10, T2=40, T3=120, T4=240)."""
    thresholds = {
        "t1_threshold": 10,
        "t2_threshold": 40,
        "t3_threshold": 120,
        "t4_threshold": 240,
    }
    return TrackStub.create(card_type="sp", **thresholds)
|
|
|
|
|
|
def _make_state(
    player_id,
    team_id,
    track,
    current_tier=0,
    current_value=0.0,
    fully_evolved=False,
):
    """Insert and return a CardStateStub row that has never been evaluated.

    Args:
        player_id: Player the card state belongs to.
        team_id: Team the card state belongs to.
        track: TrackStub row whose thresholds apply to this card.
        current_tier: Tier to seed the row with (default 0).
        current_value: Value to seed the row with (default 0.0).
        fully_evolved: Flag to seed the row with. Defaults to False, which
            matches the previous hard-coded behaviour; pass True to seed a
            card that has already reached the final tier.

    Returns:
        The created CardStateStub instance, with last_evaluated_at set to None.
    """
    return CardStateStub.create(
        player_id=player_id,
        team_id=team_id,
        track=track,
        current_tier=current_tier,
        current_value=current_value,
        fully_evolved=fully_evolved,
        last_evaluated_at=None,
    )
|
|
|
|
|
|
def _make_stats(player_id, team_id, season, **kwargs):
    """Insert and return one StatsStub row for a player/team/season.

    Any counting stats (pa, hits, outs, ...) are passed as keyword arguments;
    stats that are omitted fall back to the model default of 0.
    """
    row_fields = {
        "player_id": player_id,
        "team_id": team_id,
        "season": season,
        **kwargs,
    }
    return StatsStub.create(**row_fields)
|
|
|
|
|
|
def _eval(player_id, team_id):
    """Call evaluate_card() with every dependency replaced by a local stub.

    The stats model, state model, value formula and tier function are all
    injected through the underscore-prefixed override arguments.
    """
    stub_overrides = {
        "_stats_model": StatsStub,
        "_state_model": CardStateStub,
        "_compute_value_fn": _compute_value,
        "_tier_from_value_fn": _tier_from_value,
    }
    return evaluate_card(player_id, team_id, **stub_overrides)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Unit tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestTierAssignment:
    """The computed value is mapped onto a tier via the batter thresholds."""

    def test_value_below_t1_stays_t0(self, batter_track):
        """A value of 30 sits under the T1 cutoff (37), so the tier remains 0."""
        _make_state(1, 1, batter_track)
        # Plate appearances only, no hits: the stub formula yields value == 30.
        _make_stats(1, 1, 1, pa=30)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 0

    def test_value_at_t1_threshold_assigns_tier_1(self, batter_track):
        """A value of 50 falls in the T1 band (37 <= 50 < 149).

        Note: despite the test name, the value used is 50, not the exact
        boundary value 37.
        """
        _make_state(1, 1, batter_track)
        # Plate appearances only, no hits: value == 50.
        _make_stats(1, 1, 1, pa=50)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 1

    def test_tier_advancement_to_t2(self, batter_track):
        """A value of 160 falls in the T2 band (149 <= 160 < 448)."""
        _make_state(1, 1, batter_track)
        # Plate appearances only, no hits: value == 160.
        _make_stats(1, 1, 1, pa=160)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 2

    def test_partial_progress_stays_t1(self, batter_track):
        """A value of 100 is short of T2 (149): tier is 1 and not fully evolved."""
        _make_state(1, 1, batter_track)
        # value == 100, which clears T1 but not T2.
        _make_stats(1, 1, 1, pa=100)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 1
        assert outcome["fully_evolved"] is False

    def test_fully_evolved_at_t4(self, batter_track):
        """Reaching the T4 cutoff (896) gives tier 4 and fully_evolved=True."""
        _make_state(1, 1, batter_track)
        # value == 900, which is at or above 896.
        _make_stats(1, 1, 1, pa=900)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 4
        assert outcome["fully_evolved"] is True
|
|
|
|
|
|
class TestNoRegression:
    """A stored tier may go up but must never go down."""

    def test_tier_never_decreases(self, batter_track):
        """A card stored at T2 keeps T2 when its stats only justify T1."""
        # Stored state is already at tier 2.
        _make_state(1, 1, batter_track, current_tier=2, current_value=160.0)
        # value == 50 would map to T1 on its own.
        _make_stats(1, 1, 1, pa=50)
        outcome = _eval(1, 1)
        # The higher stored tier wins.
        assert outcome["current_tier"] == 2

    def test_tier_advances_when_value_improves(self, batter_track):
        """A card stored at T1 moves up to T3 when its stats justify T3."""
        _make_state(1, 1, batter_track, current_tier=1, current_value=50.0)
        # value == 500, which clears the T3 cutoff of 448.
        _make_stats(1, 1, 1, pa=500)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 3
|
|
|
|
|
|
class TestIdempotency:
    """Re-running evaluate_card on unchanged stats yields the same outcome."""

    def test_idempotent_same_result(self, batter_track):
        """Tier, value and fully_evolved match across two back-to-back runs."""
        _make_state(1, 1, batter_track)
        _make_stats(1, 1, 1, pa=160)
        first = _eval(1, 1)
        second = _eval(1, 1)
        # last_evaluated_at is deliberately not compared.
        for field in ("current_tier", "current_value", "fully_evolved"):
            assert first[field] == second[field]

    def test_idempotent_at_fully_evolved(self, batter_track):
        """A second evaluation at T4 still reports tier 4 and fully_evolved."""
        _make_state(1, 1, batter_track)
        _make_stats(1, 1, 1, pa=900)
        _eval(1, 1)  # first pass; only the second result is inspected
        second = _eval(1, 1)
        assert second["current_tier"] == 4
        assert second["fully_evolved"] is True
|
|
|
|
|
|
class TestCareerTotals:
    """Evaluation uses totals over every season of one player/team pair."""

    def test_multi_season_stats_summed(self, batter_track):
        """Two season rows are combined into one career total."""
        _make_state(1, 1, batter_track)
        # 80 PA + 90 PA = 170 PA, so value == 170, which is in the T2 band.
        for season, plate_appearances in ((1, 80), (2, 90)):
            _make_stats(1, 1, season, pa=plate_appearances)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 2
        assert outcome["current_value"] == 170.0

    def test_zero_stats_stays_t0(self, batter_track):
        """With no stats rows at all the value is 0.0 and the tier is 0."""
        _make_state(1, 1, batter_track)
        outcome = _eval(1, 1)
        assert outcome["current_tier"] == 0
        assert outcome["current_value"] == 0.0

    def test_other_team_stats_not_included(self, batter_track):
        """Rows for the same player on another team are left out of the total."""
        _make_state(1, 1, batter_track)
        _make_stats(1, 1, 1, pa=50)
        # Player 1 on team 2: must be ignored when evaluating team 1.
        _make_stats(1, 2, 1, pa=200)
        outcome = _eval(1, 1)
        # Only the 50 PA from team 1 count, so value == 50 and tier == 1.
        assert outcome["current_tier"] == 1
        assert outcome["current_value"] == 50.0
|
|
|
|
|
|
class TestFullyEvolvedPersistence:
    """T2-1: fully_evolved=True is preserved even when stats drop or are absent."""

    @staticmethod
    def _seed_fully_evolved_state(track):
        """Create a tier-4 state for player 1 / team 1 stored as fully evolved.

        The flag is set and saved explicitly after creation so the stored row
        really starts with fully_evolved=True, which is the scenario these
        tests describe.
        """
        state = _make_state(1, 1, track, current_tier=4, current_value=900.0)
        state.fully_evolved = True
        state.save()
        return state

    def test_fully_evolved_persists_when_stats_zeroed(self, batter_track):
        """Card at T4/fully_evolved=True stays fully_evolved after stats are removed.

        What: Set up a RefractorCardState at tier=4 with fully_evolved=True.
        Then call evaluate_card with no season stats rows (zero career totals).
        The evaluator computes value=0 -> new_tier=0, but current_tier must
        stay at 4 (no regression) and fully_evolved must remain True.

        Why: fully_evolved is a permanent achievement flag — it must not be
        revoked if a team's stats are rolled back, corrected, or simply not
        yet imported. This test confirms that fully_evolved gets the same
        no-regression protection as the tier.
        """
        # Seed state at T4 with fully_evolved=True actually stored.
        self._seed_fully_evolved_state(batter_track)
        # No stats rows — career totals will be all zeros
        # (no _make_stats call)

        result = _eval(1, 1)

        # The no-regression rule keeps tier at 4
        assert result["current_tier"] == 4, (
            f"Expected tier=4 (no regression), got {result['current_tier']}"
        )
        # fully_evolved must still be True since tier >= 4
        assert result["fully_evolved"] is True, (
            "fully_evolved was reset to False after re-evaluation with zero stats"
        )

    def test_fully_evolved_persists_with_partial_stats(self, batter_track):
        """Card at T4 stays fully_evolved even with stats below T1.

        What: Same setup as above but with a season stats row giving value=30
        (below T1=37). The computed tier would be 0, but current_tier must
        not regress from 4.

        Why: Validates that no-regression applies regardless of whether stats
        are zero or merely insufficient for the achieved tier.
        """
        self._seed_fully_evolved_state(batter_track)
        # pa=30 -> value=30, which is below T1=37 -> computed tier=0
        _make_stats(1, 1, 1, pa=30)

        result = _eval(1, 1)

        assert result["current_tier"] == 4
        assert result["fully_evolved"] is True
|
|
|
|
|
|
class TestMissingState:
    """Evaluating a (player_id, team_id) pair with no state row is an error."""

    def test_missing_state_raises(self, batter_track):
        """A ValueError mentioning the missing state row is raised."""
        # Nothing is inserted for player 99 / team 99.
        expected_message = "No refractor_card_state"
        with pytest.raises(ValueError, match=expected_message):
            _eval(99, 99)
|
|
|
|
|
|
class TestReturnShape:
    """The dict returned by evaluate_card has a fixed set of keys and types."""

    def test_return_keys(self, batter_track):
        """The result exposes exactly the six documented keys."""
        expected_keys = {
            "player_id",
            "team_id",
            "current_tier",
            "current_value",
            "fully_evolved",
            "last_evaluated_at",
        }
        _make_state(1, 1, batter_track)
        outcome = _eval(1, 1)
        assert set(outcome.keys()) == expected_keys

    def test_last_evaluated_at_is_iso_string(self, batter_track):
        """last_evaluated_at comes back as a non-empty ISO-8601 string."""
        _make_state(1, 1, batter_track)
        stamp = _eval(1, 1)["last_evaluated_at"]
        assert isinstance(stamp, str) and len(stamp) > 0
        # fromisoformat raises ValueError if the string is not parseable.
        datetime.fromisoformat(stamp)
|