feat: Refractor Phase 2 integration — wire boost into evaluate-game

When a card reaches a new Refractor tier during game evaluation, the
system now creates a boosted variant card with modified ratings. This
connects the Phase 2 Foundation pure functions (PR #176) to the live
evaluate-game endpoint.

Key changes:
- evaluate_card() gains dry_run parameter so apply_tier_boost() is the
  sole writer of current_tier, ensuring atomicity with variant creation
- apply_tier_boost() orchestrates the full boost flow: source card
  lookup, boost application, variant card + ratings creation, audit
  record, and atomic state mutations inside db.atomic()
- evaluate_game() calls evaluate_card(dry_run=True) then loops through
  intermediate tiers on tier-up, with error isolation per player
- Display stat helpers compute fresh avg/obp/slg for variant cards
- REFRACTOR_BOOST_ENABLED env var provides a kill switch
- 51 new tests: unit tests for display stats, integration tests for
  orchestration, HTTP endpoint tests for multi-tier jumps, pitcher
  path, kill switch, atomicity, idempotency, and cross-player isolation
- Clarified all "79-sum" references to note the 108-total card invariant

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-03-30 13:04:52 -05:00
parent 70f984392d
commit 6a176af7da
7 changed files with 2798 additions and 58 deletions

View File

@ -52,7 +52,9 @@ def _build_card_state_response(state, player_name=None) -> dict:
"current_value": state.current_value,
"fully_evolved": state.fully_evolved,
"last_evaluated_at": (
state.last_evaluated_at.isoformat() if state.last_evaluated_at else None
state.last_evaluated_at.isoformat()
if hasattr(state.last_evaluated_at, "isoformat")
else state.last_evaluated_at or None
),
"track": track_dict,
"next_threshold": next_threshold,
@ -305,7 +307,10 @@ async def evaluate_game(game_id: int, token: str = Depends(oauth2_scheme)):
logging.warning("Bad Token: [REDACTED]")
raise HTTPException(status_code=401, detail="Unauthorized")
import os
from ..db_engine import RefractorCardState, Player, StratPlay
from ..services.refractor_boost import apply_tier_boost
from ..services.refractor_evaluator import evaluate_card
plays = list(StratPlay.select().where(StratPlay.game == game_id))
@ -320,6 +325,8 @@ async def evaluate_game(game_id: int, token: str = Depends(oauth2_scheme)):
evaluated = 0
tier_ups = []
boost_enabled = os.environ.get("REFRACTOR_BOOST_ENABLED", "true").lower() != "false"
for player_id, team_id in pairs:
try:
state = RefractorCardState.get_or_none(
@ -330,11 +337,16 @@ async def evaluate_game(game_id: int, token: str = Depends(oauth2_scheme)):
continue
old_tier = state.current_tier
result = evaluate_card(player_id, team_id)
# Use dry_run=True so that current_tier is NOT written here.
# apply_tier_boost() writes current_tier + variant atomically on
# tier-up. If no tier-up occurs, apply_tier_boost is not called
# and the tier stays at old_tier (correct behaviour).
result = evaluate_card(player_id, team_id, dry_run=True)
evaluated += 1
new_tier = result.get("current_tier", old_tier)
if new_tier > old_tier:
# Use computed_tier (what the formula says) to detect tier-ups.
computed_tier = result.get("computed_tier", old_tier)
if computed_tier > old_tier:
player_name = "Unknown"
try:
p = Player.get_by_id(player_id)
@ -342,17 +354,66 @@ async def evaluate_game(game_id: int, token: str = Depends(oauth2_scheme)):
except Exception:
pass
tier_ups.append(
{
"player_id": player_id,
"team_id": team_id,
"player_name": player_name,
"old_tier": old_tier,
"new_tier": new_tier,
"current_value": result.get("current_value", 0),
"track_name": state.track.name if state.track else "Unknown",
}
)
# Phase 2: Apply rating boosts for each tier gained.
# apply_tier_boost() writes current_tier + variant atomically.
# If it fails, current_tier stays at old_tier — automatic retry next game.
boost_result = None
if not boost_enabled:
# Boost disabled via REFRACTOR_BOOST_ENABLED=false.
# Skip notification — current_tier was not written (dry_run),
# so reporting a tier-up would be a false notification.
continue
card_type = state.track.card_type if state.track else None
if card_type:
last_successful_tier = old_tier
failing_tier = old_tier + 1
try:
for tier in range(old_tier + 1, computed_tier + 1):
failing_tier = tier
boost_result = apply_tier_boost(
player_id, team_id, tier, card_type
)
last_successful_tier = tier
except Exception as boost_exc:
logger.warning(
f"Refractor boost failed for player={player_id} "
f"team={team_id} tier={failing_tier}: {boost_exc}"
)
# Report only the tiers that actually succeeded.
# If none succeeded, skip the tier_up notification entirely.
if last_successful_tier == old_tier:
continue
# At least one intermediate tier was committed; report that.
computed_tier = last_successful_tier
else:
# No card_type means no track — skip boost and skip notification.
# A false tier-up notification must not be sent when the boost
# was never applied (current_tier was never written to DB).
logger.warning(
f"Refractor boost skipped for player={player_id} "
f"team={team_id}: no card_type on track"
)
continue
tier_up_entry = {
"player_id": player_id,
"team_id": team_id,
"player_name": player_name,
"old_tier": old_tier,
"new_tier": computed_tier,
"current_value": result.get("current_value", 0),
"track_name": state.track.name if state.track else "Unknown",
}
# Non-breaking addition: include boost info when available.
if boost_result:
tier_up_entry["variant_created"] = boost_result.get(
"variant_created"
)
tier_ups.append(tier_up_entry)
except Exception as exc:
logger.warning(
f"Refractor eval failed for player={player_id} team={team_id}: {exc}"

View File

@ -1,8 +1,9 @@
"""Refractor rating boost service (Phase 2).
Pure functions for computing boosted card ratings when a player
reaches a new Refractor tier. Called by the orchestration layer
in apply_tier_boost().
reaches a new Refractor tier. The module-level 'db' variable is used by
apply_tier_boost() for atomic writes; tests patch this reference to redirect
writes to a shared-memory SQLite database.
Batter boost: fixed +0.5 to four offensive columns per tier.
Pitcher boost: 1.5 TB-budget priority algorithm per tier.
@ -13,6 +14,24 @@ import hashlib
import json
import logging
# Module-level db reference imported lazily so that this module can be
# imported before app.db_engine is fully initialised (e.g. in tests that
# patch DATABASE_TYPE before importing db_engine).
# Tests that need to redirect DB writes should patch this attribute at module
# level: `import app.services.refractor_boost as m; m.db = test_db`.
db = None


def _get_db():
    """Return the shared database handle, resolving it on first call.

    The handle is cached in the module-level ``db`` attribute. If that
    attribute is already set (for example because a test assigned its own
    database to it), it is returned untouched. Otherwise ``app.db_engine.db``
    is imported, stored in ``db`` and returned.
    """
    global db
    if db is not None:
        return db
    # Deferred import: app.db_engine is only loaded once a handle is needed.
    from app.db_engine import db as _engine_db  # noqa: PLC0415

    db = _engine_db
    return db
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
@ -77,7 +96,7 @@ PITCHER_PRIORITY: list[tuple[str, int]] = [
("hbp", 1),
]
# All 18 variable outcome columns that must sum to 79.
# All 18 variable outcome columns (sum to 79; x-checks add 29 for 108 total).
PITCHER_OUTCOME_COLUMNS: list[str] = [
"homerun",
"bp_homerun",
@ -313,3 +332,363 @@ def compute_variant_hash(
raw = hashlib.sha256(json.dumps(inputs, sort_keys=True).encode()).hexdigest()
result = int(raw[:8], 16)
return result if result != 0 else 1 # variant=0 is reserved
# ---------------------------------------------------------------------------
# Display stat helpers
# ---------------------------------------------------------------------------
# Full card chance total; every display stat is expressed as chances / 108.
_CARD_CHANCE_TOTAL = 108


def _compute_display_stats(ratings: dict, third_double_col: str) -> dict:
    """Shared avg/obp/slg arithmetic for batter and pitcher ratings.

    Batter and pitcher cards use the same formulas and differ only in the
    name of their third doubles column, which is passed in as
    ``third_double_col``.

    Args:
        ratings: Dict of numeric chance values keyed by outcome column.
        third_double_col: 'double_pull' for batters, 'double_cf' for pitchers.

    Returns:
        Dict with keys 'avg', 'obp', 'slg' as floats.

    Raises:
        KeyError: If a column read by the formulas is missing from ratings.
    """
    # Ballpark (bp_*) outcomes count as half a hit.
    avg = (
        ratings["homerun"]
        + ratings["bp_homerun"] / 2
        + ratings["triple"]
        + ratings["double_three"]
        + ratings["double_two"]
        + ratings[third_double_col]
        + ratings["single_two"]
        + ratings["single_one"]
        + ratings["single_center"]
        + ratings["bp_single"] / 2
    ) / _CARD_CHANCE_TOTAL
    obp = (ratings["hbp"] + ratings["walk"]) / _CARD_CHANCE_TOTAL + avg
    # Total-base weights; the bp_* weights are the half-hit versions of 4 and 1.
    slg = (
        ratings["homerun"] * 4
        + ratings["bp_homerun"] * 2
        + ratings["triple"] * 3
        + ratings["double_three"] * 2
        + ratings["double_two"] * 2
        + ratings[third_double_col] * 2
        + ratings["single_two"]
        + ratings["single_one"]
        + ratings["single_center"]
        + ratings["bp_single"] / 2
    ) / _CARD_CHANCE_TOTAL
    return {"avg": avg, "obp": obp, "slg": slg}


def compute_batter_display_stats(ratings: dict) -> dict:
    """Compute avg/obp/slg from batter outcome columns.

    Intended to match the formulas of the BattingCardRatingsModel Pydantic
    validator so that variant card display stats stay consistent with the
    boosted chance values. All denominators are 108 (the full card chance
    total).

    Args:
        ratings: Dict containing at minimum all BATTER_OUTCOME_COLUMNS as
            numeric (int or float) values.

    Returns:
        Dict with keys 'avg', 'obp', 'slg' as floats.
    """
    return _compute_display_stats(ratings, "double_pull")


def compute_pitcher_display_stats(ratings: dict) -> dict:
    """Compute avg/obp/slg from pitcher outcome columns.

    Intended to match the formulas of the PitchingCardRatingsModel Pydantic
    validator so that variant card display stats stay consistent with the
    boosted chance values. All denominators are 108 (the full card chance
    total).

    Args:
        ratings: Dict containing at minimum all PITCHER_OUTCOME_COLUMNS as
            numeric (int or float) values.

    Returns:
        Dict with keys 'avg', 'obp', 'slg' as floats.
    """
    return _compute_display_stats(ratings, "double_cf")
# ---------------------------------------------------------------------------
# Orchestration: apply_tier_boost
# ---------------------------------------------------------------------------
def apply_tier_boost(
    player_id: int,
    team_id: int,
    new_tier: int,
    card_type: str,
    _batting_card_model=None,
    _batting_ratings_model=None,
    _pitching_card_model=None,
    _pitching_ratings_model=None,
    _card_model=None,
    _state_model=None,
    _audit_model=None,
) -> dict:
    """Create a boosted variant card for a tier-up.

    IMPORTANT: This function is the SOLE writer of current_tier on
    RefractorCardState when a tier-up occurs. The evaluator computes
    the new tier but does NOT write it; this function writes tier +
    variant + audit atomically inside a single db.atomic() block.
    If this function fails, the tier stays at its old value and will
    be retried on the next game evaluation.

    Orchestrates the full flow (card creation outside atomic; state
    mutations inside db.atomic()):
      1. Determine source variant (variant=0 for T1, previous tier's hash for T2+)
      2. Fetch source card and ratings rows
      3. Apply boost formula (batter or pitcher) per vs_hand split
      4. Assert 108-sum after boost for both batters and pitchers
      5. Compute new variant hash
      6. Load RefractorCardState (fails here, before any row is written)
      7. Create new card row with new variant (idempotency: skip if exists)
      8. Create new ratings rows for both vs_hand splits (idempotency: skip if exists)
      9. Inside db.atomic():
         a. Write RefractorBoostAudit record
         b. Update RefractorCardState: current_tier, variant, fully_evolved
         c. Propagate variant to all Card rows for (player_id, team_id)

    Args:
        player_id: Player primary key.
        team_id: Team primary key.
        new_tier: The tier being reached (1-4).
        card_type: One of 'batter', 'sp', 'rp'.
        _batting_card_model: Injectable stub for BattingCard (used in tests).
        _batting_ratings_model: Injectable stub for BattingCardRatings.
        _pitching_card_model: Injectable stub for PitchingCard.
        _pitching_ratings_model: Injectable stub for PitchingCardRatings.
        _card_model: Injectable stub for Card.
        _state_model: Injectable stub for RefractorCardState.
        _audit_model: Injectable stub for RefractorBoostAudit.

    Returns:
        Dict with 'variant_created' (int) and 'boost_deltas' (per-split dict).

    Raises:
        ValueError: If card_type is not one of the accepted values, if the
            source card or ratings are missing, if a boosted split violates
            the 108-sum invariant, or if RefractorCardState is not found for
            (player_id, team_id).
    """
    # Lazy model imports — same pattern as refractor_evaluator.py.
    if _batting_card_model is None:
        from app.db_engine import BattingCard as _batting_card_model  # noqa: PLC0415
    if _batting_ratings_model is None:
        from app.db_engine import BattingCardRatings as _batting_ratings_model  # noqa: PLC0415
    if _pitching_card_model is None:
        from app.db_engine import PitchingCard as _pitching_card_model  # noqa: PLC0415
    if _pitching_ratings_model is None:
        from app.db_engine import PitchingCardRatings as _pitching_ratings_model  # noqa: PLC0415
    if _card_model is None:
        from app.db_engine import Card as _card_model  # noqa: PLC0415
    if _state_model is None:
        from app.db_engine import RefractorCardState as _state_model  # noqa: PLC0415
    if _audit_model is None:
        from app.db_engine import RefractorBoostAudit as _audit_model  # noqa: PLC0415

    _db = _get_db()

    if card_type not in ("batter", "sp", "rp"):
        raise ValueError(
            f"Invalid card_type={card_type!r}; expected one of 'batter', 'sp', 'rp'"
        )
    is_batter = card_type == "batter"

    CardModel = _batting_card_model if is_batter else _pitching_card_model
    RatingsModel = _batting_ratings_model if is_batter else _pitching_ratings_model
    fk_field = "battingcard" if is_batter else "pitchingcard"

    # 1. Determine source variant.
    if new_tier == 1:
        source_variant = 0
    else:
        source_variant = compute_variant_hash(player_id, new_tier - 1)

    # 2. Fetch source card and ratings rows.
    source_card = CardModel.get_or_none(
        (CardModel.player == player_id) & (CardModel.variant == source_variant)
    )
    if source_card is None:
        raise ValueError(
            f"No {'batting' if is_batter else 'pitching'}card for "
            f"player={player_id} variant={source_variant}"
        )

    ratings_rows = list(
        RatingsModel.select().where(getattr(RatingsModel, fk_field) == source_card.id)
    )
    if not ratings_rows:
        raise ValueError(f"No ratings rows for card_id={source_card.id}")

    # 3. Apply boost to each vs_hand split.
    boost_fn = apply_batter_boost if is_batter else apply_pitcher_boost
    outcome_cols = BATTER_OUTCOME_COLUMNS if is_batter else PITCHER_OUTCOME_COLUMNS
    boosted_splits: dict[str, dict] = {}

    for row in ratings_rows:
        # Build the ratings dict: outcome columns + (pitcher) x-check columns.
        ratings_dict: dict = {col: getattr(row, col) for col in outcome_cols}
        if not is_batter:
            for col in PITCHER_XCHECK_COLUMNS:
                ratings_dict[col] = getattr(row, col)

        boosted = boost_fn(ratings_dict)

        # 4. Assert 108-sum invariant after boost (Peewee bypasses Pydantic validators).
        if is_batter:
            boosted_sum = sum(boosted[col] for col in BATTER_OUTCOME_COLUMNS)
        else:
            boosted_sum = sum(boosted[col] for col in PITCHER_OUTCOME_COLUMNS) + sum(
                boosted[col] for col in PITCHER_XCHECK_COLUMNS
            )

        if abs(boosted_sum - 108.0) >= 0.01:
            raise ValueError(
                f"108-sum invariant violated after boost for player={player_id} "
                f"vs_hand={row.vs_hand}: sum={boosted_sum:.6f}"
            )

        boosted_splits[row.vs_hand] = boosted

    # 5. Compute new variant hash.
    new_variant = compute_variant_hash(player_id, new_tier)

    # 6. Load card state BEFORE any write. A missing state row is a hard
    #    failure; checking it here means the variant card and ratings rows
    #    (written outside the atomic block) are never left orphaned by it.
    card_state = _state_model.get_or_none(
        (_state_model.player == player_id) & (_state_model.team == team_id)
    )
    if card_state is None:
        raise ValueError(
            f"No refractor_card_state for player={player_id} team={team_id}"
        )

    # 7. Create new card row (idempotency: skip if exists).
    existing_card = CardModel.get_or_none(
        (CardModel.player == player_id) & (CardModel.variant == new_variant)
    )
    if existing_card is not None:
        new_card = existing_card
    else:
        if is_batter:
            clone_fields = [
                "steal_low",
                "steal_high",
                "steal_auto",
                "steal_jump",
                "bunting",
                "hit_and_run",
                "running",
                "offense_col",
                "hand",
            ]
        else:
            clone_fields = [
                "balk",
                "wild_pitch",
                "hold",
                "starter_rating",
                "relief_rating",
                "closer_rating",
                "batting",
                "offense_col",
                "hand",
            ]
        card_data: dict = {
            "player": player_id,
            "variant": new_variant,
            "image_url": None,  # No rendered image for variant cards yet.
        }
        for fname in clone_fields:
            card_data[fname] = getattr(source_card, fname)
        new_card = CardModel.create(**card_data)

    # 8. Create new ratings rows for each split (idempotency: skip if exists).
    display_stats_fn = (
        compute_batter_display_stats if is_batter else compute_pitcher_display_stats
    )

    for vs_hand, boosted_ratings in boosted_splits.items():
        existing_ratings = RatingsModel.get_or_none(
            (getattr(RatingsModel, fk_field) == new_card.id)
            & (RatingsModel.vs_hand == vs_hand)
        )
        if existing_ratings is not None:
            continue  # Idempotency: already written.

        ratings_data: dict = {
            fk_field: new_card.id,
            "vs_hand": vs_hand,
        }
        # Outcome columns (boosted values).
        ratings_data.update({col: boosted_ratings[col] for col in outcome_cols})

        # X-check columns for pitchers (unchanged by boost, copy from boosted dict).
        if not is_batter:
            for col in PITCHER_XCHECK_COLUMNS:
                ratings_data[col] = boosted_ratings[col]

        # Direction rates for batters: copy from source row.
        if is_batter:
            source_row = next(r for r in ratings_rows if r.vs_hand == vs_hand)
            for rate_col in ("pull_rate", "center_rate", "slap_rate"):
                ratings_data[rate_col] = getattr(source_row, rate_col)

        # Compute fresh display stats from boosted chance columns.
        display_stats = display_stats_fn(boosted_ratings)
        ratings_data.update(display_stats)

        RatingsModel.create(**ratings_data)

    # 9. All state mutations in a single atomic block.
    with _db.atomic():
        # 9a. Write audit record.
        # boost_delta_json stores per-split boosted values including x-check columns
        # for pitchers so the full card can be reconstructed from the audit.
        audit_data: dict = {
            "card_state": card_state.id,
            "tier": new_tier,
            "variant_created": new_variant,
            "boost_delta_json": json.dumps(boosted_splits, default=str),
        }
        if is_batter:
            audit_data["battingcard"] = new_card.id
        else:
            audit_data["pitchingcard"] = new_card.id
        _audit_model.create(**audit_data)

        # 9b. Update RefractorCardState — this is the SOLE tier write on tier-up.
        card_state.current_tier = new_tier
        card_state.fully_evolved = new_tier >= 4
        card_state.variant = new_variant
        card_state.save()

        # 9c. Propagate variant to all Card rows for (player_id, team_id).
        _card_model.update(variant=new_variant).where(
            (_card_model.player == player_id) & (_card_model.team == team_id)
        ).execute()

    logger.debug(
        "refractor_boost: applied T%s boost for player=%s team=%s variant=%s",
        new_tier,
        player_id,
        team_id,
        new_variant,
    )

    return {
        "variant_created": new_variant,
        "boost_deltas": dict(boosted_splits),
    }

View File

@ -9,9 +9,20 @@ evaluate_card() is the main entry point:
4. Compare value to track thresholds to determine new_tier
5. Update card_state.current_value = computed value
6. Update card_state.current_tier = max(current_tier, new_tier) — no regression
7. Update card_state.fully_evolved = (new_tier >= 4)
(SKIPPED when dry_run=True)
7. Update card_state.fully_evolved = (current_tier >= 4)
(SKIPPED when dry_run=True)
8. Update card_state.last_evaluated_at = NOW()
When dry_run=True, only steps 5 and 8 are written (current_value and
last_evaluated_at). Steps 6–7 (current_tier and fully_evolved) are intentionally
skipped so that the evaluate-game endpoint can detect a pending tier-up and
delegate the tier write to apply_tier_boost(), which writes tier + variant
atomically. The return dict always includes both "computed_tier" (what the
formula says the tier should be) and "computed_fully_evolved" (whether the
computed tier implies full evolution) so callers can make decisions without
reading the database again.
Idempotent: calling multiple times with the same data produces the same result.
Depends on WP-05 (RefractorCardState), WP-07 (BattingSeasonStats/PitchingSeasonStats),
@ -47,6 +58,7 @@ class _CareerTotals:
def evaluate_card(
player_id: int,
team_id: int,
dry_run: bool = False,
_stats_model=None,
_state_model=None,
_compute_value_fn=None,
@ -56,15 +68,26 @@ def evaluate_card(
Sums all BattingSeasonStats or PitchingSeasonStats rows (based on
card_type) for (player_id, team_id) across all seasons, then delegates
formula computation and tier classification to the formula engine. The result is written back to refractor_card_state and
returned as a dict.
formula computation and tier classification to the formula engine. The
result is written back to refractor_card_state and returned as a dict.
current_tier never decreases (no regression):
card_state.current_tier = max(card_state.current_tier, new_tier)
When dry_run=True, only current_value and last_evaluated_at are written —
current_tier and fully_evolved are NOT updated. This allows the caller
(evaluate-game endpoint) to detect a tier-up and delegate the tier write
to apply_tier_boost(), which writes tier + variant atomically. The return
dict always includes "computed_tier" (what the formula says the tier should
be) in addition to "current_tier" (what is actually stored in the DB).
Args:
player_id: Player primary key.
team_id: Team primary key.
dry_run: When True, skip writing current_tier and fully_evolved so
that apply_tier_boost() can write them atomically with variant
creation. Defaults to False (existing behaviour for the manual
/evaluate endpoint).
_stats_model: Override for BattingSeasonStats/PitchingSeasonStats
(used in tests to inject a stub model with all stat fields).
_state_model: Override for RefractorCardState (used in tests to avoid
@ -75,8 +98,10 @@ def evaluate_card(
(used in tests).
Returns:
Dict with updated current_tier, current_value, fully_evolved,
last_evaluated_at (ISO-8601 string).
Dict with current_tier, computed_tier, current_value, fully_evolved,
last_evaluated_at (ISO-8601 string). "computed_tier" reflects what
the formula computed; "current_tier" reflects what is stored in the DB
(which may differ when dry_run=True and a tier-up is pending).
Raises:
ValueError: If no refractor_card_state row exists for (player_id, team_id).
@ -169,21 +194,30 @@ def evaluate_card(
value = _compute_value_fn(track.card_type, totals)
new_tier = _tier_from_value_fn(value, track)
# 5–8. Update card state (no tier regression)
# 5–8. Update card state.
now = datetime.now()
computed_tier = new_tier
computed_fully_evolved = computed_tier >= 4
# Always update value and timestamp; current_tier and fully_evolved are
# skipped when dry_run=True so that apply_tier_boost() can write them
# atomically with variant creation on tier-up.
card_state.current_value = value
card_state.current_tier = max(card_state.current_tier, new_tier)
card_state.fully_evolved = card_state.current_tier >= 4
card_state.last_evaluated_at = now
if not dry_run:
card_state.current_tier = max(card_state.current_tier, new_tier)
card_state.fully_evolved = card_state.current_tier >= 4
card_state.save()
logging.debug(
"refractor_eval: player=%s team=%s value=%.2f tier=%s fully_evolved=%s",
"refractor_eval: player=%s team=%s value=%.2f computed_tier=%s "
"stored_tier=%s dry_run=%s",
player_id,
team_id,
value,
computed_tier,
card_state.current_tier,
card_state.fully_evolved,
dry_run,
)
return {
@ -191,6 +225,8 @@ def evaluate_card(
"team_id": team_id,
"current_value": card_state.current_value,
"current_tier": card_state.current_tier,
"computed_tier": computed_tier,
"computed_fully_evolved": computed_fully_evolved,
"fully_evolved": card_state.fully_evolved,
"last_evaluated_at": card_state.last_evaluated_at.isoformat(),
}

View File

@ -48,35 +48,41 @@ import os
os.environ.setdefault("API_TOKEN", "test-token")
import app.services.season_stats as _season_stats_module
import app.services.refractor_boost as _refractor_boost_module
import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from peewee import SqliteDatabase
from app.db_engine import (
BattingCard,
BattingCardRatings,
Cardset,
RefractorCardState,
RefractorCosmetic,
RefractorTierBoost,
RefractorTrack,
Decision,
Event,
MlbPlayer,
Pack,
PackType,
PitchingCard,
PitchingCardRatings,
Player,
BattingSeasonStats,
PitchingSeasonStats,
ProcessedGame,
Rarity,
RefractorBoostAudit,
RefractorCardState,
RefractorCosmetic,
RefractorTierBoost,
RefractorTrack,
Roster,
RosterSlot,
ScoutClaim,
ScoutOpportunity,
StratGame,
StratPlay,
Decision,
Team,
Card,
Event,
)
# ---------------------------------------------------------------------------
@ -111,15 +117,22 @@ _WP13_MODELS = [
BattingSeasonStats,
PitchingSeasonStats,
ProcessedGame,
BattingCard,
BattingCardRatings,
PitchingCard,
PitchingCardRatings,
RefractorTrack,
RefractorCardState,
RefractorTierBoost,
RefractorCosmetic,
RefractorBoostAudit,
]
# Patch the service-layer 'db' reference to use our shared test database so
# that db.atomic() in update_season_stats() operates on the same connection.
# Patch the service-layer 'db' references to use our shared test database so
# that db.atomic() in update_season_stats() and apply_tier_boost() operate on
# the same connection.
_season_stats_module.db = _wp13_db
_refractor_boost_module.db = _wp13_db
# ---------------------------------------------------------------------------
# Auth header used by every authenticated request
@ -323,6 +336,65 @@ def _make_state(
)
# Base batter ratings that sum to exactly 108 for use in tier advancement tests.
# apply_tier_boost() requires a base card (variant=0) with ratings rows to
# create boosted variant cards — tests that push past T1 must set this up.
# Unpacked as keyword arguments into BattingCardRatings.create() by
# _make_base_batter_card(); the 22 values below total 108.0.
_WP13_BASE_BATTER_RATINGS = {
# Columns read by compute_batter_display_stats() for avg and slg.
"homerun": 3.0,
"bp_homerun": 1.0,
"triple": 0.5,
"double_three": 2.0,
"double_two": 2.0,
"double_pull": 6.0,
"single_two": 4.0,
"single_one": 12.0,
"single_center": 5.0,
"bp_single": 2.0,
# Columns added on top of avg to produce obp.
"hbp": 3.0,
"walk": 7.0,
# Remaining outcome columns; not read by the display-stat formulas.
"strikeout": 15.0,
"lineout": 3.0,
"popout": 2.0,
"flyout_a": 5.0,
"flyout_bq": 4.0,
"flyout_lf_b": 3.0,
"flyout_rf_b": 9.0,
"groundout_a": 6.0,
"groundout_b": 8.0,
"groundout_c": 5.5,
}
def _make_base_batter_card(player):
    """Seed a variant-0 BattingCard with L and R ratings rows for ``player``.

    apply_tier_boost() reads this base card and its ratings when it builds a
    boosted variant. Both splits use the chance values from
    _WP13_BASE_BATTER_RATINGS. Returns the created BattingCard.
    """
    base_card = BattingCard.create(
        player=player,
        variant=0,
        steal_low=1,
        steal_high=6,
        steal_auto=False,
        steal_jump=0.5,
        bunting="C",
        hit_and_run="B",
        running=3,
        offense_col=2,
        hand="R",
    )
    # Non-chance columns shared by both vs_hand rows.
    split_fields = {
        "pull_rate": 0.4,
        "center_rate": 0.35,
        "slap_rate": 0.25,
        "avg": 0.300,
        "obp": 0.370,
        "slg": 0.450,
    }
    for hand in ("L", "R"):
        BattingCardRatings.create(
            battingcard=base_card,
            vs_hand=hand,
            **split_fields,
            **_WP13_BASE_BATTER_RATINGS,
        )
    return base_card
# ---------------------------------------------------------------------------
# Tests: POST /api/v2/season-stats/update-game/{game_id}
# ---------------------------------------------------------------------------
@ -486,6 +558,8 @@ def test_evaluate_game_tier_advancement(client):
game = _make_game(team_a, team_b)
track = _make_track(name="WP13 Tier Adv Track")
_make_state(batter, team_a, track, current_tier=0, current_value=34.0)
# Phase 2: base card required so apply_tier_boost() can create a variant.
_make_base_batter_card(batter)
# Seed prior stats: 34 PA (value = 34; T1 threshold = 37)
BattingSeasonStats.create(
@ -567,6 +641,8 @@ def test_evaluate_game_tier_ups_in_response(client):
game = _make_game(team_a, team_b)
track = _make_track(name="WP13 Tier-Ups Track")
_make_state(batter, team_a, track, current_tier=0)
# Phase 2: base card required so apply_tier_boost() can create a variant.
_make_base_batter_card(batter)
# Seed prior stats below threshold
BattingSeasonStats.create(player=batter, team=team_a, season=10, pa=34)
@ -798,3 +874,432 @@ def test_evaluate_game_error_isolation(client, monkeypatch):
# The failing player must not appear in tier_ups
failing_ids = [tu["player_id"] for tu in data["tier_ups"]]
assert fail_player_id not in failing_ids
# ---------------------------------------------------------------------------
# Base pitcher card ratings that sum to exactly 108 for use in pitcher tier
# advancement tests.
# Variable columns (18): sum to 79.
# X-check columns (9): sum to 29.
# Total: 108.
# ---------------------------------------------------------------------------
# Unpacked as keyword arguments into PitchingCardRatings.create() by
# _make_base_pitcher_card().
_WP13_BASE_PITCHER_RATINGS = {
# 18 variable outcome columns (sum = 79)
"homerun": 2.0,
"bp_homerun": 1.0,
"triple": 0.5,
"double_three": 1.5,
"double_two": 2.0,
"double_cf": 2.0,
"single_two": 3.0,
"single_one": 4.0,
"single_center": 3.0,
"bp_single": 2.0,
"hbp": 1.0,
"walk": 3.0,
"strikeout": 30.0,
"flyout_lf_b": 4.0,
"flyout_cf_b": 5.0,
"flyout_rf_b": 5.0,
"groundout_a": 5.0,
"groundout_b": 5.0,
# 9 x-check columns (sum = 29)
"xcheck_p": 4.0,
"xcheck_c": 3.0,
"xcheck_1b": 3.0,
"xcheck_2b": 3.0,
"xcheck_3b": 3.0,
"xcheck_ss": 3.0,
"xcheck_lf": 3.0,
"xcheck_cf": 3.0,
"xcheck_rf": 4.0,
}
def _make_base_pitcher_card(player):
    """Seed a variant-0 PitchingCard with L and R ratings rows for ``player``.

    Pitcher counterpart of _make_base_batter_card. Both splits take their
    chance values from _WP13_BASE_PITCHER_RATINGS (18 variable columns
    summing to 79 plus 9 x-check columns summing to 29, for the 108 total
    that apply_tier_boost() asserts after boosting). Returns the created
    PitchingCard.
    """
    base_card = PitchingCard.create(
        player=player,
        variant=0,
        balk=1,
        wild_pitch=2,
        hold=3,
        starter_rating=7,
        relief_rating=5,
        closer_rating=None,
        batting=None,
        offense_col=1,
        hand="R",
    )
    # Display stats shared by both vs_hand rows.
    split_fields = {"avg": 0.250, "obp": 0.310, "slg": 0.360}
    for hand in ("L", "R"):
        PitchingCardRatings.create(
            pitchingcard=base_card,
            vs_hand=hand,
            **split_fields,
            **_WP13_BASE_PITCHER_RATINGS,
        )
    return base_card
# ---------------------------------------------------------------------------
# Gap 1: REFRACTOR_BOOST_ENABLED=false kill switch
# ---------------------------------------------------------------------------
def test_evaluate_game_boost_disabled_skips_tier_up(client, monkeypatch):
    """With REFRACTOR_BOOST_ENABLED=false a formula tier-up is neither applied nor reported.

    What: A tier-0 batter has 34 prior PA and gains 4 more in the game, for
    38 total, which is above the T1 threshold of 37. The kill switch is set
    to "false" before evaluate-game is called.

    Why: While the switch is off, no variant card may be created,
    current_tier must stay untouched, and the response must carry no tier_up
    entries. Otherwise the bot would announce tier-ups during maintenance
    windows in which card creation is deliberately disabled.
    """
    monkeypatch.setenv("REFRACTOR_BOOST_ENABLED", "false")

    home_team = _make_team("BD1", gmid=20201)
    away_team = _make_team("BD2", gmid=20202)
    hitter = _make_player("WP13 KillSwitch Batter")
    hurler = _make_player("WP13 KillSwitch Pitcher", pos="SP")
    game = _make_game(home_team, away_team)
    kill_track = _make_track(name="WP13 KillSwitch Track")
    _make_state(hitter, home_team, kill_track, current_tier=0, current_value=0.0)
    _make_base_batter_card(hitter)

    # Prior season total sits just under the T1 threshold.
    BattingSeasonStats.create(player=hitter, team=home_team, season=10, pa=34)

    # Four one-PA plays lift the total to 38, past T1 (37).
    for play_num in range(1, 5):
        _make_play(
            game, play_num, hitter, home_team, hurler, away_team, pa=1, ab=1, outs=1
        )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    resp = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert resp.status_code == 200
    payload = resp.json()

    # apply_tier_boost() is never reached, so the stored tier is still 0.
    stored_state = RefractorCardState.get(
        (RefractorCardState.player == hitter)
        & (RefractorCardState.team == home_team)
    )
    assert stored_state.current_tier == 0

    # The T1 variant card must not exist because the boost never ran.
    from app.services.refractor_boost import compute_variant_hash

    t1_variant = compute_variant_hash(hitter.player_id, 1)
    variant_card = BattingCard.get_or_none(
        (BattingCard.player == hitter) & (BattingCard.variant == t1_variant)
    )
    assert (
        variant_card is None
    ), "Variant card must not be created when boost is disabled"

    # The router skips the append entirely, so the bot gets no false notice.
    assert len(payload["tier_ups"]) == 0
# ---------------------------------------------------------------------------
# Gap 4: Multi-tier jump T0 -> T2 at HTTP layer
# ---------------------------------------------------------------------------
def test_evaluate_game_multi_tier_jump(client):
    """One game that lifts a batter from T0 straight to T2 boosts every tier.

    What: Seed a batter at tier 0 with no prior stats and record 50 plate
    appearances that are all singles. With the track formula
    ``pa + tb * 2`` that is 50 + 50 * 2 = 150, which lies in
    [T2=149, T3=448), so the computed tier is exactly 2.

    Why: evaluate-game has to step through every tier from old+1 up to the
    computed tier, calling apply_tier_boost() once per step. A multi-tier
    jump must therefore leave a variant card behind for each intermediate
    tier while reporting a single tier_up entry whose new_tier is the highest
    tier reached. The reported variant_created must be the T2 hash (not T1)
    because the final apply_tier_boost() call is the T2 one.
    """
    from app.services.refractor_boost import compute_variant_hash

    home = _make_team("MJ1", gmid=20211)
    away = _make_team("MJ2", gmid=20212)
    hitter = _make_player("WP13 MultiJump Batter")
    opposing_sp = _make_player("WP13 MultiJump Pitcher", pos="SP")
    game = _make_game(home, away)
    track = _make_track(name="WP13 MultiJump Track")
    _make_state(hitter, home, track, current_tier=0, current_value=0.0)
    _make_base_batter_card(hitter)

    # 50 PA, every one a single: tb = 50, value = 50 + 50 * 2 = 150.
    # 150 >= T2 (149) and < T3 (448), so the tier lands on 2.
    for play_num in range(1, 51):
        _make_play(
            game,
            play_num,
            hitter,
            home,
            opposing_sp,
            away,
            pa=1,
            ab=1,
            hit=1,
            outs=0,
        )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    response = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert response.status_code == 200
    tier_ups = response.json()["tier_ups"]

    # Exactly one tier_up entry, spanning the whole T0 -> T2 jump.
    assert len(tier_ups) == 1
    entry = tier_ups[0]
    assert entry["old_tier"] == 0
    assert entry["new_tier"] == 2

    # The reported variant is the one from the last boost iteration (T2).
    hash_by_tier = {
        tier: compute_variant_hash(hitter.player_id, tier) for tier in (1, 2)
    }
    assert entry["variant_created"] == hash_by_tier[2]

    # A variant BattingCard row must exist for T1 as well as T2.
    missing_messages = {
        1: "T1 variant card missing",
        2: "T2 variant card missing",
    }
    for tier, variant_hash in hash_by_tier.items():
        variant_row = BattingCard.get_or_none(
            (BattingCard.player == hitter) & (BattingCard.variant == variant_hash)
        )
        assert variant_row is not None, missing_messages[tier]

    # The persisted card state must reflect T2.
    stored = RefractorCardState.get(
        (RefractorCardState.player == hitter) & (RefractorCardState.team == home)
    )
    assert stored.current_tier == 2
# ---------------------------------------------------------------------------
# Gap 5: Pitcher through evaluate-game
# ---------------------------------------------------------------------------
def test_evaluate_game_pitcher_tier_advancement(client):
    """Pitcher reaching T1 through evaluate-game creates a boosted PitchingCard variant.

    What: Create a pitcher with a base PitchingCard (via
    _make_base_pitcher_card) and a RefractorCardState on an 'sp' track. Seed
    PitchingSeasonStats just below T1 for a prior season, then add a game in
    which the pitcher records enough outs and strikeouts to cross the
    threshold.

    The pitcher formula is ``outs / 3 + strikeouts`` with T1=37 and T2=149.
    Prior season: outs=60, strikeouts=16 -> 20 + 16 = 36.
    After the game: outs=63, strikeouts=17 -> 21 + 17 = 38, which lies in
    [T1=37, T2=149), so the pitcher must land on exactly tier 1.

    Why: Pitcher boost must follow the same evaluate-game flow as batter
    boost. If card_type='sp' is not handled, the pitcher track silently skips
    the boost and no tier_ups entry is emitted even when the threshold is
    passed. Pinning new_tier to exactly 1 also catches an over-promotion that
    a ">= 1" check would let through.
    """
    from app.services.refractor_boost import compute_variant_hash

    team_a = _make_team("PT1", gmid=20221)
    team_b = _make_team("PT2", gmid=20222)
    pitcher = _make_player("WP13 TierPitcher", pos="SP")
    # A batter is needed for the play record; the pitcher is on the pitching side.
    batter = _make_player("WP13 PitcherTest Batter")
    game = _make_game(team_a, team_b)
    sp_track, _ = RefractorTrack.get_or_create(
        name="WP13 SP Track",
        defaults=dict(
            card_type="sp",
            formula="outs / 3 + strikeouts",
            t1_threshold=37,
            t2_threshold=149,
            t3_threshold=448,
            t4_threshold=896,
        ),
    )
    _make_state(pitcher, team_a, sp_track, current_tier=0, current_value=0.0)
    _make_base_pitcher_card(pitcher)

    # Prior season: outs=60, K=16 -> 60/3 + 16 = 36 (below T1=37).
    PitchingSeasonStats.create(
        player=pitcher,
        team=team_a,
        season=10,
        outs=60,
        strikeouts=16,
    )

    # Game: pitcher records 3 outs (1 inning) and 1 K.
    # Career after game: outs=63, K=17 -> 63/3 + 17 = 21 + 17 = 38 > T1=37.
    _make_play(
        game,
        1,
        batter,
        team_b,
        pitcher,
        team_a,
        pa=1,
        ab=1,
        outs=3,
        so=1,
    )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    resp = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert resp.status_code == 200
    data = resp.json()

    # The pitcher must appear in tier_ups exactly once.
    pitcher_ups = [
        tu for tu in data["tier_ups"] if tu["player_id"] == pitcher.player_id
    ]
    assert len(pitcher_ups) == 1, (
        f"Expected 1 tier_up for pitcher, got: {data['tier_ups']}"
    )
    tu = pitcher_ups[0]
    assert tu["old_tier"] == 0
    # value=38 is in [37, 149): anything other than T1 is a bug.
    assert tu["new_tier"] == 1

    # A boosted PitchingCard variant must exist in the database.
    t1_hash = compute_variant_hash(pitcher.player_id, 1)
    variant_card = PitchingCard.get_or_none(
        (PitchingCard.player == pitcher) & (PitchingCard.variant == t1_hash)
    )
    assert variant_card is not None, "T1 PitchingCard variant was not created"
# ---------------------------------------------------------------------------
# Gap 7: variant_created field in tier_up response
# ---------------------------------------------------------------------------
def test_evaluate_game_tier_up_includes_variant_created(client):
    """The tier_up entry carries variant_created holding the expected hash.

    What: Seed a batter at tier 0 whose stats cross T1 during the game. After
    evaluate-game, the single tier_ups entry must have a 'variant_created'
    key that is a non-zero int equal to compute_variant_hash(player_id, 1).

    Why: The bot reads variant_created to update the card image URL after a
    tier-up. A missing or wrong hash points the bot at the wrong card image
    (or none at all), breaking the tier-up animation in Discord.
    """
    from app.services.refractor_boost import compute_variant_hash

    home = _make_team("VC1", gmid=20231)
    away = _make_team("VC2", gmid=20232)
    hitter = _make_player("WP13 VariantCreated Batter")
    opposing_sp = _make_player("WP13 VariantCreated Pitcher", pos="SP")
    game = _make_game(home, away)
    track = _make_track(name="WP13 VariantCreated Track")
    _make_state(hitter, home, track, current_tier=0, current_value=0.0)
    _make_base_batter_card(hitter)

    # Prior season leaves the batter at pa=34, just short of T1=37 ...
    BattingSeasonStats.create(player=hitter, team=home, season=10, pa=34)
    # ... and four more PA in this game bring the total to 38 > T1.
    for play_num in range(1, 5):
        _make_play(
            game, play_num, hitter, home, opposing_sp, away, pa=1, ab=1, outs=1
        )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    response = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert response.status_code == 200
    tier_ups = response.json()["tier_ups"]
    assert len(tier_ups) == 1
    entry = tier_ups[0]

    # variant_created must be present, a non-zero int, and equal to the T1 hash.
    assert "variant_created" in entry, "variant_created key missing from tier_up entry"
    reported = entry["variant_created"]
    assert isinstance(reported, int)
    assert reported != 0
    assert reported == compute_variant_hash(hitter.player_id, 1)
# ---------------------------------------------------------------------------
# Gap 8: Empty card_type on track produces no tier-up
# ---------------------------------------------------------------------------
def test_evaluate_game_skips_boost_when_track_has_no_card_type(client):
    """A track whose card_type is empty yields no tier-up notification.

    What: Create a RefractorTrack with card_type="" and seed a batter whose
    stats cross T1 during the game, then call evaluate-game.

    Why: apply_tier_boost() needs a valid card_type to know which card model
    to use, so with an empty or None card_type the boost cannot run. The
    endpoint must skip the tier-up notification entirely - it must NOT report
    a tier-up that was never applied to the database, because a phantom
    tier-up would make the bot announce a card upgrade that does not exist.
    """
    home = _make_team("NC1", gmid=20241)
    away = _make_team("NC2", gmid=20242)
    hitter = _make_player("WP13 NoCardType Batter")
    opposing_sp = _make_player("WP13 NoCardType Pitcher", pos="SP")
    game = _make_game(home, away)

    # card_type="" is deliberately invalid; everything else is a normal track.
    track_defaults = {
        "card_type": "",
        "formula": "pa + tb * 2",
        "t1_threshold": 37,
        "t2_threshold": 149,
        "t3_threshold": 448,
        "t4_threshold": 896,
    }
    empty_type_track, _ = RefractorTrack.get_or_create(
        name="WP13 NoCardType Track",
        defaults=track_defaults,
    )
    _make_state(hitter, home, empty_type_track, current_tier=0, current_value=0.0)

    # pa=34 beforehand is below T1; four more PA in the game push past it.
    BattingSeasonStats.create(player=hitter, team=home, season=10, pa=34)
    for play_num in range(1, 5):
        _make_play(
            game, play_num, hitter, home, opposing_sp, away, pa=1, ab=1, outs=1
        )

    client.post(f"/api/v2/season-stats/update-game/{game.id}", headers=AUTH_HEADER)
    response = client.post(
        f"/api/v2/refractor/evaluate-game/{game.id}", headers=AUTH_HEADER
    )
    assert response.status_code == 200
    payload = response.json()

    # Nothing may be reported when the boost could not run.
    assert payload["tier_ups"] == []

    # The stored tier stays at 0 because no boost was applied.
    stored = RefractorCardState.get(
        (RefractorCardState.player == hitter) & (RefractorCardState.team == home)
    )
    assert stored.current_tier == 0

View File

@ -12,6 +12,8 @@ from app.services.refractor_boost import (
apply_batter_boost,
apply_pitcher_boost,
compute_variant_hash,
compute_batter_display_stats,
compute_pitcher_display_stats,
BATTER_OUTCOME_COLUMNS,
PITCHER_OUTCOME_COLUMNS,
PITCHER_PRIORITY,
@ -126,11 +128,11 @@ def _singles_pitcher_vl():
"""Gibson 2020 vL — contact/groundball SP with typical distribution.
Variable outcome columns (18) sum to 79. X-check columns sum to 29.
Full card sums to 108. double_cf=2.95 so the priority algorithm will
start there before moving to singles.
Full card sums to 108 (79 variable + 29 x-checks). double_cf=2.95 so
the priority algorithm will start there before moving to singles.
"""
return {
# Variable columns (sum=79)
# Variable columns (79 of 108; x-checks add 29)
"homerun": 3.3,
"bp_homerun": 2.0,
"triple": 0.75,
@ -167,11 +169,11 @@ def _no_doubles_pitcher():
All three double columns (double_cf, double_three, double_two) are 0.0,
so the algorithm must skip past them and start reducing singles.
Variable columns sum to 79; x-checks sum to 29; full card sums to 108.
Variable columns sum to 79 (79 of 108; x-checks add 29); full card sums to 108.
groundout_b raised to 10.5 to bring variable sum from 77.0 to 79.0.
"""
return {
# Variable columns (sum=79)
# Variable columns (79 of 108; x-checks add 29)
"homerun": 2.0,
"bp_homerun": 1.0,
"triple": 0.5,
@ -511,47 +513,50 @@ class TestBatterBoostTruncation:
class TestPitcherBoost108Sum:
"""Verify the pitcher card invariant: 18 variable columns sum to 79,
x-checks sum to 29, total is 108 all after every boost.
x-checks sum to 29, full card total is 108 — all after every boost.
"""
def test_singles_heavy_pitcher(self):
"""Gibson-like pitcher (double_cf=2.95, no other doubles) maintains
79-sum for the 18 variable columns after one boost.
the 108-sum card invariant after one boost (variable subset stays at 79).
What: Boost the Gibson vL card once. Assert that the 18 variable
columns (PITCHER_OUTCOME_COLUMNS) still sum to 79.
columns (PITCHER_OUTCOME_COLUMNS) still sum to 79 (the variable-column
subset of the 108-total card).
Why: The pitcher boost algorithm converts hit/walk chances to
strikeouts without changing the total number of outcomes. If any
chance is created or destroyed, the 79-sum breaks and game simulation
results become unreliable.
chance is created or destroyed, the variable subset drifts from 79,
breaking the 108-sum card invariant and making game simulation
results unreliable.
"""
result = apply_pitcher_boost(_singles_pitcher_vl())
assert _pitcher_var_sum(result) == pytest.approx(79.0, abs=1e-6)
def test_no_doubles_pitcher(self):
"""Pitcher with all three double columns at 0 skips them, reduces
singles instead, and the 18-column variable sum stays at 79.
singles instead, and the 18-column variable sum stays at 79 (of 108 total).
What: Boost the no-doubles pitcher fixture once. The priority list
starts with double_cf, double_three, double_two all zero so the
algorithm must skip them and begin consuming single_center. The
79-sum must be preserved.
variable-column subset (79 of 108) must be preserved.
Why: Validates the zero-skip logic in the priority loop. Without it,
the algorithm would incorrectly deduct from a 0-value column, producing
a negative entry and an invalid 79-sum.
a negative entry and violating the 108-sum card invariant.
"""
result = apply_pitcher_boost(_no_doubles_pitcher())
assert _pitcher_var_sum(result) == pytest.approx(79.0, abs=1e-6)
def test_cumulative_four_tiers(self):
"""Four successive boosts: sum==79 after each tier; no column is negative.
"""Four successive boosts: variable-column sum==79 (of 108 total) after
each tier; no column is negative.
What: Apply four boosts in sequence to the Gibson vL card (highest
number of reducible doubles/singles available). After each boost,
assert that the 18-column sum is 79.0 and that no individual column
went negative.
assert that the 18-column variable sum is 79.0 and that no individual
column went negative.
Why: Cumulative boost scenarios are the real production use case.
Float drift and edge cases in the priority budget loop could silently
@ -563,7 +568,7 @@ class TestPitcherBoost108Sum:
card = apply_pitcher_boost(card)
total = _pitcher_var_sum(card)
assert total == pytest.approx(79.0, abs=1e-6), (
f"79-sum drifted to {total} after tier {tier}"
f"Variable-column sum (79 of 108) drifted to {total} after tier {tier}"
)
for col in PITCHER_OUTCOME_COLUMNS:
assert card[col] >= -1e-9, (
@ -743,8 +748,8 @@ class TestPitcherBoostZeroSkip:
Why: This is the absolute edge of the zero-skip path. Without the
`if remaining > 0: logger.warning(...)` guard the unspent budget would
be silently discarded. More importantly, no column should be modified:
the 79-sum must be preserved, strikeout must not change, and every
priority column must still be exactly 0.0.
the variable-column subset (79 of 108 total) must be preserved,
strikeout must not change, and every priority column must still be 0.0.
"""
priority_cols = {col for col, _ in PITCHER_PRIORITY}
card = {col: 0.0 for col in PITCHER_OUTCOME_COLUMNS}
@ -764,7 +769,7 @@ class TestPitcherBoostZeroSkip:
result = apply_pitcher_boost(card)
# Variable columns still sum to 79
# Variable columns still sum to 79 (79 of 108 total; x-checks unchanged at 29)
assert _pitcher_var_sum(result) == pytest.approx(79.0, abs=1e-6)
# Strikeout is unchanged — nothing was moved into it
assert result["strikeout"] == pytest.approx(before_strikeout, abs=1e-9)
@ -904,3 +909,271 @@ class TestVariantHash:
f"Cosmetics order affected hash: ['foil','chrome']={h1}, "
f"['chrome','foil']={h2}"
)
# ---------------------------------------------------------------------------
# Batter display stats
# ---------------------------------------------------------------------------
def _zeroed_batter(outs: float = 108.0) -> dict:
    """Build a batter ratings dict with every outcome at 0 except groundout_c.

    Every column in BATTER_OUTCOME_COLUMNS starts at 0.0 and ``outs`` chances
    are then placed in groundout_c. With the default ``outs=108.0`` the
    columns sum to exactly 108.0; tests pass a smaller value and fill the
    remainder with the columns they want to isolate.
    """
    card = dict.fromkeys(BATTER_OUTCOME_COLUMNS, 0.0)
    card["groundout_c"] = outs
    return card
class TestBatterDisplayStats:
    """Unit tests for compute_batter_display_stats(ratings) -> dict.

    Every test feeds the function a plain dict (no DB, no fixtures) and
    checks the returned avg / obp / slg values.

    Formula under test (denominator is always 108):
        avg = (HR + bp_HR/2 + triple + dbl_3 + dbl_2 + dbl_pull
               + sgl_2 + sgl_1 + sgl_ctr + bp_sgl/2) / 108
        obp = avg + (hbp + walk) / 108
        slg = (HR*4 + bp_HR*2 + triple*3 + dbl_3*2 + dbl_2*2 + dbl_pull*2
               + sgl_2 + sgl_1 + sgl_ctr + bp_sgl/2) / 108
    """

    def test_avg_reflects_hit_chances(self):
        """homerun=9.0 with the other 99 chances as outs gives avg == 9/108.

        What: Only homerun is non-zero among the hit columns; the card still
        accounts for all 108 chances (9 HR + 99 groundout_c).

        Why: Shows that homerun enters the avg numerator at full weight and
        that the denominator is 108.
        """
        ratings = {**_zeroed_batter(99.0), "homerun": 9.0}
        stats = compute_batter_display_stats(ratings)
        assert stats["avg"] == pytest.approx(9.0 / 108, abs=1e-6)

    def test_bp_homerun_half_weighted_in_avg(self):
        """bp_homerun=6.0 adds only 3.0/108 to avg (half weight).

        What: Card with bp_homerun=6.0 and the rest in outs.

        Why: The formula halves the contribution of ballpark home runs to
        batting average. Using a coefficient of 1.0 instead of 0.5 would
        inflate avg for cards with significant bp_homerun values.
        """
        ratings = {**_zeroed_batter(102.0), "bp_homerun": 6.0}
        stats = compute_batter_display_stats(ratings)
        assert stats["avg"] == pytest.approx(3.0 / 108, abs=1e-6)

    def test_bp_single_half_weighted_in_avg(self):
        """bp_single=8.0 adds only 4.0/108 to avg (half weight).

        What: Card with bp_single=8.0 and the rest in outs.

        Why: Ballpark singles are half-weighted as well; this confirms the
        /2 divisor is applied to bp_single in the avg numerator.
        """
        ratings = {**_zeroed_batter(100.0), "bp_single": 8.0}
        stats = compute_batter_display_stats(ratings)
        assert stats["avg"] == pytest.approx(4.0 / 108, abs=1e-6)

    def test_obp_adds_hbp_and_walk_on_top_of_avg(self):
        """obp == avg + (hbp + walk) / 108 for homerun=9, hbp=9, walk=9.

        What: Card with homerun=9, hbp=9, walk=9 and 81 chances in outs.
        avg must be 9/108 and obp must be avg + 18/108.

        Why: OBP extends AVG with on-base events that are not hits. If hbp
        or walk leaked into the avg numerator, obp would count them twice;
        checking avg and obp together confirms they are added only once,
        outside the avg sub-expression.
        """
        ratings = {
            **_zeroed_batter(81.0),
            "homerun": 9.0,
            "hbp": 9.0,
            "walk": 9.0,
        }
        stats = compute_batter_display_stats(ratings)
        avg_target = 9.0 / 108
        obp_target = avg_target + 18.0 / 108
        assert stats["avg"] == pytest.approx(avg_target, abs=1e-6)
        assert stats["obp"] == pytest.approx(obp_target, abs=1e-6)

    def test_slg_uses_correct_weights(self):
        """SLG numerator: HR*4 + triple*3 + double_pull*2 + single_one*1.

        What: Card with homerun=4, triple=3, double_pull=2, single_one=1 and
        98 outs (sum 108). slg must equal (4*4 + 3*3 + 2*2 + 1*1) / 108,
        i.e. 30/108.

        Why: Each hit type carries its own total-base weight in SLG. A wrong
        coefficient (e.g. treating a triple as a double) would systematically
        misstate slugging for power hitters.
        """
        # column -> (chances on the card, total-base weight)
        hits = {
            "homerun": (4.0, 4),
            "triple": (3.0, 3),
            "double_pull": (2.0, 2),
            "single_one": (1.0, 1),
        }
        ratings = _zeroed_batter(98.0)
        for column, (chances, _weight) in hits.items():
            ratings[column] = chances
        stats = compute_batter_display_stats(ratings)
        slg_target = sum(chances * weight for chances, weight in hits.values()) / 108
        assert stats["slg"] == pytest.approx(slg_target, abs=1e-6)

    def test_all_zeros_returns_zeros(self):
        """A card with no hit/walk/hbp chances yields avg = obp = slg = 0.

        What: All 108 chances sit in groundout_c; every hit, walk and hbp
        column is 0.

        Why: Confirms the function neither raises nor produces NaN on a
        degenerate all-out card and that a zero numerator returns clean zeros.
        """
        stats = compute_batter_display_stats(_zeroed_batter(108.0))
        for key in ("avg", "obp", "slg"):
            assert stats[key] == pytest.approx(0.0, abs=1e-6)

    def test_matches_known_card(self):
        """Display stats for the silver batter fixture are internally consistent.

        What: Run compute_batter_display_stats on _silver_batter_vr() and
        check avg > 0, obp > avg and slg > avg - the ordering expected of any
        hitter with positive hit and extra-base-hit chances.

        Why: Verifies relative ordering on a realistic card. Absolute values
        are not hard-coded because the fixture was designed for boost tests;
        relative ordering is enough to catch sign errors or column swaps.
        """
        stats = compute_batter_display_stats(_silver_batter_vr())
        avg = stats["avg"]
        assert avg > 0
        assert stats["obp"] > avg
        assert stats["slg"] > avg
# ---------------------------------------------------------------------------
# Pitcher display stats
# ---------------------------------------------------------------------------
def _zeroed_pitcher(strikeout: float = 79.0) -> dict:
    """Build a pitcher ratings dict with every outcome at 0 except strikeout.

    Every column in PITCHER_OUTCOME_COLUMNS starts at 0.0 and ``strikeout``
    chances are then placed in the strikeout column. With the default
    ``strikeout=79.0`` the variable columns sum to 79. X-check columns are
    deliberately left out of the dict.
    """
    card = dict.fromkeys(PITCHER_OUTCOME_COLUMNS, 0.0)
    card["strikeout"] = strikeout
    return card
class TestPitcherDisplayStats:
    """Unit tests for compute_pitcher_display_stats(ratings) -> dict.

    The pitcher formula mirrors the batter formula except that double_pull is
    replaced by double_cf (the pitcher-specific double column). All other hit
    columns are identical.

    Formula under test (denominator is always 108):
        avg = (HR + bp_HR/2 + triple + dbl_3 + dbl_2 + dbl_cf
               + sgl_2 + sgl_1 + sgl_ctr + bp_sgl/2) / 108
        obp = avg + (hbp + walk) / 108
        slg = (HR*4 + bp_HR*2 + triple*3 + dbl_3*2 + dbl_2*2 + dbl_cf*2
               + sgl_2 + sgl_1 + sgl_ctr + bp_sgl/2) / 108
    """

    def test_pitcher_uses_double_cf_not_double_pull(self):
        """double_cf=6.0 adds 6.0/108 to pitcher avg; double_pull is absent.

        What: Card with double_cf=6.0 and strikeout=73.0 (variable sum 79).

        Why: Pitching cards have double_cf rather than double_pull. An
        implementation that read double_pull from a pitcher dict would raise
        a KeyError or silently read 0 and produce a wrong avg.
        """
        ratings = {**_zeroed_pitcher(73.0), "double_cf": 6.0}
        stats = compute_pitcher_display_stats(ratings)
        assert stats["avg"] == pytest.approx(6.0 / 108, abs=1e-6)

    def test_pitcher_slg_double_cf_costs_2(self):
        """double_cf=6.0 alone adds 6.0*2/108 to pitcher slg.

        What: Same card as the avg test (double_cf=6.0, all other hit
        columns 0); slg must equal 12.0/108.

        Why: Doubles are worth two total bases in SLG; this checks that the
        coefficient of 2 is applied to double_cf.
        """
        ratings = {**_zeroed_pitcher(73.0), "double_cf": 6.0}
        stats = compute_pitcher_display_stats(ratings)
        assert stats["slg"] == pytest.approx(12.0 / 108, abs=1e-6)

    def test_pitcher_bp_homerun_half_weighted(self):
        """bp_homerun=4.0 adds only 2.0/108 to pitcher avg (half weight).

        What: Card with bp_homerun=4.0 and strikeout=75.0.

        Why: Mirrors the batter bp_homerun test - the half-weight rule
        applies to both card types, so the /2 divisor must be present in the
        pitcher formula too.
        """
        ratings = {**_zeroed_pitcher(75.0), "bp_homerun": 4.0}
        stats = compute_pitcher_display_stats(ratings)
        assert stats["avg"] == pytest.approx(2.0 / 108, abs=1e-6)

    def test_pitcher_obp_formula_matches_batter(self):
        """obp == avg + (hbp + walk) / 108, the same structure as for batters.

        What: Pitcher card with homerun=6, hbp=6, walk=6 and strikeout=61
        (variable sum 79). avg must be 6/108 and obp must be avg + 12/108.

        Why: The (hbp + walk) / 108 addend must be present on pitcher cards
        exactly as on batter cards; omitting either term would understate
        on-base percentage for walk-heavy pitchers.
        """
        ratings = {
            **_zeroed_pitcher(61.0),
            "homerun": 6.0,
            "hbp": 6.0,
            "walk": 6.0,
        }
        stats = compute_pitcher_display_stats(ratings)
        avg_target = 6.0 / 108
        obp_target = avg_target + 12.0 / 108
        assert stats["avg"] == pytest.approx(avg_target, abs=1e-6)
        assert stats["obp"] == pytest.approx(obp_target, abs=1e-6)

    def test_matches_known_pitcher_card(self):
        """Display stats for the Gibson vL fixture are internally consistent.

        What: Run compute_pitcher_display_stats on _singles_pitcher_vl() and
        check avg > 0, obp > avg and slg > avg.

        Why: The Gibson card has both hit and walk columns, so this ordering
        must hold. It exercises the function on a realistic pitcher card
        rather than a minimal synthetic one.
        """
        stats = compute_pitcher_display_stats(_singles_pitcher_vl())
        avg = stats["avg"]
        assert avg > 0
        assert stats["obp"] > avg
        assert stats["slg"] > avg

File diff suppressed because it is too large Load Diff

View File

@ -187,10 +187,11 @@ def _make_stats(player_id, team_id, season, **kwargs):
)
def _eval(player_id, team_id):
def _eval(player_id, team_id, dry_run: bool = False):
return evaluate_card(
player_id,
team_id,
dry_run=dry_run,
_stats_model=StatsStub,
_state_model=CardStateStub,
_compute_value_fn=_compute_value,
@ -392,13 +393,20 @@ class TestReturnShape:
"""Return dict has the expected keys and types."""
def test_return_keys(self, batter_track):
"""Result dict contains all expected keys."""
"""Result dict contains all expected keys.
Phase 2 addition: 'computed_tier' is included alongside 'current_tier'
so that evaluate-game can detect tier-ups without writing the tier
(dry_run=True path). Both keys must always be present.
"""
_make_state(1, 1, batter_track)
result = _eval(1, 1)
assert set(result.keys()) == {
"player_id",
"team_id",
"current_tier",
"computed_tier",
"computed_fully_evolved",
"current_value",
"fully_evolved",
"last_evaluated_at",
@ -621,3 +629,176 @@ class TestMultiTeamStatIsolation:
assert result_team2["current_tier"] == 2, (
f"Team 2 tier should be T2 for value=180, got {result_team2['current_tier']}"
)
class TestDryRun:
    """dry_run=True persists current_value and last_evaluated_at only.

    current_tier and fully_evolved are NOT written on the dry_run path, so
    that apply_tier_boost() can write tier + variant atomically afterwards.

    Most tests use stats that would produce a tier-up on a card seeded at
    tier 0, which makes the difference between dry and non-dry behaviour
    obvious.

    Stub thresholds (batter): T1=37, T2=149, T3=448, T4=896.
    value=160 maps to T2 (149 <= 160 < 448); value=900 maps to T4.
    """

    @staticmethod
    def _reload_card():
        """Re-read the player 1 / team 1 card-state row from the database."""
        return CardStateStub.get(
            (CardStateStub.player_id == 1) & (CardStateStub.team_id == 1)
        )

    def test_dry_run_does_not_write_current_tier(self, batter_track):
        """dry_run=True leaves current_tier unchanged in the database.

        What: Seed a card at tier 0, supply stats worth T2 (value=160), run
        evaluate_card with dry_run=True and re-read the row; current_tier
        must still be 0.

        Why: If the tier were persisted here and the subsequent boost failed,
        the card would sit on an advanced tier with no matching variant - an
        inconsistent state. The tier write belongs to apply_tier_boost().
        """
        _make_state(1, 1, batter_track, current_tier=0)
        _make_stats(1, 1, 1, pa=160)
        _eval(1, 1, dry_run=True)

        row = self._reload_card()
        assert row.current_tier == 0, (
            f"dry_run should not write current_tier; expected 0, got {row.current_tier}"
        )

    def test_dry_run_does_not_write_fully_evolved(self, batter_track):
        """dry_run=True leaves fully_evolved=False unchanged in the database.

        What: Seed a card at tier 0, supply stats worth T4 (value=900), run
        evaluate_card with dry_run=True and re-read the row; fully_evolved
        must still be False.

        Why: fully_evolved follows current_tier and must be written together
        with it by apply_tier_boost(); writing it here could leave the flag
        out of sync with the tier if the boost later fails.
        """
        _make_state(1, 1, batter_track, current_tier=0)
        # value=900 -> T4, which would normally set fully_evolved=True.
        _make_stats(1, 1, 1, pa=900)
        _eval(1, 1, dry_run=True)

        row = self._reload_card()
        assert row.fully_evolved is False, (
            "dry_run should not write fully_evolved; expected False, "
            f"got {row.fully_evolved}"
        )

    def test_dry_run_writes_current_value(self, batter_track):
        """dry_run=True DOES update current_value in the database.

        What: Seed a card with current_value=0, supply stats giving
        value=160, run evaluate_card with dry_run=True and re-read the row;
        current_value must now be 160.0.

        Why: current_value only tracks formula progress, so it is persisted
        regardless of dry_run.
        """
        _make_state(1, 1, batter_track, current_value=0.0)
        _make_stats(1, 1, 1, pa=160)
        _eval(1, 1, dry_run=True)

        row = self._reload_card()
        assert row.current_value == 160.0, (
            f"dry_run should still write current_value; expected 160.0, "
            f"got {row.current_value}"
        )

    def test_dry_run_writes_last_evaluated_at(self, batter_track):
        """dry_run=True DOES update last_evaluated_at in the database.

        What: Seed a card without specifying last_evaluated_at, run
        evaluate_card with dry_run=True and re-read the row;
        last_evaluated_at must not be None.

        Why: last_evaluated_at is a bookkeeping field that should always
        reflect the most recent evaluation attempt, independent of tier
        writes.
        """
        _make_state(1, 1, batter_track)
        _make_stats(1, 1, 1, pa=160)
        _eval(1, 1, dry_run=True)

        row = self._reload_card()
        assert row.last_evaluated_at is not None, (
            "dry_run should still write last_evaluated_at; got None"
        )

    def test_dry_run_returns_computed_tier(self, batter_track):
        """dry_run=True reports computed_tier=2 while current_tier stays 0.

        What: Seed at tier 0 with stats worth T2 (value=160) and call with
        dry_run=True. The result must have:
          - computed_tier == 2 (what the formula says)
          - current_tier == 0 (what is stored; unchanged)

        Why: Callers detect a pending tier-up from the gap between
        computed_tier and current_tier, so both keys must be present and
        correct for evaluate-game to gate apply_tier_boost().
        """
        _make_state(1, 1, batter_track, current_tier=0)
        _make_stats(1, 1, 1, pa=160)
        outcome = _eval(1, 1, dry_run=True)

        assert outcome["computed_tier"] == 2, (
            f"computed_tier should reflect formula result T2; got {outcome['computed_tier']}"
        )
        assert outcome["current_tier"] == 0, (
            f"current_tier should reflect unchanged DB value 0; got {outcome['current_tier']}"
        )

    def test_dry_run_returns_computed_fully_evolved(self, batter_track):
        """dry_run=True sets computed_fully_evolved correctly in the result.

        What: Two sub-cases, run back to back on a freshly seeded card:
          - value=160 (T2): computed_fully_evolved must be False.
          - value=900 (T4): computed_fully_evolved must be True.
        In both, the returned fully_evolved stays False because the tier was
        not written.

        Why: computed_fully_evolved tells callers whether the pending tier-up
        yields a fully-evolved card without re-querying or recomputing. It
        must follow the computed tier, not the stored fully_evolved value.
        """
        # (pa, expected computed flag, tier label, message for the stored flag)
        sub_cases = (
            (
                160,
                False,
                "T2",
                "stored fully_evolved should remain False after dry_run",
            ),
            (
                900,
                True,
                "T4",
                "stored fully_evolved should remain False after dry_run even at T4",
            ),
        )
        for case_index, (pa, expected, label, stored_msg) in enumerate(sub_cases):
            if case_index > 0:
                # Wipe the previous sub-case's rows before seeding again.
                CardStateStub.delete().execute()
                StatsStub.delete().execute()
            _make_state(1, 1, batter_track, current_tier=0)
            _make_stats(1, 1, 1, pa=pa)
            outcome = _eval(1, 1, dry_run=True)

            assert outcome["computed_fully_evolved"] is expected, (
                f"computed_fully_evolved should be {expected} for {label}; "
                f"got {outcome['computed_fully_evolved']}"
            )
            assert outcome["fully_evolved"] is False, stored_msg