From 1b4eab9d99d2e9b9bfb36dc5e0fb13c667907908 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Thu, 19 Mar 2026 10:17:13 -0500 Subject: [PATCH] refactor: replace incremental delta upserts with full recalculation in season stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach accumulated per-game deltas into season stats rows, which was fragile — partial processing corrupted stats, upsert bugs compounded, and there was no self-healing mechanism. Now update_season_stats() recomputes full season totals from all StratPlay rows for each affected player whenever a game is processed. The result replaces whatever was stored, eliminating double-counting and enabling self-healing via force=True. Also fixes: - evolution_evaluator.py: broken PlayerSeasonStats import → queries BattingSeasonStats or PitchingSeasonStats based on card_type - evolution_evaluator.py: r.k → r.strikeouts - test_evolution_models.py, test_postgame_evolution.py: PlayerSeasonStats → BattingSeasonStats (model never existed) Co-Authored-By: Claude Opus 4.6 (1M context) --- app/routers_v2/season_stats.py | 16 +- app/services/evolution_evaluator.py | 72 ++- app/services/season_stats.py | 766 ++++++++++++---------------- tests/test_evolution_evaluator.py | 2 +- tests/test_evolution_models.py | 38 +- tests/test_postgame_evolution.py | 28 +- tests/test_season_stats_update.py | 246 ++++++++- 7 files changed, 643 insertions(+), 525 deletions(-) diff --git a/app/routers_v2/season_stats.py b/app/routers_v2/season_stats.py index 91ee76e..90c0b8e 100644 --- a/app/routers_v2/season_stats.py +++ b/app/routers_v2/season_stats.py @@ -23,18 +23,24 @@ logger = logging.getLogger(__name__) @router.post("/update-game/{game_id}") -async def update_game_season_stats(game_id: int, token: str = Depends(oauth2_scheme)): - """Increment season stats with batting and pitching deltas from a game. +async def update_game_season_stats( + game_id: int, force: bool = False, token: str = Depends(oauth2_scheme) +): + """Recalculate season stats from all StratPlay and Decision rows for a game. - Calls update_season_stats(game_id) from the service layer which: + Calls update_season_stats(game_id, force=force) from the service layer which: - Aggregates all StratPlay rows by (player_id, team_id, season) - Merges Decision rows into pitching groups - Performs an additive ON CONFLICT upsert into player_season_stats - Guards against double-counting via the last_game FK check + Query params: + - force: if true, bypasses the idempotency guard and reprocesses a + previously seen game_id (useful for correcting stats after data fixes) + Response: {"updated": N, "skipped": false} - N: total player_season_stats rows upserted (batters + pitchers) - - skipped: true when this game_id was already processed (idempotent re-delivery) + - skipped: true when this game_id was already processed and force=false Errors from the service are logged but re-raised as 500 so the bot knows to retry. @@ -46,7 +52,7 @@ async def update_game_season_stats(game_id: int, token: str = Depends(oauth2_sch from ..services.season_stats import update_season_stats try: - result = update_season_stats(game_id) + result = update_season_stats(game_id, force=force) except Exception as exc: logger.error("update-game/%d failed: %s", game_id, exc, exc_info=True) raise HTTPException( diff --git a/app/services/evolution_evaluator.py b/app/services/evolution_evaluator.py index 230345c..81dab5a 100644 --- a/app/services/evolution_evaluator.py +++ b/app/services/evolution_evaluator.py @@ -81,9 +81,6 @@ def evaluate_card( Raises: ValueError: If no evolution_card_state row exists for (player_id, team_id). """ - if _stats_model is None: - from app.db_engine import PlayerSeasonStats as _stats_model # noqa: PLC0415 - if _state_model is None: from app.db_engine import EvolutionCardState as _state_model # noqa: PLC0415 @@ -107,22 +104,63 @@ def evaluate_card( f"No evolution_card_state for player_id={player_id} team_id={team_id}" ) - # 2. Load career totals: SUM all player_season_stats rows for (player_id, team_id) - rows = list( - _stats_model.select().where( - (_stats_model.player_id == player_id) & (_stats_model.team_id == team_id) + # 2. Load career totals from the appropriate season stats table + if _stats_model is not None: + # Test override: use the injected stub model for all fields + rows = list( + _stats_model.select().where( + (_stats_model.player_id == player_id) + & (_stats_model.team_id == team_id) + ) ) - ) + totals = _CareerTotals( + pa=sum(r.pa for r in rows), + hits=sum(r.hits for r in rows), + doubles=sum(r.doubles for r in rows), + triples=sum(r.triples for r in rows), + hr=sum(r.hr for r in rows), + outs=sum(r.outs for r in rows), + strikeouts=sum(r.strikeouts for r in rows), + ) + else: + from app.db_engine import ( + BattingSeasonStats, + PitchingSeasonStats, + ) # noqa: PLC0415 - totals = _CareerTotals( - pa=sum(r.pa for r in rows), - hits=sum(r.hits for r in rows), - doubles=sum(r.doubles for r in rows), - triples=sum(r.triples for r in rows), - hr=sum(r.hr for r in rows), - outs=sum(r.outs for r in rows), - strikeouts=sum(r.k for r in rows), - ) + card_type = card_state.track.card_type + if card_type == "batter": + rows = list( + BattingSeasonStats.select().where( + (BattingSeasonStats.player == player_id) + & (BattingSeasonStats.team == team_id) + ) + ) + totals = _CareerTotals( + pa=sum(r.pa for r in rows), + hits=sum(r.hits for r in rows), + doubles=sum(r.doubles for r in rows), + triples=sum(r.triples for r in rows), + hr=sum(r.hr for r in rows), + outs=0, + strikeouts=sum(r.strikeouts for r in rows), + ) + else: + rows = list( + PitchingSeasonStats.select().where( + (PitchingSeasonStats.player == player_id) + & (PitchingSeasonStats.team == team_id) + ) + ) + totals = _CareerTotals( + pa=0, + hits=0, + doubles=0, + triples=0, + hr=0, + outs=sum(r.outs for r in rows), + strikeouts=sum(r.strikeouts for r in rows), + ) # 3. Determine track track = card_state.track diff --git a/app/services/season_stats.py b/app/services/season_stats.py index c37deae..ba82e8f 100644 --- a/app/services/season_stats.py +++ b/app/services/season_stats.py @@ -1,27 +1,32 @@ """ -season_stats.py — Incremental BattingSeasonStats and PitchingSeasonStats update logic. +season_stats.py — Full-recalculation BattingSeasonStats and PitchingSeasonStats update logic. -Called once per completed StratGame to accumulate batting and pitching -statistics into the batting_season_stats and pitching_season_stats tables -respectively. +Called once per completed StratGame to recompute the full season batting and +pitching statistics for every player who appeared in that game, then write +those totals to the batting_season_stats and pitching_season_stats tables. -Idempotency: re-delivery of a game (including out-of-order re-delivery) -is detected via an atomic INSERT into the ProcessedGame ledger table -keyed on game_id. The first call for a given game_id succeeds; all -subsequent calls return early with "skipped": True without modifying -any stats rows. +Unlike the previous incremental (delta) approach, each call recomputes totals +from scratch by aggregating all StratPlay rows for the player+team+season +triple. This eliminates double-counting on re-delivery and makes every row a +faithful snapshot of the full season to date. -Peewee upsert strategy: -- SQLite: read-modify-write inside db.atomic() transaction -- PostgreSQL: ON CONFLICT ... DO UPDATE with column-level EXCLUDED increments +Idempotency: re-delivery of a game is detected via the ProcessedGame ledger +table, keyed on game_id. +- First call: records the ledger entry and proceeds with recalculation. +- Subsequent calls without force=True: return early with "skipped": True. +- force=True: skips the early-return check and recalculates anyway (useful + for correcting data after retroactive stat adjustments). + +Upsert strategy: get_or_create + field assignment + save(). Because we are +writing the full recomputed total rather than adding a delta, there is no +risk of concurrent-write skew between games. A single unified path works for +both SQLite and PostgreSQL. """ import logging -import os -from collections import defaultdict from datetime import datetime -from peewee import EXCLUDED +from peewee import Case, fn from app.db_engine import ( db, @@ -35,464 +40,297 @@ from app.db_engine import ( logger = logging.getLogger(__name__) -DATABASE_TYPE = os.environ.get("DATABASE_TYPE", "sqlite").lower() - -def _build_batting_groups(plays): +def _get_player_pairs(game_id: int) -> tuple[set, set]: """ - Aggregate per-play batting stats by (batter_id, batter_team_id). + Return the sets of (player_id, team_id) pairs that appeared in the game. - Only plays where pa > 0 are counted toward games, but all - play-level stat fields are accumulated regardless of pa value so - that rare edge cases (e.g. sac bunt without official PA) are - correctly included in the totals. + Queries StratPlay for all rows belonging to game_id and extracts: + - batting_pairs: set of (batter_id, batter_team_id), excluding rows where + batter_id is None (e.g. automatic outs, walk-off plays without a PA). + - pitching_pairs: set of (pitcher_id, pitcher_team_id) for all plays + (pitcher is always present). - Returns a dict keyed by (batter_id, batter_team_id) with stat dicts - matching BattingSeasonStats column names. + Args: + game_id: Primary key of the StratGame to query. + + Returns: + Tuple of (batting_pairs, pitching_pairs) where each element is a set + of (int, int) tuples. """ - groups = defaultdict( - lambda: { - "games": 0, - "pa": 0, - "ab": 0, - "hits": 0, - "doubles": 0, - "triples": 0, - "hr": 0, - "rbi": 0, - "runs": 0, - "bb": 0, - "strikeouts": 0, - "hbp": 0, - "sac": 0, - "ibb": 0, - "gidp": 0, - "sb": 0, - "cs": 0, - "appeared": False, # tracks whether batter appeared at all in this game - } + plays = ( + StratPlay.select( + StratPlay.batter, + StratPlay.batter_team, + StratPlay.pitcher, + StratPlay.pitcher_team, + ) + .where(StratPlay.game == game_id) + .tuples() ) - for play in plays: - batter_id = play.batter_id - batter_team_id = play.batter_team_id + batting_pairs: set[tuple[int, int]] = set() + pitching_pairs: set[tuple[int, int]] = set() - if batter_id is None: - continue + for batter_id, batter_team_id, pitcher_id, pitcher_team_id in plays: + if batter_id is not None: + batting_pairs.add((batter_id, batter_team_id)) + pitching_pairs.add((pitcher_id, pitcher_team_id)) - key = (batter_id, batter_team_id) - g = groups[key] - - g["pa"] += play.pa - g["ab"] += play.ab - g["hits"] += play.hit - g["doubles"] += play.double - g["triples"] += play.triple - g["hr"] += play.homerun - g["rbi"] += play.rbi - g["runs"] += play.run - g["bb"] += play.bb - g["strikeouts"] += play.so - g["hbp"] += play.hbp - g["sac"] += play.sac - g["ibb"] += play.ibb - g["gidp"] += play.gidp - g["sb"] += play.sb - g["cs"] += play.cs - - if play.pa > 0 and not g["appeared"]: - g["games"] = 1 - g["appeared"] = True - - # Clean up the helper flag before returning - for key in groups: - del groups[key]["appeared"] - - return groups + return batting_pairs, pitching_pairs -def _build_pitching_groups(plays): +def _recalc_batting(player_id: int, team_id: int, season: int) -> dict: """ - Aggregate per-play pitching stats by (pitcher_id, pitcher_team_id). + Recompute full-season batting totals for a player+team+season triple. - Stats on StratPlay are recorded from the batter's perspective, so - when accumulating pitcher stats we collect: - - outs → pitcher outs recorded (directly on play) - - so → strikeouts (batter's so = pitcher's strikeouts) - - hit → hits allowed - - bb → walks allowed (batter bb, separate from hbp) - - hbp → hit batters - - homerun → home runs allowed + Aggregates every StratPlay row where batter == player_id and + batter_team == team_id across all games in the given season. - games counts unique pitchers who appeared (at least one play as - pitcher), capped at 1 per game since this function processes a - single game. games_started is populated later via _apply_decisions(). + games counts only games where the player had at least one official PA + (pa > 0). The COUNT(DISTINCT ...) with a CASE expression achieves this: + NULL values from the CASE are ignored by COUNT, so only game IDs where + pa > 0 contribute. - Fields not available from StratPlay (runs_allowed, earned_runs, - wild_pitches, balks) default to 0 and are not incremented. + Args: + player_id: FK to the player record. + team_id: FK to the team record. + season: Integer season year. - Returns a dict keyed by (pitcher_id, pitcher_team_id) with stat dicts - matching PitchingSeasonStats column names. + Returns: + Dict with keys matching BattingSeasonStats columns; all values are + native Python ints (defaulting to 0 if no rows matched). """ - groups = defaultdict( - lambda: { - "games": 1, # pitcher appeared in this game by definition - "games_started": 0, # populated later via _apply_decisions - "outs": 0, - "strikeouts": 0, - "bb": 0, - "hits_allowed": 0, - "runs_allowed": 0, # not available from StratPlay - "earned_runs": 0, # not available from StratPlay - "hr_allowed": 0, - "hbp": 0, - "wild_pitches": 0, # not available from StratPlay - "balks": 0, # not available from StratPlay - "wins": 0, - "losses": 0, - "holds": 0, - "saves": 0, - "blown_saves": 0, - } + row = ( + StratPlay.select( + fn.COUNT( + fn.DISTINCT(Case(None, [(StratPlay.pa > 0, StratPlay.game)], None)) + ).alias("games"), + fn.SUM(StratPlay.pa).alias("pa"), + fn.SUM(StratPlay.ab).alias("ab"), + fn.SUM(StratPlay.hit).alias("hits"), + fn.SUM(StratPlay.double).alias("doubles"), + fn.SUM(StratPlay.triple).alias("triples"), + fn.SUM(StratPlay.homerun).alias("hr"), + fn.SUM(StratPlay.rbi).alias("rbi"), + fn.SUM(StratPlay.run).alias("runs"), + fn.SUM(StratPlay.bb).alias("bb"), + fn.SUM(StratPlay.so).alias("strikeouts"), + fn.SUM(StratPlay.hbp).alias("hbp"), + fn.SUM(StratPlay.sac).alias("sac"), + fn.SUM(StratPlay.ibb).alias("ibb"), + fn.SUM(StratPlay.gidp).alias("gidp"), + fn.SUM(StratPlay.sb).alias("sb"), + fn.SUM(StratPlay.cs).alias("cs"), + ) + .join(StratGame, on=(StratPlay.game == StratGame.id)) + .where( + StratPlay.batter == player_id, + StratPlay.batter_team == team_id, + StratGame.season == season, + ) + .dicts() + .first() ) - for play in plays: - pitcher_id = play.pitcher_id - pitcher_team_id = play.pitcher_team_id + if row is None: + row = {} - if pitcher_id is None: - continue - - key = (pitcher_id, pitcher_team_id) - g = groups[key] - - g["outs"] += play.outs - g["strikeouts"] += play.so - g["hits_allowed"] += play.hit - g["bb"] += play.bb - g["hbp"] += play.hbp - g["hr_allowed"] += play.homerun - - return groups + return { + "games": row.get("games") or 0, + "pa": row.get("pa") or 0, + "ab": row.get("ab") or 0, + "hits": row.get("hits") or 0, + "doubles": row.get("doubles") or 0, + "triples": row.get("triples") or 0, + "hr": row.get("hr") or 0, + "rbi": row.get("rbi") or 0, + "runs": row.get("runs") or 0, + "bb": row.get("bb") or 0, + "strikeouts": row.get("strikeouts") or 0, + "hbp": row.get("hbp") or 0, + "sac": row.get("sac") or 0, + "ibb": row.get("ibb") or 0, + "gidp": row.get("gidp") or 0, + "sb": row.get("sb") or 0, + "cs": row.get("cs") or 0, + } -def _apply_decisions(pitching_groups, decisions): +def _recalc_pitching(player_id: int, team_id: int, season: int) -> dict: """ - Merge Decision rows into the pitching stat groups. + Recompute full-season pitching totals for a player+team+season triple. - Each Decision belongs to exactly one pitcher in the game, containing - win/loss/save/hold/blown-save flags and the is_start indicator. + Aggregates every StratPlay row where pitcher == player_id and + pitcher_team == team_id across all games in the given season. games counts + all distinct games in which the pitcher appeared (any play qualifies). + + Stats derived from StratPlay (from the batter-perspective columns): + - outs = SUM(outs) + - strikeouts = SUM(so) — batter SO = pitcher K + - hits_allowed = SUM(hit) + - bb = SUM(bb) — walks allowed + - hbp = SUM(hbp) + - hr_allowed = SUM(homerun) + - wild_pitches = SUM(wild_pitch) + - balks = SUM(balk) + + Fields not available from StratPlay (runs_allowed, earned_runs) default + to 0. Decision-level fields (wins, losses, etc.) are populated separately + by _recalc_decisions() and merged in the caller. + + Args: + player_id: FK to the player record. + team_id: FK to the team record. + season: Integer season year. + + Returns: + Dict with keys matching PitchingSeasonStats columns (excluding + decision fields, which are filled by _recalc_decisions). """ - for decision in decisions: - pitcher_id = decision.pitcher_id - pitcher_team_id = decision.pitcher_team_id - key = (pitcher_id, pitcher_team_id) - - # Pitcher may have a Decision without plays (rare edge case for - # games where the Decision was recorded without StratPlay rows). - # Initialise a zeroed entry if not already present. - if key not in pitching_groups: - pitching_groups[key] = { - "games": 1, - "games_started": 0, - "outs": 0, - "strikeouts": 0, - "bb": 0, - "hits_allowed": 0, - "runs_allowed": 0, - "earned_runs": 0, - "hr_allowed": 0, - "hbp": 0, - "wild_pitches": 0, - "balks": 0, - "wins": 0, - "losses": 0, - "holds": 0, - "saves": 0, - "blown_saves": 0, - } - - g = pitching_groups[key] - g["wins"] += decision.win - g["losses"] += decision.loss - g["saves"] += decision.is_save - g["holds"] += decision.hold - g["blown_saves"] += decision.b_save - g["games_started"] += 1 if decision.is_start else 0 - - -def _upsert_batting_postgres(player_id, team_id, season, game_id, batting): - """ - PostgreSQL upsert for BattingSeasonStats using ON CONFLICT ... DO UPDATE. - Each stat column is incremented by the EXCLUDED (incoming) value, - ensuring concurrent games don't overwrite each other. - """ - now = datetime.now() - - increment_cols = [ - "games", - "pa", - "ab", - "hits", - "doubles", - "triples", - "hr", - "rbi", - "runs", - "bb", - "strikeouts", - "hbp", - "sac", - "ibb", - "gidp", - "sb", - "cs", - ] - - conflict_target = [ - BattingSeasonStats.player, - BattingSeasonStats.team, - BattingSeasonStats.season, - ] - - update_dict = {} - for col in increment_cols: - field_obj = getattr(BattingSeasonStats, col) - update_dict[field_obj] = field_obj + EXCLUDED[col] - update_dict[BattingSeasonStats.last_game] = EXCLUDED["last_game_id"] - update_dict[BattingSeasonStats.last_updated_at] = EXCLUDED["last_updated_at"] - - BattingSeasonStats.insert( - player=player_id, - team=team_id, - season=season, - games=batting.get("games", 0), - pa=batting.get("pa", 0), - ab=batting.get("ab", 0), - hits=batting.get("hits", 0), - doubles=batting.get("doubles", 0), - triples=batting.get("triples", 0), - hr=batting.get("hr", 0), - rbi=batting.get("rbi", 0), - runs=batting.get("runs", 0), - bb=batting.get("bb", 0), - strikeouts=batting.get("strikeouts", 0), - hbp=batting.get("hbp", 0), - sac=batting.get("sac", 0), - ibb=batting.get("ibb", 0), - gidp=batting.get("gidp", 0), - sb=batting.get("sb", 0), - cs=batting.get("cs", 0), - last_game=game_id, - last_updated_at=now, - ).on_conflict( - conflict_target=conflict_target, - action="update", - update=update_dict, - ).execute() - - -def _upsert_pitching_postgres(player_id, team_id, season, game_id, pitching): - """ - PostgreSQL upsert for PitchingSeasonStats using ON CONFLICT ... DO UPDATE. - Each stat column is incremented by the EXCLUDED (incoming) value, - ensuring concurrent games don't overwrite each other. - """ - now = datetime.now() - - increment_cols = [ - "games", - "games_started", - "outs", - "strikeouts", - "bb", - "hits_allowed", - "runs_allowed", - "earned_runs", - "hr_allowed", - "hbp", - "wild_pitches", - "balks", - "wins", - "losses", - "holds", - "saves", - "blown_saves", - ] - - conflict_target = [ - PitchingSeasonStats.player, - PitchingSeasonStats.team, - PitchingSeasonStats.season, - ] - - update_dict = {} - for col in increment_cols: - field_obj = getattr(PitchingSeasonStats, col) - update_dict[field_obj] = field_obj + EXCLUDED[col] - update_dict[PitchingSeasonStats.last_game] = EXCLUDED["last_game_id"] - update_dict[PitchingSeasonStats.last_updated_at] = EXCLUDED["last_updated_at"] - - PitchingSeasonStats.insert( - player=player_id, - team=team_id, - season=season, - games=pitching.get("games", 0), - games_started=pitching.get("games_started", 0), - outs=pitching.get("outs", 0), - strikeouts=pitching.get("strikeouts", 0), - bb=pitching.get("bb", 0), - hits_allowed=pitching.get("hits_allowed", 0), - runs_allowed=pitching.get("runs_allowed", 0), - earned_runs=pitching.get("earned_runs", 0), - hr_allowed=pitching.get("hr_allowed", 0), - hbp=pitching.get("hbp", 0), - wild_pitches=pitching.get("wild_pitches", 0), - balks=pitching.get("balks", 0), - wins=pitching.get("wins", 0), - losses=pitching.get("losses", 0), - holds=pitching.get("holds", 0), - saves=pitching.get("saves", 0), - blown_saves=pitching.get("blown_saves", 0), - last_game=game_id, - last_updated_at=now, - ).on_conflict( - conflict_target=conflict_target, - action="update", - update=update_dict, - ).execute() - - -def _upsert_batting_sqlite(player_id, team_id, season, game_id, batting): - """ - SQLite upsert for BattingSeasonStats: read-modify-write inside the outer atomic() block. - - SQLite doesn't support EXCLUDED-based increments via Peewee's - on_conflict(), so we use get_or_create + field-level addition. - This is safe because the entire update_season_stats() call is - wrapped in db.atomic(). - """ - now = datetime.now() - - obj, _ = BattingSeasonStats.get_or_create( - player_id=player_id, - team_id=team_id, - season=season, + row = ( + StratPlay.select( + fn.COUNT(fn.DISTINCT(StratPlay.game)).alias("games"), + fn.SUM(StratPlay.outs).alias("outs"), + fn.SUM(StratPlay.so).alias("strikeouts"), + fn.SUM(StratPlay.hit).alias("hits_allowed"), + fn.SUM(StratPlay.bb).alias("bb"), + fn.SUM(StratPlay.hbp).alias("hbp"), + fn.SUM(StratPlay.homerun).alias("hr_allowed"), + fn.SUM(StratPlay.wild_pitch).alias("wild_pitches"), + fn.SUM(StratPlay.balk).alias("balks"), + ) + .join(StratGame, on=(StratPlay.game == StratGame.id)) + .where( + StratPlay.pitcher == player_id, + StratPlay.pitcher_team == team_id, + StratGame.season == season, + ) + .dicts() + .first() ) - obj.games += batting.get("games", 0) - obj.pa += batting.get("pa", 0) - obj.ab += batting.get("ab", 0) - obj.hits += batting.get("hits", 0) - obj.doubles += batting.get("doubles", 0) - obj.triples += batting.get("triples", 0) - obj.hr += batting.get("hr", 0) - obj.rbi += batting.get("rbi", 0) - obj.runs += batting.get("runs", 0) - obj.bb += batting.get("bb", 0) - obj.strikeouts += batting.get("strikeouts", 0) - obj.hbp += batting.get("hbp", 0) - obj.sac += batting.get("sac", 0) - obj.ibb += batting.get("ibb", 0) - obj.gidp += batting.get("gidp", 0) - obj.sb += batting.get("sb", 0) - obj.cs += batting.get("cs", 0) + if row is None: + row = {} - obj.last_game_id = game_id - obj.last_updated_at = now - obj.save() + return { + "games": row.get("games") or 0, + "outs": row.get("outs") or 0, + "strikeouts": row.get("strikeouts") or 0, + "hits_allowed": row.get("hits_allowed") or 0, + "bb": row.get("bb") or 0, + "hbp": row.get("hbp") or 0, + "hr_allowed": row.get("hr_allowed") or 0, + "wild_pitches": row.get("wild_pitches") or 0, + "balks": row.get("balks") or 0, + # Not available from play-by-play data + "runs_allowed": 0, + "earned_runs": 0, + } -def _upsert_pitching_sqlite(player_id, team_id, season, game_id, pitching): +def _recalc_decisions(player_id: int, team_id: int, season: int) -> dict: """ - SQLite upsert for PitchingSeasonStats: read-modify-write inside the outer atomic() block. + Recompute full-season decision totals for a pitcher+team+season triple. - SQLite doesn't support EXCLUDED-based increments via Peewee's - on_conflict(), so we use get_or_create + field-level addition. - This is safe because the entire update_season_stats() call is - wrapped in db.atomic(). + Aggregates all Decision rows for the pitcher across the season. Decision + rows are keyed by (pitcher, pitcher_team, season) independently of the + StratPlay table, so this query is separate from _recalc_pitching(). + + Decision.is_start is a BooleanField; CAST to INTEGER before summing to + ensure correct arithmetic across SQLite (True/False) and PostgreSQL + (boolean). + + Args: + player_id: FK to the player record (pitcher). + team_id: FK to the team record. + season: Integer season year. + + Returns: + Dict with keys: wins, losses, holds, saves, blown_saves, + games_started. All values are native Python ints. """ - now = datetime.now() - - obj, _ = PitchingSeasonStats.get_or_create( - player_id=player_id, - team_id=team_id, - season=season, + row = ( + Decision.select( + fn.SUM(Decision.win).alias("wins"), + fn.SUM(Decision.loss).alias("losses"), + fn.SUM(Decision.hold).alias("holds"), + fn.SUM(Decision.is_save).alias("saves"), + fn.SUM(Decision.b_save).alias("blown_saves"), + fn.SUM(Decision.is_start.cast("INTEGER")).alias("games_started"), + ) + .where( + Decision.pitcher == player_id, + Decision.pitcher_team == team_id, + Decision.season == season, + ) + .dicts() + .first() ) - obj.games += pitching.get("games", 0) - obj.games_started += pitching.get("games_started", 0) - obj.outs += pitching.get("outs", 0) - obj.strikeouts += pitching.get("strikeouts", 0) - obj.bb += pitching.get("bb", 0) - obj.hits_allowed += pitching.get("hits_allowed", 0) - obj.runs_allowed += pitching.get("runs_allowed", 0) - obj.earned_runs += pitching.get("earned_runs", 0) - obj.hr_allowed += pitching.get("hr_allowed", 0) - obj.hbp += pitching.get("hbp", 0) - obj.wild_pitches += pitching.get("wild_pitches", 0) - obj.balks += pitching.get("balks", 0) - obj.wins += pitching.get("wins", 0) - obj.losses += pitching.get("losses", 0) - obj.holds += pitching.get("holds", 0) - obj.saves += pitching.get("saves", 0) - obj.blown_saves += pitching.get("blown_saves", 0) + if row is None: + row = {} - obj.last_game_id = game_id - obj.last_updated_at = now - obj.save() + return { + "wins": row.get("wins") or 0, + "losses": row.get("losses") or 0, + "holds": row.get("holds") or 0, + "saves": row.get("saves") or 0, + "blown_saves": row.get("blown_saves") or 0, + "games_started": row.get("games_started") or 0, + } -def update_season_stats(game_id: int) -> dict: +def update_season_stats(game_id: int, force: bool = False) -> dict: """ - Accumulate per-game batting and pitching stats into BattingSeasonStats - and PitchingSeasonStats respectively. + Recompute full-season batting and pitching stats for every player in the game. - This function is safe to call exactly once per game. Idempotency is - enforced via an atomic INSERT into the ProcessedGame ledger table. - The first call for a given game_id succeeds and returns full results; - any subsequent call (including out-of-order re-delivery after a later - game has been processed) finds the existing row and returns early with - "skipped": True without touching any stats rows. + Unlike the previous incremental approach, this function recalculates each + player's season totals from scratch by querying all StratPlay rows for + the player+team+season triple. The resulting totals replace whatever was + previously stored — no additive delta is applied. Algorithm: 1. Fetch StratGame to get the season. - 2. Atomic INSERT into ProcessedGame — if the row already exists, - return early (skipped). - 3. Collect all StratPlay rows for the game. - 4. Group batting stats by (batter_id, batter_team_id). - 5. Group pitching stats by (pitcher_id, pitcher_team_id). - 6. Merge Decision rows into pitching groups. - 7. Upsert each batter into BattingSeasonStats using either: - - PostgreSQL: atomic SQL increment via ON CONFLICT DO UPDATE - - SQLite: read-modify-write inside a transaction - 8. Upsert each pitcher into PitchingSeasonStats using the same strategy. + 2. Check the ProcessedGame ledger: + - If already processed and force=False, return early (skipped=True). + - If already processed and force=True, continue (overwrite allowed). + - If not yet processed, create the ledger entry. + 3. Determine (player_id, team_id) pairs via _get_player_pairs(). + 4. For each batting pair: recompute season totals, then get_or_create + BattingSeasonStats and overwrite all fields. + 5. For each pitching pair: recompute season play totals and decision + totals, merge, then get_or_create PitchingSeasonStats and overwrite + all fields. Args: game_id: Primary key of the StratGame to process. + force: If True, re-process even if the game was previously recorded + in the ProcessedGame ledger. Useful for correcting stats after + retroactive data adjustments. Returns: - Summary dict with keys: game_id, season, batters_updated, - pitchers_updated. If the game was already processed, also - includes "skipped": True. + Dict with keys: + game_id — echoed back + season — season integer from StratGame + batters_updated — number of BattingSeasonStats rows written + pitchers_updated — number of PitchingSeasonStats rows written + skipped — True only when the game was already processed + and force=False; absent otherwise. Raises: StratGame.DoesNotExist: If no StratGame row matches game_id. """ - logger.info("update_season_stats: starting for game_id=%d", game_id) + logger.info("update_season_stats: starting for game_id=%d force=%s", game_id, force) - # Step 1 — Fetch the game to get season game = StratGame.get_by_id(game_id) season = game.season with db.atomic(): - # Step 2 — Full idempotency via ProcessedGame ledger. - # Atomic INSERT: if the row already exists (same game_id), get_or_create - # returns created=False and we skip. This handles same-game immediate - # replay AND out-of-order re-delivery (game G re-delivered after G+1 - # was already processed). + # Idempotency check via ProcessedGame ledger. _, created = ProcessedGame.get_or_create(game_id=game_id) - if not created: + + if not created and not force: logger.info( "update_season_stats: game_id=%d already processed, skipping", game_id, @@ -505,41 +343,85 @@ def update_season_stats(game_id: int) -> dict: "skipped": True, } - # Step 3 — Load plays - plays = list(StratPlay.select().where(StratPlay.game == game_id)) + if not created and force: + logger.info( + "update_season_stats: game_id=%d already processed, force=True — recalculating", + game_id, + ) + + batting_pairs, pitching_pairs = _get_player_pairs(game_id) logger.debug( - "update_season_stats: game_id=%d loaded %d plays", game_id, len(plays) + "update_season_stats: game_id=%d found %d batting pairs, %d pitching pairs", + game_id, + len(batting_pairs), + len(pitching_pairs), ) - # Steps 4 & 5 — Aggregate batting and pitching groups - batting_groups = _build_batting_groups(plays) - pitching_groups = _build_pitching_groups(plays) + now = datetime.now() - # Step 6 — Merge Decision rows into pitching groups - decisions = list(Decision.select().where(Decision.game == game_id)) - _apply_decisions(pitching_groups, decisions) - - upsert_batting = ( - _upsert_batting_postgres - if DATABASE_TYPE == "postgresql" - else _upsert_batting_sqlite - ) - upsert_pitching = ( - _upsert_pitching_postgres - if DATABASE_TYPE == "postgresql" - else _upsert_pitching_sqlite - ) - - # Step 7 — Upsert batting rows into BattingSeasonStats + # Recompute and overwrite batting season stats for each batter. batters_updated = 0 - for (player_id, team_id), batting in batting_groups.items(): - upsert_batting(player_id, team_id, season, game_id, batting) + for player_id, team_id in batting_pairs: + stats = _recalc_batting(player_id, team_id, season) + + obj, _ = BattingSeasonStats.get_or_create( + player_id=player_id, + team_id=team_id, + season=season, + ) + obj.games = stats["games"] + obj.pa = stats["pa"] + obj.ab = stats["ab"] + obj.hits = stats["hits"] + obj.doubles = stats["doubles"] + obj.triples = stats["triples"] + obj.hr = stats["hr"] + obj.rbi = stats["rbi"] + obj.runs = stats["runs"] + obj.bb = stats["bb"] + obj.strikeouts = stats["strikeouts"] + obj.hbp = stats["hbp"] + obj.sac = stats["sac"] + obj.ibb = stats["ibb"] + obj.gidp = stats["gidp"] + obj.sb = stats["sb"] + obj.cs = stats["cs"] + obj.last_game_id = game_id + obj.last_updated_at = now + obj.save() batters_updated += 1 - # Step 8 — Upsert pitching rows into PitchingSeasonStats + # Recompute and overwrite pitching season stats for each pitcher. pitchers_updated = 0 - for (player_id, team_id), pitching in pitching_groups.items(): - upsert_pitching(player_id, team_id, season, game_id, pitching) + for player_id, team_id in pitching_pairs: + play_stats = _recalc_pitching(player_id, team_id, season) + decision_stats = _recalc_decisions(player_id, team_id, season) + + obj, _ = PitchingSeasonStats.get_or_create( + player_id=player_id, + team_id=team_id, + season=season, + ) + obj.games = play_stats["games"] + obj.games_started = decision_stats["games_started"] + obj.outs = play_stats["outs"] + obj.strikeouts = play_stats["strikeouts"] + obj.bb = play_stats["bb"] + obj.hits_allowed = play_stats["hits_allowed"] + obj.runs_allowed = play_stats["runs_allowed"] + obj.earned_runs = play_stats["earned_runs"] + obj.hr_allowed = play_stats["hr_allowed"] + obj.hbp = play_stats["hbp"] + obj.wild_pitches = play_stats["wild_pitches"] + obj.balks = play_stats["balks"] + obj.wins = decision_stats["wins"] + obj.losses = decision_stats["losses"] + obj.holds = decision_stats["holds"] + obj.saves = decision_stats["saves"] + obj.blown_saves = decision_stats["blown_saves"] + obj.last_game_id = game_id + obj.last_updated_at = now + obj.save() pitchers_updated += 1 logger.info( diff --git a/tests/test_evolution_evaluator.py b/tests/test_evolution_evaluator.py index a4f2fac..abbefdf 100644 --- a/tests/test_evolution_evaluator.py +++ b/tests/test_evolution_evaluator.py @@ -85,7 +85,7 @@ class StatsStub(Model): triples = IntegerField(default=0) hr = IntegerField(default=0) outs = IntegerField(default=0) - k = IntegerField(default=0) + strikeouts = IntegerField(default=0) class Meta: database = _test_db diff --git a/tests/test_evolution_models.py b/tests/test_evolution_models.py index 189fa46..4479b9f 100644 --- a/tests/test_evolution_models.py +++ b/tests/test_evolution_models.py @@ -20,7 +20,7 @@ from peewee import IntegrityError from playhouse.shortcuts import model_to_dict from app.db_engine import ( - PlayerSeasonStats, + BattingSeasonStats, EvolutionCardState, EvolutionCosmetic, EvolutionTierBoost, @@ -248,13 +248,13 @@ class TestEvolutionCosmetic: # --------------------------------------------------------------------------- -class TestPlayerSeasonStats: - """Tests for BattingSeasonStats, the per-season accumulation table. +class TestBattingSeasonStats: + """Tests for BattingSeasonStats, the per-season batting accumulation table. - Each row aggregates game-by-game batting and pitching stats for one - player on one team in one season. The three-column unique constraint - prevents double-counting and ensures a single authoritative row for - each (player, team, season) combination. + Each row aggregates game-by-game batting stats for one player on one + team in one season. The three-column unique constraint prevents + double-counting and ensures a single authoritative row for each + (player, team, season) combination. """ def test_create_season_stats(self, player, team): @@ -264,11 +264,11 @@ class TestPlayerSeasonStats: are not provided, which is the initial state before any games are processed. """ - stats = PlayerSeasonStats.create( + stats = BattingSeasonStats.create( player=player, team=team, season=11, - games_batting=5, + games=5, pa=20, ab=18, hits=6, @@ -277,25 +277,21 @@ class TestPlayerSeasonStats: hr=2, bb=2, hbp=0, - so=4, + strikeouts=4, rbi=5, runs=3, sb=1, cs=0, ) - fetched = PlayerSeasonStats.get_by_id(stats.id) + fetched = BattingSeasonStats.get_by_id(stats.id) assert fetched.player_id == player.player_id assert fetched.team_id == team.id assert fetched.season == 11 - assert fetched.games_batting == 5 + assert fetched.games == 5 assert fetched.pa == 20 assert fetched.hits == 6 assert fetched.hr == 2 - # Pitching fields were not set — confirm default zero values - assert fetched.games_pitching == 0 - assert fetched.outs == 0 - assert fetched.wins == 0 - assert fetched.saves == 0 + assert fetched.strikeouts == 4 # Nullable meta fields assert fetched.last_game is None assert fetched.last_updated_at is None @@ -307,9 +303,9 @@ class TestPlayerSeasonStats: player-team-season combination has exactly one accumulation row, preventing duplicate stat aggregation that would inflate totals. """ - PlayerSeasonStats.create(player=player, team=team, season=11) + BattingSeasonStats.create(player=player, team=team, season=11) with pytest.raises(IntegrityError): - PlayerSeasonStats.create(player=player, team=team, season=11) + BattingSeasonStats.create(player=player, team=team, season=11) def test_season_stats_increment(self, player, team): """Manually incrementing hits on an existing row persists the change. @@ -319,7 +315,7 @@ class TestPlayerSeasonStats: writes back to the database and that subsequent reads reflect the updated value. """ - stats = PlayerSeasonStats.create( + stats = BattingSeasonStats.create( player=player, team=team, season=11, @@ -328,5 +324,5 @@ class TestPlayerSeasonStats: stats.hits += 3 stats.save() - refreshed = PlayerSeasonStats.get_by_id(stats.id) + refreshed = BattingSeasonStats.get_by_id(stats.id) assert refreshed.hits == 13 diff --git a/tests/test_postgame_evolution.py b/tests/test_postgame_evolution.py index b5f5d1e..21671e8 100644 --- a/tests/test_postgame_evolution.py +++ b/tests/test_postgame_evolution.py @@ -63,7 +63,9 @@ from app.db_engine import ( Pack, PackType, Player, - PlayerSeasonStats, + BattingSeasonStats, + PitchingSeasonStats, + ProcessedGame, Rarity, Roster, RosterSlot, @@ -106,7 +108,9 @@ _WP13_MODELS = [ Decision, ScoutOpportunity, ScoutClaim, - PlayerSeasonStats, + BattingSeasonStats, + PitchingSeasonStats, + ProcessedGame, EvolutionTrack, EvolutionCardState, EvolutionTierBoost, @@ -328,7 +332,7 @@ def test_update_game_creates_season_stats_rows(client): """POST update-game creates player_season_stats rows for players in the game. What: Set up a batter and pitcher in a game with 3 PA for the batter. - After the endpoint call, assert a PlayerSeasonStats row exists with pa=3. + After the endpoint call, assert a BattingSeasonStats row exists with pa=3. Why: This is the core write path. If the row is not created, the evolution evaluator will always see zero career stats. @@ -347,10 +351,10 @@ def test_update_game_creates_season_stats_rows(client): ) assert resp.status_code == 200 - stats = PlayerSeasonStats.get_or_none( - (PlayerSeasonStats.player == batter) - & (PlayerSeasonStats.team == team_a) - & (PlayerSeasonStats.season == 11) + stats = BattingSeasonStats.get_or_none( + (BattingSeasonStats.player == batter) + & (BattingSeasonStats.team == team_a) + & (BattingSeasonStats.season == 11) ) assert stats is not None assert stats.pa == 3 @@ -417,8 +421,8 @@ def test_update_game_idempotent(client): assert data2["skipped"] is True assert data2["updated"] == 0 - stats = PlayerSeasonStats.get( - (PlayerSeasonStats.player == batter) & (PlayerSeasonStats.team == team_a) + stats = BattingSeasonStats.get( + (BattingSeasonStats.player == batter) & (BattingSeasonStats.team == team_a) ) assert stats.pa == 3 # not 6 @@ -468,7 +472,7 @@ def test_evaluate_game_tier_advancement(client): """A game that pushes a card past a tier threshold advances the tier. What: Set the batter's career value just below T1 (37) by manually seeding - a prior PlayerSeasonStats row with pa=34. Then add a game that brings the + a prior BattingSeasonStats row with pa=34. Then add a game that brings the total past 37 and call evaluate-game. current_tier must advance to >= 1. Why: Tier advancement is the core deliverable of card evolution. If the @@ -484,7 +488,7 @@ def test_evaluate_game_tier_advancement(client): _make_state(batter, team_a, track, current_tier=0, current_value=34.0) # Seed prior stats: 34 PA (value = 34; T1 threshold = 37) - PlayerSeasonStats.create( + BattingSeasonStats.create( player=batter, team=team_a, season=10, # previous season @@ -565,7 +569,7 @@ def test_evaluate_game_tier_ups_in_response(client): _make_state(batter, team_a, track, current_tier=0) # Seed prior stats below threshold - PlayerSeasonStats.create(player=batter, team=team_a, season=10, pa=34) + BattingSeasonStats.create(player=batter, team=team_a, season=10, pa=34) # Game pushes past T1 for i in range(4): diff --git a/tests/test_season_stats_update.py b/tests/test_season_stats_update.py index 218e12f..faf9c44 100644 --- a/tests/test_season_stats_update.py +++ b/tests/test_season_stats_update.py @@ -1,10 +1,10 @@ """ Tests for app/services/season_stats.py — update_season_stats(). -What: Verify that the incremental stat accumulation function correctly -aggregates StratPlay and Decision rows into BattingSeasonStats and -PitchingSeasonStats, handles duplicate calls idempotently, and -accumulates stats across multiple games. +What: Verify that the full-recalculation stat engine correctly aggregates +StratPlay and Decision rows into BattingSeasonStats and PitchingSeasonStats, +handles duplicate calls idempotently, accumulates stats across multiple games, +and supports forced reprocessing for self-healing. Why: This is the core bookkeeping engine for card evolution scoring. A double-count bug, a missed Decision merge, or a team-isolation failure @@ -191,7 +191,7 @@ def game(team_a, team_b): # --------------------------------------------------------------------------- -# Tests +# Tests — Existing behavior (kept) # --------------------------------------------------------------------------- @@ -200,7 +200,7 @@ def test_single_game_batting_stats(team_a, team_b, player_batter, player_pitcher What: Create three plate appearances (2 hits, 1 strikeout, a walk, and a home run) for one batter. After update_season_stats(), the - PlayerSeasonStats row should reflect the exact sum of all play fields. + BattingSeasonStats row should reflect the exact sum of all play fields. Why: The core of the batting aggregation pipeline. If any field mapping is wrong (e.g. 'hit' mapped to 'doubles' instead of 'hits'), evolution @@ -287,11 +287,11 @@ def test_single_game_pitching_stats( What: The same plays that create batting stats for the batter are also the source for the pitcher's opposing stats. This test checks that - _build_pitching_groups() correctly inverts batter-perspective fields. + _recalc_pitching() correctly inverts batter-perspective fields. - Why: The batter's 'so' becomes the pitcher's 'k', the batter's 'hit' - becomes 'hits_allowed', etc. Any transposition in this mapping would - corrupt pitcher stats silently. + Why: The batter's 'so' becomes the pitcher's 'strikeouts', the batter's + 'hit' becomes 'hits_allowed', etc. Any transposition in this mapping + would corrupt pitcher stats silently. """ # Play 1: strikeout — batter so=1, outs=1 make_play( @@ -347,14 +347,14 @@ def test_single_game_pitching_stats( def test_decision_integration(team_a, team_b, player_batter, player_pitcher, game): - """Decision.win=1 for a pitcher results in wins=1 in PlayerSeasonStats. + """Decision.win=1 for a pitcher results in wins=1 in PitchingSeasonStats. - What: Add a single StratPlay to establish the pitcher in pitching_groups, + What: Add a single StratPlay to establish the pitcher in pitching pairs, then create a Decision row recording a win. Call update_season_stats() and verify the wins column is 1. Why: Decisions are stored in a separate table from StratPlay. If - _apply_decisions() fails to merge them (wrong FK lookup, key mismatch), + _recalc_decisions() fails to merge them (wrong FK lookup, key mismatch), pitchers would always show 0 wins/losses/saves regardless of actual game outcomes, breaking standings and evolution criteria. """ @@ -441,9 +441,9 @@ def test_two_games_accumulate(team_a, team_b, player_batter, player_pitcher): What: Process game 1 (pa=2) then game 2 (pa=3) for the same batter/team. After both updates the stats row should show pa=5. - Why: PlayerSeasonStats is a season-long accumulator, not a per-game - snapshot. If the upsert logic overwrites instead of increments, a player's - stats would always reflect only their most recent game. + Why: BattingSeasonStats is a season-long accumulator, not a per-game + snapshot. The full recalculation queries all StratPlay rows for the season, + so processing game 2 recomputes with all 5 PAs included. """ game1 = StratGame.create( season=11, game_type="ranked", away_team=team_a, home_team=team_b @@ -593,18 +593,15 @@ def test_two_team_game(team_a, team_b): def test_out_of_order_replay_prevented(team_a, team_b, player_batter, player_pitcher): - """Out-of-order re-delivery of game G (after G+1 was processed) must not double-count. + """Out-of-order processing and re-delivery produce correct stats. - What: Process game G+1 first (pa=2), then process game G (pa=3). Now - re-deliver game G. The third call must return 'skipped'=True and leave - the batter's pa unchanged at 5 (3 + 2), not 8 (3 + 2 + 3). + What: Process game G+1 first (pa=2), then game G (pa=3). The full + recalculation approach means both calls query all StratPlay rows for the + season, so the final stats are always correct regardless of processing + order. Re-delivering game G returns 'skipped'=True and leaves stats at 5. - Why: This is the failure mode that the old last_game FK guard could not - catch. After G+1 is processed, no BattingSeasonStats row carries - last_game=G anymore (it was overwritten to G+1). The old guard would - have returned already_processed=False and double-counted. The - ProcessedGame ledger fixes this by keying on game_id independently of - the stats rows. + Why: With full recalculation, out-of-order processing is inherently safe. + The ProcessedGame ledger still prevents redundant work on re-delivery. """ game_g = StratGame.create( season=11, game_type="ranked", away_team=team_a, home_team=team_b @@ -657,5 +654,200 @@ def test_out_of_order_replay_prevented(team_a, team_b, player_batter, player_pit assert replay_result.get("skipped") is True # Stats must remain at 5, not 8 - stats.refresh() + stats = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) assert stats.pa == 5 + + +# --------------------------------------------------------------------------- +# Tests — New (force recalc / idempotency / self-healing) +# --------------------------------------------------------------------------- + + +def test_force_recalc(team_a, team_b, player_batter, player_pitcher, game): + """Processing with force=True after initial processing does not double stats. + + What: Process a game normally (pa=3), then reprocess with force=True. + Because the recalculation reads all StratPlay rows and writes totals + (not deltas), the stats remain at pa=3 after the forced reprocess. + + Why: The force flag bypasses the ProcessedGame ledger skip, but since + the underlying data hasn't changed, the recalculated totals must be + identical. This proves the replacement upsert is safe. + """ + for i in range(3): + make_play( + game, + i + 1, + player_batter, + team_a, + player_pitcher, + team_b, + pa=1, + ab=1, + hit=1, + outs=0, + ) + + first_result = update_season_stats(game.id) + assert first_result["batters_updated"] >= 1 + assert "skipped" not in first_result + + # Force reprocess — should NOT double stats + force_result = update_season_stats(game.id, force=True) + assert "skipped" not in force_result + assert force_result["batters_updated"] >= 1 + + stats = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) + assert stats.pa == 3 + assert stats.hits == 3 + assert stats.games == 1 + + +def test_idempotent_reprocessing(team_a, team_b, player_batter, player_pitcher, game): + """Two consecutive force=True calls produce identical stats. + + What: Force-process the same game twice. Both calls recompute from + scratch, so the stats after the second call must be identical to the + stats after the first call. + + Why: Idempotency is a critical property of the recalculation engine. + External systems (admin scripts, retry loops) may call force=True + multiple times; the result must be stable. + """ + for i in range(4): + make_play( + game, + i + 1, + player_batter, + team_a, + player_pitcher, + team_b, + pa=1, + ab=1, + so=1 if i % 2 == 0 else 0, + hit=0 if i % 2 == 0 else 1, + outs=1 if i % 2 == 0 else 0, + ) + + update_season_stats(game.id, force=True) + stats_after_first = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) + pa_1, hits_1, so_1 = ( + stats_after_first.pa, + stats_after_first.hits, + stats_after_first.strikeouts, + ) + + update_season_stats(game.id, force=True) + stats_after_second = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) + + assert stats_after_second.pa == pa_1 + assert stats_after_second.hits == hits_1 + assert stats_after_second.strikeouts == so_1 + + +def test_partial_reprocessing_heals( + team_a, team_b, player_batter, player_pitcher, game +): + """Force reprocessing corrects manually corrupted stats. + + What: Process a game (pa=3, hits=2), then manually corrupt the stats + row (set pa=999). Force-reprocess the game. The stats should be healed + back to the correct totals (pa=3, hits=2). + + Why: This is the primary self-healing benefit of full recalculation. + Partial processing, bugs, or manual edits can corrupt season stats; + force=True recomputes from the source-of-truth StratPlay data and + writes the correct totals regardless of current row state. + """ + # PA 1: single + make_play( + game, + 1, + player_batter, + team_a, + player_pitcher, + team_b, + pa=1, + ab=1, + hit=1, + outs=0, + ) + # PA 2: double + make_play( + game, + 2, + player_batter, + team_a, + player_pitcher, + team_b, + pa=1, + ab=1, + hit=1, + double=1, + outs=0, + ) + # PA 3: strikeout + make_play( + game, + 3, + player_batter, + team_a, + player_pitcher, + team_b, + pa=1, + ab=1, + so=1, + outs=1, + ) + + update_season_stats(game.id) + + # Verify correct initial state + stats = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) + assert stats.pa == 3 + assert stats.hits == 2 + assert stats.doubles == 1 + + # Corrupt the stats manually + stats.pa = 999 + stats.hits = 0 + stats.doubles = 50 + stats.save() + + # Verify corruption took effect + stats = BattingSeasonStats.get_by_id(stats.id) + assert stats.pa == 999 + + # Force reprocess — should heal the corruption + update_season_stats(game.id, force=True) + + stats = BattingSeasonStats.get( + BattingSeasonStats.player == player_batter, + BattingSeasonStats.team == team_a, + BattingSeasonStats.season == 11, + ) + assert stats.pa == 3 + assert stats.hits == 2 + assert stats.doubles == 1 + assert stats.strikeouts == 1 + assert stats.games == 1