"""
season_stats.py — Incremental BattingSeasonStats and PitchingSeasonStats update logic.

Called once per completed StratGame to accumulate batting and pitching
statistics into the batting_season_stats and pitching_season_stats tables
respectively.

Idempotency: re-delivery of a game (including out-of-order re-delivery) is
detected via an atomic INSERT into the ProcessedGame ledger table keyed on
game_id. The first call for a given game_id succeeds; all subsequent calls
return early with "skipped": True without modifying any stats rows.

Peewee upsert strategy:
- SQLite: read-modify-write inside db.atomic() transaction
- PostgreSQL: ON CONFLICT ... DO UPDATE with column-level EXCLUDED increments
"""

import logging
import os
from collections import defaultdict
from datetime import datetime

from peewee import EXCLUDED

from app.db_engine import (
    db,
    BattingSeasonStats,
    Decision,
    PitchingSeasonStats,
    ProcessedGame,
    StratGame,
    StratPlay,
)

logger = logging.getLogger(__name__)

DATABASE_TYPE = os.environ.get("DATABASE_TYPE", "sqlite").lower()

# Counter columns accumulated per (player, team, season).  These tuples are the
# single source of truth for the group dicts built from plays AND for the
# columns incremented by the upsert helpers, so the two cannot drift apart.
_BATTING_STAT_COLUMNS = (
    "games",
    "pa",
    "ab",
    "hits",
    "doubles",
    "triples",
    "hr",
    "rbi",
    "runs",
    "bb",
    "strikeouts",
    "hbp",
    "sac",
    "ibb",
    "gidp",
    "sb",
    "cs",
)

_PITCHING_STAT_COLUMNS = (
    "games",
    "games_started",
    "outs",
    "strikeouts",
    "bb",
    "hits_allowed",
    "runs_allowed",
    "earned_runs",
    "hr_allowed",
    "hbp",
    "wild_pitches",
    "balks",
    "wins",
    "losses",
    "holds",
    "saves",
    "blown_saves",
)


def _new_pitching_group():
    """
    Return a fresh per-game pitching stat dict.

    Every counter starts at 0 except "games", which starts at 1: a group is
    only ever created for a pitcher who has a play or a Decision in the game
    being processed.
    """
    group = dict.fromkeys(_PITCHING_STAT_COLUMNS, 0)
    group["games"] = 1
    return group


def _build_batting_groups(plays):
    """
    Aggregate per-play batting stats by (batter_id, batter_team_id).

    Plays whose batter_id is None are ignored. For every other play all stat
    fields are summed regardless of the play's pa value; "games" is set to 1
    as soon as the batter has at least one play with pa > 0 and stays 0
    otherwise.

    Args:
        plays: Iterable of play objects exposing batter_id, batter_team_id
            and the numeric attributes pa, ab, hit, double, triple, homerun,
            rbi, run, bb, so, hbp, sac, ibb, gidp, sb and cs.

    Returns:
        A defaultdict keyed by (batter_id, batter_team_id) whose values are
        stat dicts with the BattingSeasonStats counter column names.
    """
    groups = defaultdict(lambda: dict.fromkeys(_BATTING_STAT_COLUMNS, 0))

    for play in plays:
        batter_id = play.batter_id
        if batter_id is None:
            continue

        g = groups[(batter_id, play.batter_team_id)]

        g["pa"] += play.pa
        g["ab"] += play.ab
        g["hits"] += play.hit
        g["doubles"] += play.double
        g["triples"] += play.triple
        g["hr"] += play.homerun
        g["rbi"] += play.rbi
        g["runs"] += play.run
        g["bb"] += play.bb
        g["strikeouts"] += play.so
        g["hbp"] += play.hbp
        g["sac"] += play.sac
        g["ibb"] += play.ibb
        g["gidp"] += play.gidp
        g["sb"] += play.sb
        g["cs"] += play.cs

        # A single game contributes at most one game played, so this is an
        # idempotent assignment rather than an increment.
        if play.pa > 0:
            g["games"] = 1

    return groups


def _build_pitching_groups(plays):
    """
    Aggregate per-play pitching stats by (pitcher_id, pitcher_team_id).

    The play attributes are read from the batter's side and mapped onto the
    pitcher's columns:
      - outs     -> outs
      - so       -> strikeouts
      - hit      -> hits_allowed
      - bb       -> bb
      - hbp      -> hbp
      - homerun  -> hr_allowed

    "games" is 1 for every pitcher with at least one play. games_started and
    the win/loss/hold/save/blown-save counters are filled in later by
    _apply_decisions(). runs_allowed, earned_runs, wild_pitches and balks are
    not read from the plays here and stay 0.

    Plays whose pitcher_id is None are skipped, mirroring the batter_id guard
    in _build_batting_groups().

    Args:
        plays: Iterable of play objects exposing pitcher_id, pitcher_team_id
            and the numeric attributes outs, so, hit, bb, hbp and homerun.

    Returns:
        A defaultdict keyed by (pitcher_id, pitcher_team_id) whose values are
        stat dicts with the PitchingSeasonStats counter column names.
    """
    groups = defaultdict(_new_pitching_group)

    for play in plays:
        pitcher_id = play.pitcher_id
        # Without this guard a (None, team) group with games=1 would be
        # created and later handed to the upsert with player=None.
        if pitcher_id is None:
            continue

        g = groups[(pitcher_id, play.pitcher_team_id)]

        g["outs"] += play.outs
        g["strikeouts"] += play.so
        g["hits_allowed"] += play.hit
        g["bb"] += play.bb
        g["hbp"] += play.hbp
        g["hr_allowed"] += play.homerun

    return groups


def _apply_decisions(pitching_groups, decisions):
    """
    Merge Decision rows into the pitching stat groups, in place.

    For each decision the win, loss, is_save, hold and b_save values are added
    to the matching group's wins, losses, saves, holds and blown_saves, and
    games_started is incremented when is_start is truthy.

    Args:
        pitching_groups: Mapping keyed by (pitcher_id, pitcher_team_id), as
            produced by _build_pitching_groups(). Mutated in place.
        decisions: Iterable of decision objects exposing pitcher_id,
            pitcher_team_id, win, loss, is_save, hold, b_save and is_start.
    """
    for decision in decisions:
        key = (decision.pitcher_id, decision.pitcher_team_id)

        # A pitcher may have a Decision without any plays; start from a
        # zeroed entry in that case. An explicit membership test is used so
        # plain dicts work as well as defaultdicts.
        if key not in pitching_groups:
            pitching_groups[key] = _new_pitching_group()

        g = pitching_groups[key]
        g["wins"] += decision.win
        g["losses"] += decision.loss
        g["saves"] += decision.is_save
        g["holds"] += decision.hold
        g["blown_saves"] += decision.b_save
        g["games_started"] += 1 if decision.is_start else 0


def _upsert_postgres(model, stat_columns, player_id, team_id, season, game_id, stats):
    """
    Insert-or-increment one season stats row using ON CONFLICT ... DO UPDATE.

    On conflict over (player, team, season) every column in stat_columns is
    set to its current value plus the EXCLUDED (incoming) value, and
    last_game / last_updated_at are overwritten with the incoming values.

    Args:
        model: BattingSeasonStats or PitchingSeasonStats.
        stat_columns: Names of the counter columns to insert and increment.
        player_id: Player primary key.
        team_id: Team primary key.
        season: Season value taken from the game.
        game_id: Game being processed; stored as last_game.
        stats: Dict of per-game counters; missing keys count as 0.
    """
    now = datetime.now()

    update_dict = {}
    for col in stat_columns:
        field_obj = getattr(model, col)
        update_dict[field_obj] = field_obj + EXCLUDED[col]
    update_dict[model.last_game] = EXCLUDED["last_game_id"]
    update_dict[model.last_updated_at] = EXCLUDED["last_updated_at"]

    row = {"player": player_id, "team": team_id, "season": season}
    row.update((col, stats.get(col, 0)) for col in stat_columns)
    row["last_game"] = game_id
    row["last_updated_at"] = now

    model.insert(**row).on_conflict(
        conflict_target=[model.player, model.team, model.season],
        action="update",
        update=update_dict,
    ).execute()


def _upsert_sqlite(model, stat_columns, player_id, team_id, season, game_id, stats):
    """
    Insert-or-increment one season stats row via read-modify-write.

    Fetches (or creates) the row for (player, team, season), adds each
    incoming counter to the stored value in Python and saves the row. This is
    not atomic on its own; update_season_stats() calls it inside its
    db.atomic() block.

    Args:
        model: BattingSeasonStats or PitchingSeasonStats.
        stat_columns: Names of the counter columns to increment.
        player_id: Player primary key.
        team_id: Team primary key.
        season: Season value taken from the game.
        game_id: Game being processed; stored as last_game_id.
        stats: Dict of per-game counters; missing keys count as 0.
    """
    now = datetime.now()

    obj, _ = model.get_or_create(
        player_id=player_id,
        team_id=team_id,
        season=season,
    )

    for col in stat_columns:
        setattr(obj, col, getattr(obj, col) + stats.get(col, 0))

    obj.last_game_id = game_id
    obj.last_updated_at = now
    obj.save()


def _upsert_batting_postgres(player_id, team_id, season, game_id, batting):
    """
    PostgreSQL upsert for BattingSeasonStats using ON CONFLICT ... DO UPDATE.

    Each stat column is incremented by the EXCLUDED (incoming) value rather
    than overwritten.
    """
    _upsert_postgres(
        BattingSeasonStats,
        _BATTING_STAT_COLUMNS,
        player_id,
        team_id,
        season,
        game_id,
        batting,
    )


def _upsert_pitching_postgres(player_id, team_id, season, game_id, pitching):
    """
    PostgreSQL upsert for PitchingSeasonStats using ON CONFLICT ... DO UPDATE.

    Each stat column is incremented by the EXCLUDED (incoming) value rather
    than overwritten.
    """
    _upsert_postgres(
        PitchingSeasonStats,
        _PITCHING_STAT_COLUMNS,
        player_id,
        team_id,
        season,
        game_id,
        pitching,
    )


def _upsert_batting_sqlite(player_id, team_id, season, game_id, batting):
    """
    SQLite upsert for BattingSeasonStats: get_or_create followed by
    field-level addition and save().

    Relies on the db.atomic() block in update_season_stats() for atomicity.
    """
    _upsert_sqlite(
        BattingSeasonStats,
        _BATTING_STAT_COLUMNS,
        player_id,
        team_id,
        season,
        game_id,
        batting,
    )


def _upsert_pitching_sqlite(player_id, team_id, season, game_id, pitching):
    """
    SQLite upsert for PitchingSeasonStats: get_or_create followed by
    field-level addition and save().

    Relies on the db.atomic() block in update_season_stats() for atomicity.
    """
    _upsert_sqlite(
        PitchingSeasonStats,
        _PITCHING_STAT_COLUMNS,
        player_id,
        team_id,
        season,
        game_id,
        pitching,
    )


def update_season_stats(game_id: int) -> dict:
    """
    Accumulate per-game batting and pitching stats into BattingSeasonStats
    and PitchingSeasonStats respectively.

    This function is safe to call more than once for the same game: only the
    first call changes any stats. Idempotency is enforced via
    ProcessedGame.get_or_create(game_id=...) inside the same db.atomic()
    block as the stat updates. The first call for a given game_id creates the
    ledger row and applies the stats; any later call (including out-of-order
    re-delivery after a later game has been processed) finds the existing row
    and returns early with "skipped": True without touching any stats rows.

    Algorithm:
        1. Fetch StratGame to get the season.
        2. get_or_create the ProcessedGame row — if it already exists,
           return early (skipped).
        3. Collect all StratPlay rows for the game.
        4. Group batting stats by (batter_id, batter_team_id).
        5. Group pitching stats by (pitcher_id, pitcher_team_id).
        6. Merge Decision rows into pitching groups.
        7. Upsert each batter into BattingSeasonStats using either:
           - PostgreSQL: SQL increment via ON CONFLICT DO UPDATE
           - SQLite: read-modify-write inside the transaction
        8. Upsert each pitcher into PitchingSeasonStats using the same
           strategy.

    Args:
        game_id: Primary key of the StratGame to process.

    Returns:
        Summary dict with keys: game_id, season, batters_updated,
        pitchers_updated. If the game was already processed, also includes
        "skipped": True.

    Raises:
        StratGame.DoesNotExist: If no StratGame row matches game_id.
    """
    logger.info("update_season_stats: starting for game_id=%d", game_id)

    # Step 1 — Fetch the game to get season
    game = StratGame.get_by_id(game_id)
    season = game.season

    with db.atomic():
        # Step 2 — Full idempotency via ProcessedGame ledger.
        # If the row already exists (same game_id), get_or_create returns
        # created=False and we skip. This handles same-game immediate replay
        # AND out-of-order re-delivery (game G re-delivered after G+1 was
        # already processed). The ledger row lives in the same transaction
        # as the stat updates, so a failure below also undoes the ledger row.
        _, created = ProcessedGame.get_or_create(game_id=game_id)
        if not created:
            logger.info(
                "update_season_stats: game_id=%d already processed, skipping",
                game_id,
            )
            return {
                "game_id": game_id,
                "season": season,
                "batters_updated": 0,
                "pitchers_updated": 0,
                "skipped": True,
            }

        # Step 3 — Load plays
        plays = list(StratPlay.select().where(StratPlay.game == game_id))
        logger.debug(
            "update_season_stats: game_id=%d loaded %d plays", game_id, len(plays)
        )

        # Steps 4 & 5 — Aggregate batting and pitching groups
        batting_groups = _build_batting_groups(plays)
        pitching_groups = _build_pitching_groups(plays)

        # Step 6 — Merge Decision rows into pitching groups
        decisions = list(Decision.select().where(Decision.game == game_id))
        _apply_decisions(pitching_groups, decisions)

        # Any DATABASE_TYPE other than "postgresql" takes the SQLite path.
        upsert_batting = (
            _upsert_batting_postgres
            if DATABASE_TYPE == "postgresql"
            else _upsert_batting_sqlite
        )
        upsert_pitching = (
            _upsert_pitching_postgres
            if DATABASE_TYPE == "postgresql"
            else _upsert_pitching_sqlite
        )

        # Step 7 — Upsert batting rows into BattingSeasonStats
        batters_updated = 0
        for (player_id, team_id), batting in batting_groups.items():
            upsert_batting(player_id, team_id, season, game_id, batting)
            batters_updated += 1

        # Step 8 — Upsert pitching rows into PitchingSeasonStats
        pitchers_updated = 0
        for (player_id, team_id), pitching in pitching_groups.items():
            upsert_pitching(player_id, team_id, season, game_id, pitching)
            pitchers_updated += 1

    logger.info(
        "update_season_stats: game_id=%d complete — "
        "batters_updated=%d pitchers_updated=%d",
        game_id,
        batters_updated,
        pitchers_updated,
    )

    return {
        "game_id": game_id,
        "season": season,
        "batters_updated": batters_updated,
        "pitchers_updated": pitchers_updated,
    }