paper-dynasty-database/app/services/season_stats.py
Cal Corum d15fc97afb
All checks were successful
Build Docker Image / build (pull_request) Successful in 8m53s
fix: add pitcher_id null guard in _get_player_pairs
Prevents (None, team_id) tuples from being added to pitching_pairs
when a StratPlay row has no pitcher (edge case matching the existing
batter_id guard).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-23 12:50:16 -05:00

454 lines
16 KiB
Python

"""
season_stats.py — Full-recalculation BattingSeasonStats and PitchingSeasonStats update logic.
Called once per completed StratGame to recompute the full season batting and
pitching statistics for every player who appeared in that game, then write
those totals to the batting_season_stats and pitching_season_stats tables.
Unlike the previous incremental (delta) approach, each call recomputes totals
from scratch by aggregating all StratPlay rows for the player+team+season
triple. This eliminates double-counting on re-delivery and makes every row a
faithful snapshot of the full season to date.
Idempotency: re-delivery of a game is detected via the ProcessedGame ledger
table, keyed on game_id.
- First call: records the ledger entry and proceeds with recalculation.
- Subsequent calls without force=True: return early with "skipped": True.
- force=True: skips the early-return check and recalculates anyway (useful
for correcting data after retroactive stat adjustments).
Upsert strategy: get_or_create + field assignment + save(). Because we are
writing the full recomputed total rather than adding a delta, there is no
risk of concurrent-write skew between games. A single unified path works for
both SQLite and PostgreSQL.
"""
import logging
from datetime import datetime
from peewee import Case, fn
from app.db_engine import (
db,
BattingSeasonStats,
Decision,
PitchingSeasonStats,
ProcessedGame,
StratGame,
StratPlay,
)
logger = logging.getLogger(__name__)
def _get_player_pairs(game_id: int) -> tuple[set, set]:
"""
Return the sets of (player_id, team_id) pairs that appeared in the game.
Queries StratPlay for all rows belonging to game_id and extracts:
- batting_pairs: set of (batter_id, batter_team_id), excluding rows where
batter_id is None (e.g. automatic outs, walk-off plays without a PA).
- pitching_pairs: set of (pitcher_id, pitcher_team_id) from all plays
(pitcher is always present), plus any pitchers from the Decision table
who may not have StratPlay rows (rare edge case).
Args:
game_id: Primary key of the StratGame to query.
Returns:
Tuple of (batting_pairs, pitching_pairs) where each element is a set
of (int, int) tuples.
"""
plays = (
StratPlay.select(
StratPlay.batter,
StratPlay.batter_team,
StratPlay.pitcher,
StratPlay.pitcher_team,
)
.where(StratPlay.game == game_id)
.tuples()
)
batting_pairs: set[tuple[int, int]] = set()
pitching_pairs: set[tuple[int, int]] = set()
for batter_id, batter_team_id, pitcher_id, pitcher_team_id in plays:
if batter_id is not None:
batting_pairs.add((batter_id, batter_team_id))
if pitcher_id is not None:
pitching_pairs.add((pitcher_id, pitcher_team_id))
# Include pitchers who have a Decision but no StratPlay rows for this game
# (rare edge case, e.g. a pitcher credited with a decision without recording
# any plays — the old code handled this explicitly in _apply_decisions).
decision_pitchers = (
Decision.select(Decision.pitcher, Decision.pitcher_team)
.where(Decision.game == game_id)
.tuples()
)
for pitcher_id, pitcher_team_id in decision_pitchers:
pitching_pairs.add((pitcher_id, pitcher_team_id))
return batting_pairs, pitching_pairs
def _recalc_batting(player_id: int, team_id: int, season: int) -> dict:
"""
Recompute full-season batting totals for a player+team+season triple.
Aggregates every StratPlay row where batter == player_id and
batter_team == team_id across all games in the given season.
games counts only games where the player had at least one official PA
(pa > 0). The COUNT(DISTINCT ...) with a CASE expression achieves this:
NULL values from the CASE are ignored by COUNT, so only game IDs where
pa > 0 contribute.
Args:
player_id: FK to the player record.
team_id: FK to the team record.
season: Integer season year.
Returns:
Dict with keys matching BattingSeasonStats columns; all values are
native Python ints (defaulting to 0 if no rows matched).
"""
row = (
StratPlay.select(
fn.COUNT(
Case(None, [(StratPlay.pa > 0, StratPlay.game)], None).distinct()
).alias("games"),
fn.SUM(StratPlay.pa).alias("pa"),
fn.SUM(StratPlay.ab).alias("ab"),
fn.SUM(StratPlay.hit).alias("hits"),
fn.SUM(StratPlay.double).alias("doubles"),
fn.SUM(StratPlay.triple).alias("triples"),
fn.SUM(StratPlay.homerun).alias("hr"),
fn.SUM(StratPlay.rbi).alias("rbi"),
fn.SUM(StratPlay.run).alias("runs"),
fn.SUM(StratPlay.bb).alias("bb"),
fn.SUM(StratPlay.so).alias("strikeouts"),
fn.SUM(StratPlay.hbp).alias("hbp"),
fn.SUM(StratPlay.sac).alias("sac"),
fn.SUM(StratPlay.ibb).alias("ibb"),
fn.SUM(StratPlay.gidp).alias("gidp"),
fn.SUM(StratPlay.sb).alias("sb"),
fn.SUM(StratPlay.cs).alias("cs"),
)
.join(StratGame, on=(StratPlay.game == StratGame.id))
.where(
StratPlay.batter == player_id,
StratPlay.batter_team == team_id,
StratGame.season == season,
)
.dicts()
.first()
)
if row is None:
row = {}
return {
"games": row.get("games") or 0,
"pa": row.get("pa") or 0,
"ab": row.get("ab") or 0,
"hits": row.get("hits") or 0,
"doubles": row.get("doubles") or 0,
"triples": row.get("triples") or 0,
"hr": row.get("hr") or 0,
"rbi": row.get("rbi") or 0,
"runs": row.get("runs") or 0,
"bb": row.get("bb") or 0,
"strikeouts": row.get("strikeouts") or 0,
"hbp": row.get("hbp") or 0,
"sac": row.get("sac") or 0,
"ibb": row.get("ibb") or 0,
"gidp": row.get("gidp") or 0,
"sb": row.get("sb") or 0,
"cs": row.get("cs") or 0,
}
def _recalc_pitching(player_id: int, team_id: int, season: int) -> dict:
"""
Recompute full-season pitching totals for a player+team+season triple.
Aggregates every StratPlay row where pitcher == player_id and
pitcher_team == team_id across all games in the given season. games counts
all distinct games in which the pitcher appeared (any play qualifies).
Stats derived from StratPlay (from the batter-perspective columns):
- outs = SUM(outs)
- strikeouts = SUM(so) — batter SO = pitcher K
- hits_allowed = SUM(hit)
- bb = SUM(bb) — walks allowed
- hbp = SUM(hbp)
- hr_allowed = SUM(homerun)
- wild_pitches = SUM(wild_pitch)
- balks = SUM(balk)
Fields not available from StratPlay (runs_allowed, earned_runs) default
to 0. Decision-level fields (wins, losses, etc.) are populated separately
by _recalc_decisions() and merged in the caller.
Args:
player_id: FK to the player record.
team_id: FK to the team record.
season: Integer season year.
Returns:
Dict with keys matching PitchingSeasonStats columns (excluding
decision fields, which are filled by _recalc_decisions).
"""
row = (
StratPlay.select(
fn.COUNT(StratPlay.game.distinct()).alias("games"),
fn.SUM(StratPlay.outs).alias("outs"),
fn.SUM(StratPlay.so).alias("strikeouts"),
fn.SUM(StratPlay.hit).alias("hits_allowed"),
fn.SUM(StratPlay.bb).alias("bb"),
fn.SUM(StratPlay.hbp).alias("hbp"),
fn.SUM(StratPlay.homerun).alias("hr_allowed"),
fn.SUM(StratPlay.wild_pitch).alias("wild_pitches"),
fn.SUM(StratPlay.balk).alias("balks"),
)
.join(StratGame, on=(StratPlay.game == StratGame.id))
.where(
StratPlay.pitcher == player_id,
StratPlay.pitcher_team == team_id,
StratGame.season == season,
)
.dicts()
.first()
)
if row is None:
row = {}
return {
"games": row.get("games") or 0,
"outs": row.get("outs") or 0,
"strikeouts": row.get("strikeouts") or 0,
"hits_allowed": row.get("hits_allowed") or 0,
"bb": row.get("bb") or 0,
"hbp": row.get("hbp") or 0,
"hr_allowed": row.get("hr_allowed") or 0,
"wild_pitches": row.get("wild_pitches") or 0,
"balks": row.get("balks") or 0,
# Not available from play-by-play data
"runs_allowed": 0,
"earned_runs": 0,
}
def _recalc_decisions(player_id: int, team_id: int, season: int) -> dict:
"""
Recompute full-season decision totals for a pitcher+team+season triple.
Aggregates all Decision rows for the pitcher across the season. Decision
rows are keyed by (pitcher, pitcher_team, season) independently of the
StratPlay table, so this query is separate from _recalc_pitching().
Decision.is_start is a BooleanField; CAST to INTEGER before summing to
ensure correct arithmetic across SQLite (True/False) and PostgreSQL
(boolean).
Args:
player_id: FK to the player record (pitcher).
team_id: FK to the team record.
season: Integer season year.
Returns:
Dict with keys: wins, losses, holds, saves, blown_saves,
games_started. All values are native Python ints.
"""
row = (
Decision.select(
fn.SUM(Decision.win).alias("wins"),
fn.SUM(Decision.loss).alias("losses"),
fn.SUM(Decision.hold).alias("holds"),
fn.SUM(Decision.is_save).alias("saves"),
fn.SUM(Decision.b_save).alias("blown_saves"),
fn.SUM(Decision.is_start.cast("INTEGER")).alias("games_started"),
)
.where(
Decision.pitcher == player_id,
Decision.pitcher_team == team_id,
Decision.season == season,
)
.dicts()
.first()
)
if row is None:
row = {}
return {
"wins": row.get("wins") or 0,
"losses": row.get("losses") or 0,
"holds": row.get("holds") or 0,
"saves": row.get("saves") or 0,
"blown_saves": row.get("blown_saves") or 0,
"games_started": row.get("games_started") or 0,
}
def update_season_stats(game_id: int, force: bool = False) -> dict:
"""
Recompute full-season batting and pitching stats for every player in the game.
Unlike the previous incremental approach, this function recalculates each
player's season totals from scratch by querying all StratPlay rows for
the player+team+season triple. The resulting totals replace whatever was
previously stored — no additive delta is applied.
Algorithm:
1. Fetch StratGame to get the season.
2. Check the ProcessedGame ledger:
- If already processed and force=False, return early (skipped=True).
- If already processed and force=True, continue (overwrite allowed).
- If not yet processed, create the ledger entry.
3. Determine (player_id, team_id) pairs via _get_player_pairs().
4. For each batting pair: recompute season totals, then get_or_create
BattingSeasonStats and overwrite all fields.
5. For each pitching pair: recompute season play totals and decision
totals, merge, then get_or_create PitchingSeasonStats and overwrite
all fields.
Args:
game_id: Primary key of the StratGame to process.
force: If True, re-process even if the game was previously recorded
in the ProcessedGame ledger. Useful for correcting stats after
retroactive data adjustments.
Returns:
Dict with keys:
game_id — echoed back
season — season integer from StratGame
batters_updated — number of BattingSeasonStats rows written
pitchers_updated — number of PitchingSeasonStats rows written
skipped — True only when the game was already processed
and force=False; absent otherwise.
Raises:
StratGame.DoesNotExist: If no StratGame row matches game_id.
"""
logger.info("update_season_stats: starting for game_id=%d force=%s", game_id, force)
game = StratGame.get_by_id(game_id)
season = game.season
with db.atomic():
# Idempotency check via ProcessedGame ledger.
_, created = ProcessedGame.get_or_create(game_id=game_id)
if not created and not force:
logger.info(
"update_season_stats: game_id=%d already processed, skipping",
game_id,
)
return {
"game_id": game_id,
"season": season,
"batters_updated": 0,
"pitchers_updated": 0,
"skipped": True,
}
if not created and force:
logger.info(
"update_season_stats: game_id=%d already processed, force=True — recalculating",
game_id,
)
batting_pairs, pitching_pairs = _get_player_pairs(game_id)
logger.debug(
"update_season_stats: game_id=%d found %d batting pairs, %d pitching pairs",
game_id,
len(batting_pairs),
len(pitching_pairs),
)
now = datetime.now()
# Recompute and overwrite batting season stats for each batter.
batters_updated = 0
for player_id, team_id in batting_pairs:
stats = _recalc_batting(player_id, team_id, season)
obj, _ = BattingSeasonStats.get_or_create(
player_id=player_id,
team_id=team_id,
season=season,
)
obj.games = stats["games"]
obj.pa = stats["pa"]
obj.ab = stats["ab"]
obj.hits = stats["hits"]
obj.doubles = stats["doubles"]
obj.triples = stats["triples"]
obj.hr = stats["hr"]
obj.rbi = stats["rbi"]
obj.runs = stats["runs"]
obj.bb = stats["bb"]
obj.strikeouts = stats["strikeouts"]
obj.hbp = stats["hbp"]
obj.sac = stats["sac"]
obj.ibb = stats["ibb"]
obj.gidp = stats["gidp"]
obj.sb = stats["sb"]
obj.cs = stats["cs"]
obj.last_game_id = game_id
obj.last_updated_at = now
obj.save()
batters_updated += 1
# Recompute and overwrite pitching season stats for each pitcher.
pitchers_updated = 0
for player_id, team_id in pitching_pairs:
play_stats = _recalc_pitching(player_id, team_id, season)
decision_stats = _recalc_decisions(player_id, team_id, season)
obj, _ = PitchingSeasonStats.get_or_create(
player_id=player_id,
team_id=team_id,
season=season,
)
obj.games = play_stats["games"]
obj.games_started = decision_stats["games_started"]
obj.outs = play_stats["outs"]
obj.strikeouts = play_stats["strikeouts"]
obj.bb = play_stats["bb"]
obj.hits_allowed = play_stats["hits_allowed"]
obj.runs_allowed = play_stats["runs_allowed"]
obj.earned_runs = play_stats["earned_runs"]
obj.hr_allowed = play_stats["hr_allowed"]
obj.hbp = play_stats["hbp"]
obj.wild_pitches = play_stats["wild_pitches"]
obj.balks = play_stats["balks"]
obj.wins = decision_stats["wins"]
obj.losses = decision_stats["losses"]
obj.holds = decision_stats["holds"]
obj.saves = decision_stats["saves"]
obj.blown_saves = decision_stats["blown_saves"]
obj.last_game_id = game_id
obj.last_updated_at = now
obj.save()
pitchers_updated += 1
logger.info(
"update_season_stats: game_id=%d complete — "
"batters_updated=%d pitchers_updated=%d",
game_id,
batters_updated,
pitchers_updated,
)
return {
"game_id": game_id,
"season": season,
"batters_updated": batters_updated,
"pitchers_updated": pitchers_updated,
}