All checks were successful
Build Docker Image / build (pull_request) Successful in 8m53s
Prevents (None, team_id) tuples from being added to pitching_pairs when a StratPlay row has no pitcher (edge case matching the existing batter_id guard). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
454 lines
16 KiB
Python
454 lines
16 KiB
Python
"""
|
|
season_stats.py — Full-recalculation BattingSeasonStats and PitchingSeasonStats update logic.
|
|
|
|
Called once per completed StratGame to recompute the full season batting and
|
|
pitching statistics for every player who appeared in that game, then write
|
|
those totals to the batting_season_stats and pitching_season_stats tables.
|
|
|
|
Unlike the previous incremental (delta) approach, each call recomputes totals
|
|
from scratch by aggregating all StratPlay rows for the player+team+season
|
|
triple. This eliminates double-counting on re-delivery and makes every row a
|
|
faithful snapshot of the full season to date.
|
|
|
|
Idempotency: re-delivery of a game is detected via the ProcessedGame ledger
|
|
table, keyed on game_id.
|
|
- First call: records the ledger entry and proceeds with recalculation.
|
|
- Subsequent calls without force=True: return early with "skipped": True.
|
|
- force=True: skips the early-return check and recalculates anyway (useful
|
|
for correcting data after retroactive stat adjustments).
|
|
|
|
Upsert strategy: get_or_create + field assignment + save(). Because we are
|
|
writing the full recomputed total rather than adding a delta, there is no
|
|
risk of concurrent-write skew between games. A single unified path works for
|
|
both SQLite and PostgreSQL.
|
|
"""
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from peewee import Case, fn
|
|
|
|
from app.db_engine import (
|
|
db,
|
|
BattingSeasonStats,
|
|
Decision,
|
|
PitchingSeasonStats,
|
|
ProcessedGame,
|
|
StratGame,
|
|
StratPlay,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _get_player_pairs(game_id: int) -> tuple[set, set]:
|
|
"""
|
|
Return the sets of (player_id, team_id) pairs that appeared in the game.
|
|
|
|
Queries StratPlay for all rows belonging to game_id and extracts:
|
|
- batting_pairs: set of (batter_id, batter_team_id), excluding rows where
|
|
batter_id is None (e.g. automatic outs, walk-off plays without a PA).
|
|
- pitching_pairs: set of (pitcher_id, pitcher_team_id) from all plays
|
|
(pitcher is always present), plus any pitchers from the Decision table
|
|
who may not have StratPlay rows (rare edge case).
|
|
|
|
Args:
|
|
game_id: Primary key of the StratGame to query.
|
|
|
|
Returns:
|
|
Tuple of (batting_pairs, pitching_pairs) where each element is a set
|
|
of (int, int) tuples.
|
|
"""
|
|
plays = (
|
|
StratPlay.select(
|
|
StratPlay.batter,
|
|
StratPlay.batter_team,
|
|
StratPlay.pitcher,
|
|
StratPlay.pitcher_team,
|
|
)
|
|
.where(StratPlay.game == game_id)
|
|
.tuples()
|
|
)
|
|
|
|
batting_pairs: set[tuple[int, int]] = set()
|
|
pitching_pairs: set[tuple[int, int]] = set()
|
|
|
|
for batter_id, batter_team_id, pitcher_id, pitcher_team_id in plays:
|
|
if batter_id is not None:
|
|
batting_pairs.add((batter_id, batter_team_id))
|
|
if pitcher_id is not None:
|
|
pitching_pairs.add((pitcher_id, pitcher_team_id))
|
|
|
|
# Include pitchers who have a Decision but no StratPlay rows for this game
|
|
# (rare edge case, e.g. a pitcher credited with a decision without recording
|
|
# any plays — the old code handled this explicitly in _apply_decisions).
|
|
decision_pitchers = (
|
|
Decision.select(Decision.pitcher, Decision.pitcher_team)
|
|
.where(Decision.game == game_id)
|
|
.tuples()
|
|
)
|
|
for pitcher_id, pitcher_team_id in decision_pitchers:
|
|
pitching_pairs.add((pitcher_id, pitcher_team_id))
|
|
|
|
return batting_pairs, pitching_pairs
|
|
|
|
|
|
def _recalc_batting(player_id: int, team_id: int, season: int) -> dict:
|
|
"""
|
|
Recompute full-season batting totals for a player+team+season triple.
|
|
|
|
Aggregates every StratPlay row where batter == player_id and
|
|
batter_team == team_id across all games in the given season.
|
|
|
|
games counts only games where the player had at least one official PA
|
|
(pa > 0). The COUNT(DISTINCT ...) with a CASE expression achieves this:
|
|
NULL values from the CASE are ignored by COUNT, so only game IDs where
|
|
pa > 0 contribute.
|
|
|
|
Args:
|
|
player_id: FK to the player record.
|
|
team_id: FK to the team record.
|
|
season: Integer season year.
|
|
|
|
Returns:
|
|
Dict with keys matching BattingSeasonStats columns; all values are
|
|
native Python ints (defaulting to 0 if no rows matched).
|
|
"""
|
|
row = (
|
|
StratPlay.select(
|
|
fn.COUNT(
|
|
Case(None, [(StratPlay.pa > 0, StratPlay.game)], None).distinct()
|
|
).alias("games"),
|
|
fn.SUM(StratPlay.pa).alias("pa"),
|
|
fn.SUM(StratPlay.ab).alias("ab"),
|
|
fn.SUM(StratPlay.hit).alias("hits"),
|
|
fn.SUM(StratPlay.double).alias("doubles"),
|
|
fn.SUM(StratPlay.triple).alias("triples"),
|
|
fn.SUM(StratPlay.homerun).alias("hr"),
|
|
fn.SUM(StratPlay.rbi).alias("rbi"),
|
|
fn.SUM(StratPlay.run).alias("runs"),
|
|
fn.SUM(StratPlay.bb).alias("bb"),
|
|
fn.SUM(StratPlay.so).alias("strikeouts"),
|
|
fn.SUM(StratPlay.hbp).alias("hbp"),
|
|
fn.SUM(StratPlay.sac).alias("sac"),
|
|
fn.SUM(StratPlay.ibb).alias("ibb"),
|
|
fn.SUM(StratPlay.gidp).alias("gidp"),
|
|
fn.SUM(StratPlay.sb).alias("sb"),
|
|
fn.SUM(StratPlay.cs).alias("cs"),
|
|
)
|
|
.join(StratGame, on=(StratPlay.game == StratGame.id))
|
|
.where(
|
|
StratPlay.batter == player_id,
|
|
StratPlay.batter_team == team_id,
|
|
StratGame.season == season,
|
|
)
|
|
.dicts()
|
|
.first()
|
|
)
|
|
|
|
if row is None:
|
|
row = {}
|
|
|
|
return {
|
|
"games": row.get("games") or 0,
|
|
"pa": row.get("pa") or 0,
|
|
"ab": row.get("ab") or 0,
|
|
"hits": row.get("hits") or 0,
|
|
"doubles": row.get("doubles") or 0,
|
|
"triples": row.get("triples") or 0,
|
|
"hr": row.get("hr") or 0,
|
|
"rbi": row.get("rbi") or 0,
|
|
"runs": row.get("runs") or 0,
|
|
"bb": row.get("bb") or 0,
|
|
"strikeouts": row.get("strikeouts") or 0,
|
|
"hbp": row.get("hbp") or 0,
|
|
"sac": row.get("sac") or 0,
|
|
"ibb": row.get("ibb") or 0,
|
|
"gidp": row.get("gidp") or 0,
|
|
"sb": row.get("sb") or 0,
|
|
"cs": row.get("cs") or 0,
|
|
}
|
|
|
|
|
|
def _recalc_pitching(player_id: int, team_id: int, season: int) -> dict:
|
|
"""
|
|
Recompute full-season pitching totals for a player+team+season triple.
|
|
|
|
Aggregates every StratPlay row where pitcher == player_id and
|
|
pitcher_team == team_id across all games in the given season. games counts
|
|
all distinct games in which the pitcher appeared (any play qualifies).
|
|
|
|
Stats derived from StratPlay (from the batter-perspective columns):
|
|
- outs = SUM(outs)
|
|
- strikeouts = SUM(so) — batter SO = pitcher K
|
|
- hits_allowed = SUM(hit)
|
|
- bb = SUM(bb) — walks allowed
|
|
- hbp = SUM(hbp)
|
|
- hr_allowed = SUM(homerun)
|
|
- wild_pitches = SUM(wild_pitch)
|
|
- balks = SUM(balk)
|
|
|
|
Fields not available from StratPlay (runs_allowed, earned_runs) default
|
|
to 0. Decision-level fields (wins, losses, etc.) are populated separately
|
|
by _recalc_decisions() and merged in the caller.
|
|
|
|
Args:
|
|
player_id: FK to the player record.
|
|
team_id: FK to the team record.
|
|
season: Integer season year.
|
|
|
|
Returns:
|
|
Dict with keys matching PitchingSeasonStats columns (excluding
|
|
decision fields, which are filled by _recalc_decisions).
|
|
"""
|
|
row = (
|
|
StratPlay.select(
|
|
fn.COUNT(StratPlay.game.distinct()).alias("games"),
|
|
fn.SUM(StratPlay.outs).alias("outs"),
|
|
fn.SUM(StratPlay.so).alias("strikeouts"),
|
|
fn.SUM(StratPlay.hit).alias("hits_allowed"),
|
|
fn.SUM(StratPlay.bb).alias("bb"),
|
|
fn.SUM(StratPlay.hbp).alias("hbp"),
|
|
fn.SUM(StratPlay.homerun).alias("hr_allowed"),
|
|
fn.SUM(StratPlay.wild_pitch).alias("wild_pitches"),
|
|
fn.SUM(StratPlay.balk).alias("balks"),
|
|
)
|
|
.join(StratGame, on=(StratPlay.game == StratGame.id))
|
|
.where(
|
|
StratPlay.pitcher == player_id,
|
|
StratPlay.pitcher_team == team_id,
|
|
StratGame.season == season,
|
|
)
|
|
.dicts()
|
|
.first()
|
|
)
|
|
|
|
if row is None:
|
|
row = {}
|
|
|
|
return {
|
|
"games": row.get("games") or 0,
|
|
"outs": row.get("outs") or 0,
|
|
"strikeouts": row.get("strikeouts") or 0,
|
|
"hits_allowed": row.get("hits_allowed") or 0,
|
|
"bb": row.get("bb") or 0,
|
|
"hbp": row.get("hbp") or 0,
|
|
"hr_allowed": row.get("hr_allowed") or 0,
|
|
"wild_pitches": row.get("wild_pitches") or 0,
|
|
"balks": row.get("balks") or 0,
|
|
# Not available from play-by-play data
|
|
"runs_allowed": 0,
|
|
"earned_runs": 0,
|
|
}
|
|
|
|
|
|
def _recalc_decisions(player_id: int, team_id: int, season: int) -> dict:
|
|
"""
|
|
Recompute full-season decision totals for a pitcher+team+season triple.
|
|
|
|
Aggregates all Decision rows for the pitcher across the season. Decision
|
|
rows are keyed by (pitcher, pitcher_team, season) independently of the
|
|
StratPlay table, so this query is separate from _recalc_pitching().
|
|
|
|
Decision.is_start is a BooleanField; CAST to INTEGER before summing to
|
|
ensure correct arithmetic across SQLite (True/False) and PostgreSQL
|
|
(boolean).
|
|
|
|
Args:
|
|
player_id: FK to the player record (pitcher).
|
|
team_id: FK to the team record.
|
|
season: Integer season year.
|
|
|
|
Returns:
|
|
Dict with keys: wins, losses, holds, saves, blown_saves,
|
|
games_started. All values are native Python ints.
|
|
"""
|
|
row = (
|
|
Decision.select(
|
|
fn.SUM(Decision.win).alias("wins"),
|
|
fn.SUM(Decision.loss).alias("losses"),
|
|
fn.SUM(Decision.hold).alias("holds"),
|
|
fn.SUM(Decision.is_save).alias("saves"),
|
|
fn.SUM(Decision.b_save).alias("blown_saves"),
|
|
fn.SUM(Decision.is_start.cast("INTEGER")).alias("games_started"),
|
|
)
|
|
.where(
|
|
Decision.pitcher == player_id,
|
|
Decision.pitcher_team == team_id,
|
|
Decision.season == season,
|
|
)
|
|
.dicts()
|
|
.first()
|
|
)
|
|
|
|
if row is None:
|
|
row = {}
|
|
|
|
return {
|
|
"wins": row.get("wins") or 0,
|
|
"losses": row.get("losses") or 0,
|
|
"holds": row.get("holds") or 0,
|
|
"saves": row.get("saves") or 0,
|
|
"blown_saves": row.get("blown_saves") or 0,
|
|
"games_started": row.get("games_started") or 0,
|
|
}
|
|
|
|
|
|
def update_season_stats(game_id: int, force: bool = False) -> dict:
|
|
"""
|
|
Recompute full-season batting and pitching stats for every player in the game.
|
|
|
|
Unlike the previous incremental approach, this function recalculates each
|
|
player's season totals from scratch by querying all StratPlay rows for
|
|
the player+team+season triple. The resulting totals replace whatever was
|
|
previously stored — no additive delta is applied.
|
|
|
|
Algorithm:
|
|
1. Fetch StratGame to get the season.
|
|
2. Check the ProcessedGame ledger:
|
|
- If already processed and force=False, return early (skipped=True).
|
|
- If already processed and force=True, continue (overwrite allowed).
|
|
- If not yet processed, create the ledger entry.
|
|
3. Determine (player_id, team_id) pairs via _get_player_pairs().
|
|
4. For each batting pair: recompute season totals, then get_or_create
|
|
BattingSeasonStats and overwrite all fields.
|
|
5. For each pitching pair: recompute season play totals and decision
|
|
totals, merge, then get_or_create PitchingSeasonStats and overwrite
|
|
all fields.
|
|
|
|
Args:
|
|
game_id: Primary key of the StratGame to process.
|
|
force: If True, re-process even if the game was previously recorded
|
|
in the ProcessedGame ledger. Useful for correcting stats after
|
|
retroactive data adjustments.
|
|
|
|
Returns:
|
|
Dict with keys:
|
|
game_id — echoed back
|
|
season — season integer from StratGame
|
|
batters_updated — number of BattingSeasonStats rows written
|
|
pitchers_updated — number of PitchingSeasonStats rows written
|
|
skipped — True only when the game was already processed
|
|
and force=False; absent otherwise.
|
|
|
|
Raises:
|
|
StratGame.DoesNotExist: If no StratGame row matches game_id.
|
|
"""
|
|
logger.info("update_season_stats: starting for game_id=%d force=%s", game_id, force)
|
|
|
|
game = StratGame.get_by_id(game_id)
|
|
season = game.season
|
|
|
|
with db.atomic():
|
|
# Idempotency check via ProcessedGame ledger.
|
|
_, created = ProcessedGame.get_or_create(game_id=game_id)
|
|
|
|
if not created and not force:
|
|
logger.info(
|
|
"update_season_stats: game_id=%d already processed, skipping",
|
|
game_id,
|
|
)
|
|
return {
|
|
"game_id": game_id,
|
|
"season": season,
|
|
"batters_updated": 0,
|
|
"pitchers_updated": 0,
|
|
"skipped": True,
|
|
}
|
|
|
|
if not created and force:
|
|
logger.info(
|
|
"update_season_stats: game_id=%d already processed, force=True — recalculating",
|
|
game_id,
|
|
)
|
|
|
|
batting_pairs, pitching_pairs = _get_player_pairs(game_id)
|
|
logger.debug(
|
|
"update_season_stats: game_id=%d found %d batting pairs, %d pitching pairs",
|
|
game_id,
|
|
len(batting_pairs),
|
|
len(pitching_pairs),
|
|
)
|
|
|
|
now = datetime.now()
|
|
|
|
# Recompute and overwrite batting season stats for each batter.
|
|
batters_updated = 0
|
|
for player_id, team_id in batting_pairs:
|
|
stats = _recalc_batting(player_id, team_id, season)
|
|
|
|
obj, _ = BattingSeasonStats.get_or_create(
|
|
player_id=player_id,
|
|
team_id=team_id,
|
|
season=season,
|
|
)
|
|
obj.games = stats["games"]
|
|
obj.pa = stats["pa"]
|
|
obj.ab = stats["ab"]
|
|
obj.hits = stats["hits"]
|
|
obj.doubles = stats["doubles"]
|
|
obj.triples = stats["triples"]
|
|
obj.hr = stats["hr"]
|
|
obj.rbi = stats["rbi"]
|
|
obj.runs = stats["runs"]
|
|
obj.bb = stats["bb"]
|
|
obj.strikeouts = stats["strikeouts"]
|
|
obj.hbp = stats["hbp"]
|
|
obj.sac = stats["sac"]
|
|
obj.ibb = stats["ibb"]
|
|
obj.gidp = stats["gidp"]
|
|
obj.sb = stats["sb"]
|
|
obj.cs = stats["cs"]
|
|
obj.last_game_id = game_id
|
|
obj.last_updated_at = now
|
|
obj.save()
|
|
batters_updated += 1
|
|
|
|
# Recompute and overwrite pitching season stats for each pitcher.
|
|
pitchers_updated = 0
|
|
for player_id, team_id in pitching_pairs:
|
|
play_stats = _recalc_pitching(player_id, team_id, season)
|
|
decision_stats = _recalc_decisions(player_id, team_id, season)
|
|
|
|
obj, _ = PitchingSeasonStats.get_or_create(
|
|
player_id=player_id,
|
|
team_id=team_id,
|
|
season=season,
|
|
)
|
|
obj.games = play_stats["games"]
|
|
obj.games_started = decision_stats["games_started"]
|
|
obj.outs = play_stats["outs"]
|
|
obj.strikeouts = play_stats["strikeouts"]
|
|
obj.bb = play_stats["bb"]
|
|
obj.hits_allowed = play_stats["hits_allowed"]
|
|
obj.runs_allowed = play_stats["runs_allowed"]
|
|
obj.earned_runs = play_stats["earned_runs"]
|
|
obj.hr_allowed = play_stats["hr_allowed"]
|
|
obj.hbp = play_stats["hbp"]
|
|
obj.wild_pitches = play_stats["wild_pitches"]
|
|
obj.balks = play_stats["balks"]
|
|
obj.wins = decision_stats["wins"]
|
|
obj.losses = decision_stats["losses"]
|
|
obj.holds = decision_stats["holds"]
|
|
obj.saves = decision_stats["saves"]
|
|
obj.blown_saves = decision_stats["blown_saves"]
|
|
obj.last_game_id = game_id
|
|
obj.last_updated_at = now
|
|
obj.save()
|
|
pitchers_updated += 1
|
|
|
|
logger.info(
|
|
"update_season_stats: game_id=%d complete — "
|
|
"batters_updated=%d pitchers_updated=%d",
|
|
game_id,
|
|
batters_updated,
|
|
pitchers_updated,
|
|
)
|
|
|
|
return {
|
|
"game_id": game_id,
|
|
"season": season,
|
|
"batters_updated": batters_updated,
|
|
"pitchers_updated": pitchers_updated,
|
|
}
|