paper-dynasty-database/app/services/season_stats.py

"""
season_stats.py — Full-recalculation BattingSeasonStats and PitchingSeasonStats update logic.

Called once per completed StratGame to recompute the full season batting and
pitching statistics for every player who appeared in that game, then write
those totals to the batting_season_stats and pitching_season_stats tables.

Unlike the previous incremental (delta) approach, each call recomputes totals
from scratch by aggregating all StratPlay rows for the player+team+season
triple. This eliminates double-counting on re-delivery and makes every row a
faithful snapshot of the full season to date.

Idempotency: re-delivery of a game is detected via the ProcessedGame ledger
table, keyed on game_id.
- First call: records the ledger entry and proceeds with recalculation.
- Subsequent calls without force=True: return early with "skipped": True.
- force=True: skips the early-return check and recalculates anyway (useful
  for correcting data after retroactive stat adjustments).

Upsert strategy: get_or_create + field assignment + save(). Because we are
writing the full recomputed total rather than adding a delta, there is no
risk of concurrent-write skew between games. A single unified path works for
both SQLite and PostgreSQL.
"""

import logging
from datetime import datetime

from peewee import Case, fn

from app.db_engine import (
    db,
    BattingSeasonStats,
    Decision,
    PitchingSeasonStats,
    ProcessedGame,
    StratGame,
    StratPlay,
)

logger = logging.getLogger(__name__)


def _get_player_pairs(game_id: int) -> tuple[set, set]:
    """
    Return the sets of (player_id, team_id) pairs that appeared in the game.

    Queries StratPlay for all rows belonging to game_id and extracts:
    - batting_pairs: set of (batter_id, batter_team_id), excluding rows where
      batter_id is None (e.g. automatic outs, walk-off plays without a PA).
    - pitching_pairs: set of (pitcher_id, pitcher_team_id) from all plays
      (pitcher is always present), plus any pitchers from the Decision table
      who may not have StratPlay rows (rare edge case).

    Args:
        game_id: Primary key of the StratGame to query.

    Returns:
        Tuple of (batting_pairs, pitching_pairs) where each element is a set
        of (int, int) tuples.
    """
    plays = (
        StratPlay.select(
            StratPlay.batter,
            StratPlay.batter_team,
            StratPlay.pitcher,
            StratPlay.pitcher_team,
        )
        .where(StratPlay.game == game_id)
        .tuples()
    )

    batting_pairs: set[tuple[int, int]] = set()
    pitching_pairs: set[tuple[int, int]] = set()

    for batter_id, batter_team_id, pitcher_id, pitcher_team_id in plays:
        if batter_id is not None:
            batting_pairs.add((batter_id, batter_team_id))
        if pitcher_id is not None:
            pitching_pairs.add((pitcher_id, pitcher_team_id))

    # Include pitchers who have a Decision but no StratPlay rows for this game
    # (rare edge case, e.g. a pitcher credited with a decision without recording
    # any plays — the old code handled this explicitly in _apply_decisions).
    decision_pitchers = (
        Decision.select(Decision.pitcher, Decision.pitcher_team)
        .where(Decision.game == game_id)
        .tuples()
    )
    for pitcher_id, pitcher_team_id in decision_pitchers:
        pitching_pairs.add((pitcher_id, pitcher_team_id))

    return batting_pairs, pitching_pairs


def _recalc_batting(player_id: int, team_id: int, season: int) -> dict:
    """
    Recompute full-season batting totals for a player+team+season triple.

    Aggregates every StratPlay row where batter == player_id and
    batter_team == team_id across all games in the given season.

    games counts only games where the player had at least one official PA
    (pa > 0). The COUNT(DISTINCT ...) with a CASE expression achieves this:
    NULL values from the CASE are ignored by COUNT, so only game IDs where
    pa > 0 contribute.

    Args:
        player_id: FK to the player record.
        team_id:   FK to the team record.
        season:    Integer season year.

    Returns:
        Dict with keys matching BattingSeasonStats columns; all values are
        native Python ints (defaulting to 0 if no rows matched).
    """
    row = (
        StratPlay.select(
            fn.COUNT(
                Case(None, [(StratPlay.pa > 0, StratPlay.game)], None).distinct()
            ).alias("games"),
            fn.SUM(StratPlay.pa).alias("pa"),
            fn.SUM(StratPlay.ab).alias("ab"),
            fn.SUM(StratPlay.hit).alias("hits"),
            fn.SUM(StratPlay.double).alias("doubles"),
            fn.SUM(StratPlay.triple).alias("triples"),
            fn.SUM(StratPlay.homerun).alias("hr"),
            fn.SUM(StratPlay.rbi).alias("rbi"),
            fn.SUM(StratPlay.run).alias("runs"),
            fn.SUM(StratPlay.bb).alias("bb"),
            fn.SUM(StratPlay.so).alias("strikeouts"),
            fn.SUM(StratPlay.hbp).alias("hbp"),
            fn.SUM(StratPlay.sac).alias("sac"),
            fn.SUM(StratPlay.ibb).alias("ibb"),
            fn.SUM(StratPlay.gidp).alias("gidp"),
            fn.SUM(StratPlay.sb).alias("sb"),
            fn.SUM(StratPlay.cs).alias("cs"),
        )
        .join(StratGame, on=(StratPlay.game == StratGame.id))
        .where(
            StratPlay.batter == player_id,
            StratPlay.batter_team == team_id,
            StratGame.season == season,
        )
        .dicts()
        .first()
    )

    if row is None:
        row = {}

    return {
        "games": row.get("games") or 0,
        "pa": row.get("pa") or 0,
        "ab": row.get("ab") or 0,
        "hits": row.get("hits") or 0,
        "doubles": row.get("doubles") or 0,
        "triples": row.get("triples") or 0,
        "hr": row.get("hr") or 0,
        "rbi": row.get("rbi") or 0,
        "runs": row.get("runs") or 0,
        "bb": row.get("bb") or 0,
        "strikeouts": row.get("strikeouts") or 0,
        "hbp": row.get("hbp") or 0,
        "sac": row.get("sac") or 0,
        "ibb": row.get("ibb") or 0,
        "gidp": row.get("gidp") or 0,
        "sb": row.get("sb") or 0,
        "cs": row.get("cs") or 0,
    }


def _recalc_pitching(player_id: int, team_id: int, season: int) -> dict:
    """
    Recompute full-season pitching totals for a player+team+season triple.

    Aggregates every StratPlay row where pitcher == player_id and
    pitcher_team == team_id across all games in the given season. games counts
    all distinct games in which the pitcher appeared (any play qualifies).

    Stats derived from StratPlay (from the batter-perspective columns):
    - outs          = SUM(outs)
    - strikeouts    = SUM(so)          — batter SO = pitcher K
    - hits_allowed  = SUM(hit)
    - bb            = SUM(bb)          — walks allowed
    - hbp           = SUM(hbp)
    - hr_allowed    = SUM(homerun)
    - wild_pitches  = SUM(wild_pitch)
    - balks         = SUM(balk)

    Fields not available from StratPlay (runs_allowed, earned_runs) default
    to 0. Decision-level fields (wins, losses, etc.) are populated separately
    by _recalc_decisions() and merged in the caller.

    Args:
        player_id: FK to the player record.
        team_id:   FK to the team record.
        season:    Integer season year.

    Returns:
        Dict with keys matching PitchingSeasonStats columns (excluding
        decision fields, which are filled by _recalc_decisions).
    """
    row = (
        StratPlay.select(
            fn.COUNT(StratPlay.game.distinct()).alias("games"),
            fn.SUM(StratPlay.outs).alias("outs"),
            fn.SUM(StratPlay.so).alias("strikeouts"),
            fn.SUM(StratPlay.hit).alias("hits_allowed"),
            fn.SUM(StratPlay.bb).alias("bb"),
            fn.SUM(StratPlay.hbp).alias("hbp"),
            fn.SUM(StratPlay.homerun).alias("hr_allowed"),
            fn.SUM(StratPlay.wild_pitch).alias("wild_pitches"),
            fn.SUM(StratPlay.balk).alias("balks"),
        )
        .join(StratGame, on=(StratPlay.game == StratGame.id))
        .where(
            StratPlay.pitcher == player_id,
            StratPlay.pitcher_team == team_id,
            StratGame.season == season,
        )
        .dicts()
        .first()
    )

    if row is None:
        row = {}

    return {
        "games": row.get("games") or 0,
        "outs": row.get("outs") or 0,
        "strikeouts": row.get("strikeouts") or 0,
        "hits_allowed": row.get("hits_allowed") or 0,
        "bb": row.get("bb") or 0,
        "hbp": row.get("hbp") or 0,
        "hr_allowed": row.get("hr_allowed") or 0,
        "wild_pitches": row.get("wild_pitches") or 0,
        "balks": row.get("balks") or 0,
        # Not available from play-by-play data
        "runs_allowed": 0,
        "earned_runs": 0,
    }


def _recalc_decisions(player_id: int, team_id: int, season: int) -> dict:
    """
    Recompute full-season decision totals for a pitcher+team+season triple.

    Aggregates all Decision rows for the pitcher across the season. Decision
    rows are keyed by (pitcher, pitcher_team, season) independently of the
    StratPlay table, so this query is separate from _recalc_pitching().

    Decision.is_start is a BooleanField; CAST to INTEGER before summing to
    ensure correct arithmetic across SQLite (True/False) and PostgreSQL
    (boolean).

    Args:
        player_id: FK to the player record (pitcher).
        team_id:   FK to the team record.
        season:    Integer season year.

    Returns:
        Dict with keys: wins, losses, holds, saves, blown_saves,
        games_started. All values are native Python ints.
    """
    row = (
        Decision.select(
            fn.SUM(Decision.win).alias("wins"),
            fn.SUM(Decision.loss).alias("losses"),
            fn.SUM(Decision.hold).alias("holds"),
            fn.SUM(Decision.is_save).alias("saves"),
            fn.SUM(Decision.b_save).alias("blown_saves"),
            fn.SUM(Decision.is_start.cast("INTEGER")).alias("games_started"),
        )
        .where(
            Decision.pitcher == player_id,
            Decision.pitcher_team == team_id,
            Decision.season == season,
        )
        .dicts()
        .first()
    )

    if row is None:
        row = {}

    return {
        "wins": row.get("wins") or 0,
        "losses": row.get("losses") or 0,
        "holds": row.get("holds") or 0,
        "saves": row.get("saves") or 0,
        "blown_saves": row.get("blown_saves") or 0,
        "games_started": row.get("games_started") or 0,
    }


def update_season_stats(game_id: int, force: bool = False) -> dict:
    """
    Recompute full-season batting and pitching stats for every player in the game.

    Unlike the previous incremental approach, this function recalculates each
    player's season totals from scratch by querying all StratPlay rows for
    the player+team+season triple. The resulting totals replace whatever was
    previously stored — no additive delta is applied.

    Algorithm:
        1. Fetch StratGame to get the season.
        2. Check the ProcessedGame ledger:
           - If already processed and force=False, return early (skipped=True).
           - If already processed and force=True, continue (overwrite allowed).
           - If not yet processed, create the ledger entry.
        3. Determine (player_id, team_id) pairs via _get_player_pairs().
        4. For each batting pair: recompute season totals, then get_or_create
           BattingSeasonStats and overwrite all fields.
        5. For each pitching pair: recompute season play totals and decision
           totals, merge, then get_or_create PitchingSeasonStats and overwrite
           all fields.

    Args:
        game_id: Primary key of the StratGame to process.
        force:   If True, re-process even if the game was previously recorded
                 in the ProcessedGame ledger. Useful for correcting stats after
                 retroactive data adjustments.

    Returns:
        Dict with keys:
            game_id          — echoed back
            season           — season integer from StratGame
            batters_updated  — number of BattingSeasonStats rows written
            pitchers_updated — number of PitchingSeasonStats rows written
            skipped          — True only when the game was already processed
                               and force=False; absent otherwise.

    Raises:
        StratGame.DoesNotExist: If no StratGame row matches game_id.
    """
    logger.info("update_season_stats: starting for game_id=%d force=%s", game_id, force)

    game = StratGame.get_by_id(game_id)
    season = game.season

    with db.atomic():
        # Idempotency check via ProcessedGame ledger.
        _, created = ProcessedGame.get_or_create(game_id=game_id)

        if not created and not force:
            logger.info(
                "update_season_stats: game_id=%d already processed, skipping",
                game_id,
            )
            return {
                "game_id": game_id,
                "season": season,
                "batters_updated": 0,
                "pitchers_updated": 0,
                "skipped": True,
            }

        if not created and force:
            logger.info(
                "update_season_stats: game_id=%d already processed, force=True — recalculating",
                game_id,
            )

        batting_pairs, pitching_pairs = _get_player_pairs(game_id)
        logger.debug(
            "update_season_stats: game_id=%d found %d batting pairs, %d pitching pairs",
            game_id,
            len(batting_pairs),
            len(pitching_pairs),
        )

        now = datetime.now()

        # Recompute and overwrite batting season stats for each batter.
        batters_updated = 0
        for player_id, team_id in batting_pairs:
            stats = _recalc_batting(player_id, team_id, season)

            obj, _ = BattingSeasonStats.get_or_create(
                player_id=player_id,
                team_id=team_id,
                season=season,
            )
            obj.games = stats["games"]
            obj.pa = stats["pa"]
            obj.ab = stats["ab"]
            obj.hits = stats["hits"]
            obj.doubles = stats["doubles"]
            obj.triples = stats["triples"]
            obj.hr = stats["hr"]
            obj.rbi = stats["rbi"]
            obj.runs = stats["runs"]
            obj.bb = stats["bb"]
            obj.strikeouts = stats["strikeouts"]
            obj.hbp = stats["hbp"]
            obj.sac = stats["sac"]
            obj.ibb = stats["ibb"]
            obj.gidp = stats["gidp"]
            obj.sb = stats["sb"]
            obj.cs = stats["cs"]
            obj.last_game_id = game_id
            obj.last_updated_at = now
            obj.save()
            batters_updated += 1

        # Recompute and overwrite pitching season stats for each pitcher.
        pitchers_updated = 0
        for player_id, team_id in pitching_pairs:
            play_stats = _recalc_pitching(player_id, team_id, season)
            decision_stats = _recalc_decisions(player_id, team_id, season)

            obj, _ = PitchingSeasonStats.get_or_create(
                player_id=player_id,
                team_id=team_id,
                season=season,
            )
            obj.games = play_stats["games"]
            obj.games_started = decision_stats["games_started"]
            obj.outs = play_stats["outs"]
            obj.strikeouts = play_stats["strikeouts"]
            obj.bb = play_stats["bb"]
            obj.hits_allowed = play_stats["hits_allowed"]
            obj.runs_allowed = play_stats["runs_allowed"]
            obj.earned_runs = play_stats["earned_runs"]
            obj.hr_allowed = play_stats["hr_allowed"]
            obj.hbp = play_stats["hbp"]
            obj.wild_pitches = play_stats["wild_pitches"]
            obj.balks = play_stats["balks"]
            obj.wins = decision_stats["wins"]
            obj.losses = decision_stats["losses"]
            obj.holds = decision_stats["holds"]
            obj.saves = decision_stats["saves"]
            obj.blown_saves = decision_stats["blown_saves"]
            obj.last_game_id = game_id
            obj.last_updated_at = now
            obj.save()
            pitchers_updated += 1

        logger.info(
            "update_season_stats: game_id=%d complete — "
            "batters_updated=%d pitchers_updated=%d",
            game_id,
            batters_updated,
            pitchers_updated,
        )

    return {
        "game_id": game_id,
        "season": season,
        "batters_updated": batters_updated,
        "pitchers_updated": pitchers_updated,
    }