# paper-dynasty-database/tests/test_season_stats_update.py

"""
Tests for app/services/season_stats.py — update_season_stats().

What: Verify that the full-recalculation stat engine correctly aggregates
StratPlay and Decision rows into BattingSeasonStats and PitchingSeasonStats,
handles duplicate calls idempotently, accumulates stats across multiple games,
and supports forced reprocessing for self-healing.

Why: This is the core bookkeeping engine for card evolution scoring. A
double-count bug, a missed Decision merge, or a team-isolation failure
would silently produce wrong stats that would then corrupt every
evolution tier calculation downstream.

Test data is created using real Peewee models (no mocking) against the
in-memory SQLite database provided by the autouse setup_test_db fixture
in conftest.py.  All Player and Team creation uses the actual required
column set discovered from the model definition in db_engine.py.
"""

import app.services.season_stats as _season_stats_module
import pytest

from app.db_engine import (
    BattingSeasonStats,
    Cardset,
    Decision,
    PitchingSeasonStats,
    Player,
    Rarity,
    StratGame,
    StratPlay,
    Team,
)
from app.services.season_stats import update_season_stats
from tests.conftest import _test_db

# ---------------------------------------------------------------------------
# Module-level patch: redirect season_stats.db to the test database
# ---------------------------------------------------------------------------
# season_stats.py keeps its own module-level reference to the `db` object it
# imported from db_engine. Rebinding the test models to _test_db via bind()
# does not touch that reference, so without this patch db.atomic() inside
# update_season_stats() would still target the original production database
# (SQLite file or PostgreSQL) instead of the in-memory connection that the
# test fixtures write to.
#
# NOTE(review): the assignment runs once, when this module is imported, and
# nothing in this file restores the previous value afterwards.
_season_stats_module.db = _test_db


# ---------------------------------------------------------------------------
# Helper factories
# ---------------------------------------------------------------------------


def _make_cardset():
    """Fetch the shared "Test Set" Cardset, inserting it on first use.

    The row is looked up by name, so repeated calls reuse the same record
    instead of inserting a new one each time.
    """
    creation_defaults = {"description": "Test cardset", "total_cards": 100}
    cardset, _created = Cardset.get_or_create(
        name="Test Set", defaults=creation_defaults
    )
    return cardset


def _make_rarity():
    """Fetch the shared "Common" Rarity row (value 1), inserting it on first use."""
    rarity, _created = Rarity.get_or_create(
        value=1,
        name="Common",
        defaults={"color": "#ffffff"},
    )
    return rarity


def _make_player(name: str, pos: str = "1B") -> Player:
    """Insert and return a Player with placeholder values for its other columns.

    Only the name and primary position vary between test players; the
    rarity, cardset, set number, image, club, franchise and description are
    filled with fixed test values so each test does not have to repeat them.

    Args:
        name: Stored as ``p_name`` and echoed into the description.
        pos: Stored as ``pos_1``; defaults to ``"1B"``.
    """
    columns = {
        "p_name": name,
        "rarity": _make_rarity(),
        "cardset": _make_cardset(),
        "set_num": 1,
        "pos_1": pos,
        "image": "https://example.com/image.png",
        "mlbclub": "TST",
        "franchise": "TST",
        "description": f"Test player: {name}",
    }
    return Player.create(**columns)


def _make_team(abbrev: str, gmid: int, season: int = 11) -> Team:
    """Insert and return a Team whose names are derived from its abbreviation.

    Args:
        abbrev: Used as both ``abbrev`` and ``sname``, and embedded in
            ``lname`` and (lower-cased) ``gmname``.
        gmid: Stored as the team's ``gmid``.
        season: Stored as the team's ``season``; defaults to 11.
    """
    columns = {
        "abbrev": abbrev,
        "sname": abbrev,
        "lname": f"Team {abbrev}",
        "gmid": gmid,
        "gmname": f"gm_{abbrev.lower()}",
        "gsheet": "https://docs.google.com/spreadsheets/test",
        "wallet": 500,
        "team_value": 1000,
        "collection_value": 1000,
        "season": season,
        "is_ai": False,
    }
    return Team.create(**columns)


def make_play(game, play_num, batter, batter_team, pitcher, pitcher_team, **stats):
    """Insert and return one StratPlay row, zero-filled except for ``stats``.

    The baseline row has on_base_code "000", inning_half "top", inning 1,
    batting order 1, no starting outs, a 0-0 score, and every counting stat
    at 0. Spelling the baseline out here (rather than relying on whatever
    defaults the model declares) makes the starting state of each play
    explicit.

    Args:
        game, play_num, batter, batter_team, pitcher, pitcher_team: Passed
            straight through to ``StratPlay.create``.
        **stats: Column overrides layered on top of the baseline.
    """
    situation = {
        "on_base_code": "000",
        "inning_half": "top",
        "inning_num": 1,
        "batting_order": 1,
        "starting_outs": 0,
        "away_score": 0,
        "home_score": 0,
    }
    counting_columns = (
        "pa",
        "ab",
        "hit",
        "run",
        "double",
        "triple",
        "homerun",
        "bb",
        "so",
        "hbp",
        "rbi",
        "sb",
        "cs",
        "outs",
        "sac",
        "ibb",
        "gidp",
        "bphr",
        "bpfo",
        "bp1b",
        "bplo",
    )
    # Later mappings win, so caller-supplied stats override the baseline.
    columns = {**situation, **dict.fromkeys(counting_columns, 0), **stats}
    return StratPlay.create(
        game=game,
        play_num=play_num,
        batter=batter,
        batter_team=batter_team,
        pitcher=pitcher,
        pitcher_team=pitcher_team,
        **columns,
    )


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture
def team_a():
    """Team "TMA" with gmid 1001, created via _make_team."""
    return _make_team(abbrev="TMA", gmid=1001)


@pytest.fixture
def team_b():
    """Team "TMB" with gmid 1002, created via _make_team."""
    return _make_team(abbrev="TMB", gmid=1002)


@pytest.fixture
def player_batter():
    """Player "Batter One" listed at CF; the tests use it as team A's batter."""
    return _make_player(name="Batter One", pos="CF")


@pytest.fixture
def player_pitcher():
    """Player "Pitcher One" listed at SP; the tests use it as team B's pitcher."""
    return _make_player(name="Pitcher One", pos="SP")


@pytest.fixture
def game(team_a, team_b):
    """A season-11 "ranked" StratGame with team_a away and team_b at home."""
    matchup = {"away_team": team_a, "home_team": team_b}
    return StratGame.create(season=11, game_type="ranked", **matchup)


# ---------------------------------------------------------------------------
# Tests — Existing behavior (kept)
# ---------------------------------------------------------------------------


def test_single_game_batting_stats(team_a, team_b, player_batter, player_pitcher, game):
    """One game's plays roll up into the batter's season line, column by column.

    What: Record four plate appearances for a single batter (a single, a
    home run, a strikeout and a walk), then run update_season_stats(). The
    batter's BattingSeasonStats row must equal the column-wise sum of those
    plays.

    Why: This is the heart of the batting aggregation. A wrong field mapping
    (for example 'hit' landing in 'doubles' instead of 'hits') would make
    evolution scoring and leaderboards report incorrect numbers without any
    error being raised.
    """
    plate_appearances = (
        {"pa": 1, "ab": 1, "hit": 1, "outs": 0},  # single
        {  # home run: also drives in and scores one run
            "pa": 1,
            "ab": 1,
            "hit": 1,
            "homerun": 1,
            "rbi": 1,
            "run": 1,
            "outs": 0,
        },
        {"pa": 1, "ab": 1, "so": 1, "outs": 1},  # strikeout
        {"pa": 1, "bb": 1, "outs": 0},  # walk: no at-bat charged
    )
    for play_num, line in enumerate(plate_appearances, start=1):
        make_play(
            game, play_num, player_batter, team_a, player_pitcher, team_b, **line
        )

    summary = update_season_stats(game.id)
    assert summary["batters_updated"] >= 1

    season_line = BattingSeasonStats.get(
        BattingSeasonStats.player == player_batter,
        BattingSeasonStats.team == team_a,
        BattingSeasonStats.season == 11,
    )
    expected_totals = {
        "pa": 4,
        "ab": 3,
        "hits": 2,
        "hr": 1,
        "strikeouts": 1,
        "bb": 1,
        "rbi": 1,
        "runs": 1,
        "games": 1,
    }
    for column, total in expected_totals.items():
        # The column name is the assertion message so a failure says which one.
        assert getattr(season_line, column) == total, column


def test_single_game_pitching_stats(
    team_a, team_b, player_batter, player_pitcher, game
):
    """The pitcher's season line mirrors what opposing batters did against him.

    What: Record a strikeout, a single and a walk against one pitcher and
    run update_season_stats(). The pitcher's PitchingSeasonStats row must
    show those events from the pitching side.

    Why: Each play is stored from the batter's point of view. The batter's
    'so' has to surface as the pitcher's 'strikeouts', 'hit' as
    'hits_allowed', 'bb' as walks allowed, and so on; a swapped mapping
    would corrupt pitcher stats without raising any error.
    """
    batter_outcomes = (
        {"pa": 1, "ab": 1, "so": 1, "outs": 1},  # strikeout
        {"pa": 1, "ab": 1, "hit": 1, "outs": 0},  # single
        {"pa": 1, "bb": 1, "outs": 0},  # walk
    )
    for play_num, outcome in enumerate(batter_outcomes, start=1):
        make_play(
            game, play_num, player_batter, team_a, player_pitcher, team_b, **outcome
        )

    update_season_stats(game.id)

    pitching_line = PitchingSeasonStats.get(
        PitchingSeasonStats.player == player_pitcher,
        PitchingSeasonStats.team == team_b,
        PitchingSeasonStats.season == 11,
    )
    # Only the strikeout recorded an out.
    assert pitching_line.outs == 1
    # batter so -> pitcher strikeouts
    assert pitching_line.strikeouts == 1
    # batter hit -> pitcher hits_allowed
    assert pitching_line.hits_allowed == 1
    # batter bb -> pitcher bb (walks allowed)
    assert pitching_line.bb == 1
    assert pitching_line.games == 1


def test_decision_integration(team_a, team_b, player_batter, player_pitcher, game):
    """A Decision row with win=1 shows up as wins=1 on the pitcher's season line.

    What: Give the pitcher one StratPlay so he appears in the game, add a
    Decision crediting him with the win, run update_season_stats(), and
    check wins and losses on his PitchingSeasonStats row.

    Why: Decisions live in their own table, separate from StratPlay. If the
    engine fails to merge them in, every pitcher would show 0 wins, losses
    and saves no matter what happened, which would break standings and
    evolution criteria.
    """
    make_play(
        game, 1, player_batter, team_a, player_pitcher, team_b, pa=1, ab=1, outs=1
    )

    # A starter's win with every other decision flag cleared.
    decision_flags = {
        "win": 1,
        "loss": 0,
        "is_save": 0,
        "hold": 0,
        "b_save": 0,
        "is_start": True,
    }
    Decision.create(
        season=11,
        game=game,
        pitcher=player_pitcher,
        pitcher_team=team_b,
        **decision_flags,
    )

    update_season_stats(game.id)

    pitching_line = PitchingSeasonStats.get(
        PitchingSeasonStats.player == player_pitcher,
        PitchingSeasonStats.team == team_b,
        PitchingSeasonStats.season == 11,
    )
    assert pitching_line.wins == 1
    assert pitching_line.losses == 0


def test_double_count_prevention(team_a, team_b, player_batter, player_pitcher, game):
    """Processing the same game twice must leave the totals unchanged.

    What: Process a game with three plate appearances, then call
    update_season_stats() again with the same game id. The second call is
    expected to report 'skipped'=True with zero batters and pitchers
    updated, and the batter's pa must still be 3 rather than 6.

    Why: The bot infrastructure can deliver a game-complete event more than
    once (network retries, message replays), so a repeated delivery has to
    be a no-op.
    """
    for play_num in (1, 2, 3):
        make_play(
            game,
            play_num,
            player_batter,
            team_a,
            player_pitcher,
            team_b,
            pa=1,
            ab=1,
            outs=1,
        )

    initial_run = update_season_stats(game.id)
    assert "skipped" not in initial_run

    replay_run = update_season_stats(game.id)
    assert replay_run.get("skipped") is True
    assert replay_run["batters_updated"] == 0
    assert replay_run["pitchers_updated"] == 0

    season_line = BattingSeasonStats.get(
        BattingSeasonStats.player == player_batter,
        BattingSeasonStats.team == team_a,
        BattingSeasonStats.season == 11,
    )
    # Three plate appearances, counted once, not six.
    assert season_line.pa == 3


def test_two_games_accumulate(team_a, team_b, player_batter, player_pitcher):
    """Two games for the same batter and team land in one season row.

    What: Create two games with 2 and 3 plate appearances, process them in
    order, and check that the batter's single BattingSeasonStats row shows
    pa=5 and games=2.

    Why: BattingSeasonStats is a season-long accumulator, not a per-game
    snapshot, so processing the second game must yield totals that include
    the first game's plays as well.
    """

    def new_ranked_game():
        return StratGame.create(
            season=11, game_type="ranked", away_team=team_a, home_team=team_b
        )

    # Both games exist before any play is recorded.
    first_game = new_ranked_game()
    second_game = new_ranked_game()

    for current_game, pa_count in ((first_game, 2), (second_game, 3)):
        for play_num in range(1, pa_count + 1):
            make_play(
                current_game,
                play_num,
                player_batter,
                team_a,
                player_pitcher,
                team_b,
                pa=1,
                ab=1,
                outs=1,
            )

    for current_game in (first_game, second_game):
        update_season_stats(current_game.id)

    season_line = BattingSeasonStats.get(
        BattingSeasonStats.player == player_batter,
        BattingSeasonStats.team == team_a,
        BattingSeasonStats.season == 11,
    )
    assert season_line.pa == 5
    assert season_line.games == 2


def test_two_team_game(team_a, team_b):
    """Players from both teams in a game each get their own stats row.

    What: Create a batter+pitcher pair for team A and another pair for team B.
    In the same game, team A bats against team B's pitcher and vice versa.
    After update_season_stats(), both batters and both pitchers must have
    correct stats rows under their own team for season 11, and no row may
    exist that pairs a player with the opposing team.

    Why: A key correctness guarantee is that stats are attributed to the
    correct (player, team) combination. If team attribution is wrong,
    a player's stats could appear under the wrong franchise or be merged
    with an opponent's row.
    """
    batter_a = _make_player("Batter A", pos="CF")
    pitcher_a = _make_player("Pitcher A", pos="SP")
    batter_b = _make_player("Batter B", pos="CF")
    pitcher_b = _make_player("Pitcher B", pos="SP")

    game = StratGame.create(
        season=11, game_type="ranked", away_team=team_a, home_team=team_b
    )

    # Team A bats against team B's pitcher (away half): a single, then a K.
    make_play(
        game,
        1,
        batter_a,
        team_a,
        pitcher_b,
        team_b,
        pa=1,
        ab=1,
        hit=1,
        outs=0,
        inning_half="top",
    )
    make_play(
        game,
        2,
        batter_a,
        team_a,
        pitcher_b,
        team_b,
        pa=1,
        ab=1,
        so=1,
        outs=1,
        inning_half="top",
    )

    # Team B bats against team A's pitcher (home half): a walk. Like the
    # other walk fixtures in this module, it is a plate appearance with no
    # at-bat charged.
    make_play(
        game,
        3,
        batter_b,
        team_b,
        pitcher_a,
        team_a,
        pa=1,
        bb=1,
        outs=0,
        inning_half="bottom",
    )

    update_season_stats(game.id)

    # Every lookup pins season 11, matching the rest of this module, so a row
    # written under the wrong season cannot satisfy the test.

    # Team A's batter: 2 PA, 1 hit, 1 SO
    stats_ba = BattingSeasonStats.get(
        BattingSeasonStats.player == batter_a,
        BattingSeasonStats.team == team_a,
        BattingSeasonStats.season == 11,
    )
    assert stats_ba.pa == 2
    assert stats_ba.hits == 1
    assert stats_ba.strikeouts == 1

    # Team B's batter: 1 PA, 1 BB
    stats_bb = BattingSeasonStats.get(
        BattingSeasonStats.player == batter_b,
        BattingSeasonStats.team == team_b,
        BattingSeasonStats.season == 11,
    )
    assert stats_bb.pa == 1
    assert stats_bb.bb == 1

    # Team B's pitcher (faced team A's batter): 1 hit allowed, 1 strikeout
    stats_pb = PitchingSeasonStats.get(
        PitchingSeasonStats.player == pitcher_b,
        PitchingSeasonStats.team == team_b,
        PitchingSeasonStats.season == 11,
    )
    assert stats_pb.hits_allowed == 1
    assert stats_pb.strikeouts == 1

    # Team A's pitcher (faced team B's batter): 1 BB allowed
    stats_pa = PitchingSeasonStats.get(
        PitchingSeasonStats.player == pitcher_a,
        PitchingSeasonStats.team == team_a,
        PitchingSeasonStats.season == 11,
    )
    assert stats_pa.bb == 1

    # Isolation: nobody may have a row filed under the opposing team.
    for batter, wrong_team in ((batter_a, team_b), (batter_b, team_a)):
        assert (
            not BattingSeasonStats.select()
            .where(
                BattingSeasonStats.player == batter,
                BattingSeasonStats.team == wrong_team,
            )
            .exists()
        )
    for pitcher, wrong_team in ((pitcher_a, team_b), (pitcher_b, team_a)):
        assert (
            not PitchingSeasonStats.select()
            .where(
                PitchingSeasonStats.player == pitcher,
                PitchingSeasonStats.team == wrong_team,
            )
            .exists()
        )


def test_out_of_order_replay_prevented(team_a, team_b, player_batter, player_pitcher):
    """Processing games out of order, then replaying one, still yields pa=5.

    What: Create game G (3 plate appearances) and game G+1 (2 plate
    appearances). Process G+1 first and G second, and expect pa=5. Then
    deliver G again: the call must report 'skipped'=True and pa must stay
    at 5.

    Why: Events can arrive in any order and can be re-delivered. The final
    totals must not depend on processing order, and a re-delivery must not
    add the same plays a second time.
    """

    def new_ranked_game():
        return StratGame.create(
            season=11, game_type="ranked", away_team=team_a, home_team=team_b
        )

    def batter_season_line():
        return BattingSeasonStats.get(
            BattingSeasonStats.player == player_batter,
            BattingSeasonStats.team == team_a,
            BattingSeasonStats.season == 11,
        )

    earlier_game = new_ranked_game()  # game G
    later_game = new_ranked_game()  # game G+1

    for current_game, pa_count in ((earlier_game, 3), (later_game, 2)):
        for play_num in range(1, pa_count + 1):
            make_play(
                current_game,
                play_num,
                player_batter,
                team_a,
                player_pitcher,
                team_b,
                pa=1,
                ab=1,
                outs=1,
            )

    # Deliver the later game first to simulate out-of-order arrival.
    update_season_stats(later_game.id)
    update_season_stats(earlier_game.id)

    # 3 (game G) + 2 (game G+1)
    assert batter_season_line().pa == 5

    # A second delivery of game G has to be skipped...
    replay_run = update_season_stats(earlier_game.id)
    assert replay_run.get("skipped") is True

    # ...leaving the total at 5 rather than 8.
    assert batter_season_line().pa == 5


# ---------------------------------------------------------------------------
# Tests — New (force recalc / idempotency / self-healing)
# ---------------------------------------------------------------------------


def test_force_recalc(team_a, team_b, player_batter, player_pitcher, game):
    """Re-running an already processed game with force=True does not double stats.

    What: Process a game with three hits normally, then process it again
    with force=True. The forced run must not be skipped, must report at
    least one batter updated, and must leave pa=3, hits=3, games=1.

    Why: force=True is meant to bypass the already-processed skip. Since the
    underlying plays have not changed, the rewritten totals have to match
    the first run exactly; anything else means the forced path adds to the
    row instead of replacing it.
    """
    for play_num in (1, 2, 3):
        make_play(
            game,
            play_num,
            player_batter,
            team_a,
            player_pitcher,
            team_b,
            pa=1,
            ab=1,
            hit=1,
            outs=0,
        )

    normal_run = update_season_stats(game.id)
    assert normal_run["batters_updated"] >= 1
    assert "skipped" not in normal_run

    # The forced run must do real work, not short-circuit.
    forced_run = update_season_stats(game.id, force=True)
    assert "skipped" not in forced_run
    assert forced_run["batters_updated"] >= 1

    season_line = BattingSeasonStats.get(
        BattingSeasonStats.player == player_batter,
        BattingSeasonStats.team == team_a,
        BattingSeasonStats.season == 11,
    )
    assert season_line.pa == 3
    assert season_line.hits == 3
    assert season_line.games == 1


def test_idempotent_reprocessing(team_a, team_b, player_batter, player_pitcher, game):
    """Two consecutive force=True calls produce identical, correct stats.

    What: Record four plate appearances alternating strikeout / hit, then
    force-process the same game twice. The totals after the first call must
    match the plays (pa=4, ab=4, hits=2, strikeouts=2, games=1), and the
    totals after the second call must equal those after the first.

    Why: Idempotency is a critical property of the recalculation engine.
    External systems (admin scripts, retry loops) may call force=True
    multiple times; the result must be stable. Pinning the absolute totals
    as well keeps the test from passing when both runs agree on the same
    wrong numbers (for example, all zeros).
    """
    for i in range(4):
        # Even-numbered iterations strike out; odd-numbered ones get a hit.
        struck_out = i % 2 == 0
        make_play(
            game,
            i + 1,
            player_batter,
            team_a,
            player_pitcher,
            team_b,
            pa=1,
            ab=1,
            so=1 if struck_out else 0,
            hit=0 if struck_out else 1,
            outs=1 if struck_out else 0,
        )

    def snapshot():
        """Return (pa, ab, hits, strikeouts, games) from a fresh read of the row."""
        row = BattingSeasonStats.get(
            BattingSeasonStats.player == player_batter,
            BattingSeasonStats.team == team_a,
            BattingSeasonStats.season == 11,
        )
        return (row.pa, row.ab, row.hits, row.strikeouts, row.games)

    update_season_stats(game.id, force=True)
    after_first = snapshot()
    # Absolute check: 4 PA / 4 AB, 2 hits, 2 strikeouts, all in one game.
    assert after_first == (4, 4, 2, 2, 1)

    update_season_stats(game.id, force=True)
    after_second = snapshot()

    # Relative check: the second forced run changes nothing.
    assert after_second == after_first


def test_partial_reprocessing_heals(
    team_a, team_b, player_batter, player_pitcher, game
):
    """A forced reprocess overwrites a hand-corrupted season row.

    What: Process a game containing a single, a double and a strikeout
    (pa=3, hits=2, doubles=1). Overwrite the stored row with bogus values
    (pa=999, hits=0, doubles=50), then reprocess with force=True. The row
    must return to the totals implied by the plays.

    Why: Self-healing is the main reason for recomputing from the StratPlay
    source data. Partial processing, bugs or manual edits can leave a season
    row wrong; a forced run has to write the correct totals whatever the row
    currently contains.
    """
    outcomes = (
        {"hit": 1, "outs": 0},  # single
        {"hit": 1, "double": 1, "outs": 0},  # double
        {"so": 1, "outs": 1},  # strikeout
    )
    for play_num, outcome in enumerate(outcomes, start=1):
        make_play(
            game,
            play_num,
            player_batter,
            team_a,
            player_pitcher,
            team_b,
            pa=1,
            ab=1,
            **outcome,
        )

    def batter_season_line():
        return BattingSeasonStats.get(
            BattingSeasonStats.player == player_batter,
            BattingSeasonStats.team == team_a,
            BattingSeasonStats.season == 11,
        )

    update_season_stats(game.id)

    # Baseline: the first run produced the right totals.
    healthy = batter_season_line()
    assert healthy.pa == 3
    assert healthy.hits == 2
    assert healthy.doubles == 1

    # Overwrite the stored row with nonsense.
    healthy.pa = 999
    healthy.hits = 0
    healthy.doubles = 50
    healthy.save()

    # Re-read by primary key to confirm the bad values were persisted.
    corrupted = BattingSeasonStats.get_by_id(healthy.id)
    assert corrupted.pa == 999

    # The forced run should restore the totals.
    update_season_stats(game.id, force=True)

    healed = batter_season_line()
    assert healed.pa == 3
    assert healed.hits == 2
    assert healed.doubles == 1
    assert healed.strikeouts == 1
    assert healed.games == 1


def test_decision_only_pitcher(team_a, team_b, player_batter, player_pitcher, game):
    """A pitcher known only through a Decision row still gets a season line.

    What: The regular pitcher records one play. A second (relief) pitcher
    has no StratPlay rows at all but is credited with the win in a Decision
    row. After update_season_stats() the relief pitcher must have a
    PitchingSeasonStats row showing wins=1 with games, outs, strikeouts and
    games_started all at 0.

    Why: A pitcher can occasionally be credited with a decision without
    recording any plays (inherited-runner scoring rules, simulation edge
    cases). The engine therefore has to pick up pitchers that appear only in
    Decision rows, not just those found in StratPlay.
    """
    relief_pitcher = _make_player("Relief Pitcher", pos="RP")

    # Only the regular pitcher appears in a play.
    make_play(
        game, 1, player_batter, team_a, player_pitcher, team_b, pa=1, ab=1, outs=1
    )

    # The reliever's only footprint in this game is a non-start win.
    decision_flags = {
        "win": 1,
        "loss": 0,
        "is_save": 0,
        "hold": 0,
        "b_save": 0,
        "is_start": False,
    }
    Decision.create(
        season=11,
        game=game,
        pitcher=relief_pitcher,
        pitcher_team=team_b,
        **decision_flags,
    )

    update_season_stats(game.id)

    relief_line = PitchingSeasonStats.get(
        PitchingSeasonStats.player == relief_pitcher,
        PitchingSeasonStats.team == team_b,
        PitchingSeasonStats.season == 11,
    )
    assert relief_line.wins == 1
    # No plays are attributed to the reliever, so games is expected to be 0.
    assert relief_line.games == 0
    assert relief_line.outs == 0
    assert relief_line.strikeouts == 0
    assert relief_line.games_started == 0