sba-scouting/src/sba_scout/calc/league_stats.py
Cal Corum 3c76ce1cf0 Add Lineup Builder, Gameday screen, and matchup scoring system
Features:
- Lineup Builder screen: set batting order, assign positions, save/load lineups
- Gameday screen: integrated matchup scout + lineup builder side-by-side
- Matchup Scout: analyze batters vs opposing pitchers with standardized scoring
- Standardized scoring system with league AVG/STDEV calculations
- Score caching for fast matchup lookups

Lineup Builder (press 'l'):
- Dual-panel UI with available batters and 9-slot lineup
- Keyboard controls: a=add, r=remove, k/j=reorder, p=change position
- Save/load named lineups, delete saved lineups with 'd'

Gameday screen (press 'g'):
- Left panel: team/pitcher selection with matchup ratings
- Right panel: lineup builder with live matchup ratings per batter
- Players in lineup marked with * in matchup list
- Click highlighted row to toggle selection for screenshots

Other changes:
- Dynamic season configuration (removed hardcoded season=13)
- Added delete_lineup query function
- StandardizedScoreCache model for pre-computed scores
- Auto-rebuild score cache after card imports
2026-01-25 14:09:22 -06:00

264 lines
8.8 KiB
Python

"""
League statistics calculation for standardized scoring.
Calculates league-wide averages and standard deviations for batter and pitcher
card stats, which are used to convert raw values into standardized scores (-3 to +3).
"""
import statistics
from dataclasses import dataclass, fields
from typing import Sequence

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from ..db.models import BatterCard, PitcherCard
@dataclass
class StatDistribution:
    """League average and standard deviation of one card stat.

    The custom ``__repr__`` rounds both numbers to two decimals so that
    printed/logged distributions stay compact.
    """

    avg: float
    stdev: float

    def __repr__(self) -> str:
        avg, stdev = self.avg, self.stdev
        return f"StatDistribution(avg={avg:.2f}, stdev={stdev:.2f})"
@dataclass
class BatterLeagueStats:
    """League-wide distribution of every batter card stat.

    Holds one ``StatDistribution`` per stat and split. Field names match the
    ``BatterCard`` attributes they are computed from; the suffix names the
    split (``_vlhp`` = vs left-handed pitchers, ``_vrhp`` = vs right-handed
    pitchers).
    """

    # --- Split: vs left-handed pitchers ---
    so_vlhp: StatDistribution
    bb_vlhp: StatDistribution
    hit_vlhp: StatDistribution
    ob_vlhp: StatDistribution
    tb_vlhp: StatDistribution
    hr_vlhp: StatDistribution
    dp_vlhp: StatDistribution
    bphr_vlhp: StatDistribution
    bp1b_vlhp: StatDistribution

    # --- Split: vs right-handed pitchers ---
    so_vrhp: StatDistribution
    bb_vrhp: StatDistribution
    hit_vrhp: StatDistribution
    ob_vrhp: StatDistribution
    tb_vrhp: StatDistribution
    hr_vrhp: StatDistribution
    dp_vrhp: StatDistribution
    bphr_vrhp: StatDistribution
    bp1b_vrhp: StatDistribution
@dataclass
class PitcherLeagueStats:
    """League-wide distribution of every pitcher card stat.

    Holds one ``StatDistribution`` per stat and split. Field names match the
    ``PitcherCard`` attributes they are computed from; the suffix names the
    split (``_vlhb`` = vs left-handed batters, ``_vrhb`` = vs right-handed
    batters).
    """

    # --- Split: vs left-handed batters ---
    so_vlhb: StatDistribution
    bb_vlhb: StatDistribution
    hit_vlhb: StatDistribution
    ob_vlhb: StatDistribution
    tb_vlhb: StatDistribution
    hr_vlhb: StatDistribution
    dp_vlhb: StatDistribution
    bphr_vlhb: StatDistribution
    bp1b_vlhb: StatDistribution

    # --- Split: vs right-handed batters ---
    so_vrhb: StatDistribution
    bb_vrhb: StatDistribution
    hit_vrhb: StatDistribution
    ob_vrhb: StatDistribution
    tb_vrhb: StatDistribution
    hr_vrhb: StatDistribution
    dp_vrhb: StatDistribution
    bphr_vrhb: StatDistribution
    bp1b_vrhb: StatDistribution
def _calc_distribution(values: Sequence[float | None]) -> StatDistribution:
    """Summarise one stat column as a league average and standard deviation.

    The two figures deliberately use different populations:

    * ``avg`` is the mean of the strictly positive entries only (``None``,
      zero and negative entries are skipped), matching the spreadsheet's
      AVERAGEIF.
    * ``stdev`` is the sample standard deviation of every entry, with
      ``None`` counted as 0.

    Args:
        values: Raw stat values, one per card. Entries may be ``None``.

    Returns:
        The column's StatDistribution. If fewer than two positive entries
        exist, the neutral ``avg=0.0, stdev=1.0`` is returned instead. A
        standard deviation of exactly 0 is reported as 1.0 so the result is
        always safe to divide by.
    """
    positive = [v for v in values if v and v > 0]
    if len(positive) < 2:
        # Not enough data to describe the column - fall back to the neutral
        # distribution.
        return StatDistribution(avg=0.0, stdev=1.0)

    avg = statistics.mean(positive)

    # Every entry takes part in the spread, with None treated as 0. The guard
    # above guarantees at least two entries, so statistics.stdev() cannot fail
    # for lack of data. Integer 0 is used as the fill value because it
    # coerces with any numeric type the column may hold.
    filled = [v or 0 for v in values]
    stdev = statistics.stdev(filled)

    # A column where every card has the same value has no spread; substitute
    # 1.0 to prevent division by zero downstream.
    if stdev == 0:
        stdev = 1.0

    return StatDistribution(avg=avg, stdev=stdev)
async def calculate_batter_league_stats(
    session: AsyncSession,
) -> BatterLeagueStats:
    """
    Calculate league-wide averages and standard deviations for all batter stats.

    Loads every batter card from the database and, for each field declared on
    BatterLeagueStats, feeds the same-named card attribute of all cards
    through _calc_distribution. The dataclass field list is the single source
    of truth for which stats are computed, so the vs-LHP and vs-RHP splits
    cannot drift out of sync with it.

    When no cards exist, every stat receives the neutral distribution
    (avg=0.0, stdev=1.0) that _calc_distribution returns for insufficient
    data; each field gets its own instance.

    Args:
        session: Database session

    Returns:
        BatterLeagueStats with avg/stdev for each stat
    """
    result = await session.execute(select(BatterCard))
    cards: Sequence[BatterCard] = result.scalars().all()

    return BatterLeagueStats(
        **{
            stat.name: _calc_distribution(
                [getattr(card, stat.name) for card in cards]
            )
            for stat in fields(BatterLeagueStats)
        }
    )
async def calculate_pitcher_league_stats(
    session: AsyncSession,
) -> PitcherLeagueStats:
    """
    Calculate league-wide averages and standard deviations for all pitcher stats.

    Loads every pitcher card from the database and, for each field declared on
    PitcherLeagueStats, feeds the same-named card attribute of all cards
    through _calc_distribution. The dataclass field list is the single source
    of truth for which stats are computed, so the vs-LHB and vs-RHB splits
    cannot drift out of sync with it.

    When no cards exist, every stat receives the neutral distribution
    (avg=0.0, stdev=1.0) that _calc_distribution returns for insufficient
    data; each field gets its own instance.

    Args:
        session: Database session

    Returns:
        PitcherLeagueStats with avg/stdev for each stat
    """
    result = await session.execute(select(PitcherCard))
    cards: Sequence[PitcherCard] = result.scalars().all()

    return PitcherLeagueStats(
        **{
            stat.name: _calc_distribution(
                [getattr(card, stat.name) for card in cards]
            )
            for stat in fields(PitcherLeagueStats)
        }
    )
# Module-level cache of the league stats. Each value is computed on the first
# get_*_league_stats() call and then reused by every later call - regardless
# of which database session is passed in - until clear_league_stats_cache()
# resets it to None.
_batter_stats_cache: BatterLeagueStats | None = None
_pitcher_stats_cache: PitcherLeagueStats | None = None
async def get_batter_league_stats(session: AsyncSession) -> BatterLeagueStats:
    """Return the batter league stats, computing and caching them on first use.

    Args:
        session: Database session, used only when the cache is empty.

    Returns:
        The cached BatterLeagueStats.
    """
    global _batter_stats_cache

    cached = _batter_stats_cache
    if cached is not None:
        return cached

    # Cache miss: compute once and remember the result at module level.
    cached = await calculate_batter_league_stats(session)
    _batter_stats_cache = cached
    return cached
async def get_pitcher_league_stats(session: AsyncSession) -> PitcherLeagueStats:
    """Return the pitcher league stats, computing and caching them on first use.

    Args:
        session: Database session, used only when the cache is empty.

    Returns:
        The cached PitcherLeagueStats.
    """
    global _pitcher_stats_cache

    cached = _pitcher_stats_cache
    if cached is not None:
        return cached

    # Cache miss: compute once and remember the result at module level.
    cached = await calculate_pitcher_league_stats(session)
    _pitcher_stats_cache = cached
    return cached
def clear_league_stats_cache() -> None:
    """Drop both cached league-stat results.

    Call this whenever card data changes; the next get_*_league_stats() call
    then recomputes from the database.
    """
    global _batter_stats_cache, _pitcher_stats_cache
    _batter_stats_cache = _pitcher_stats_cache = None