paper-dynasty-card-creation/batters/models.py
Cal Corum 2bf3a6cee7 Fix SLG formula drift in extracted rating models
The extracted batting and pitching models used malformed SLG equations that double-counted and omitted outcomes, skewing slash lines. Align formulas with canonical weighting and add regression tests to prevent recurrence.

Co-Authored-By: Claude GPT-5.3-Codex <noreply@anthropic.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-02-26 07:47:15 -06:00

310 lines
11 KiB
Python

import random
import pydantic
from creation_helpers import mround, sanitize_chance_output
from typing import Literal
from decimal import Decimal
from exceptions import logger
def bp_singles(all_singles):
    """Return the number of chances to assign to the ``bp_single`` slot.

    A pool of fewer than 6 single chances gets none; any larger pool gets a
    flat 5.  Both values are passed through ``mround`` before being returned.
    """
    return mround(0) if all_singles < 6 else mround(5)
def wh_singles(rem_singles, hard_rate):
    """Return the chances to assign to ``single_two`` from the remaining singles.

    Args:
        rem_singles: Single chances not yet assigned to another slot.
        hard_rate: Hard-contact rate used to pick the share.

    Returns:
        Literal ``0`` when there are no singles left or ``hard_rate`` is
        below 0.2.  Otherwise two thirds of ``rem_singles`` when
        ``hard_rate`` exceeds 0.4, or one third when it does not, passed
        through ``sanitize_chance_output`` with ``min_chances=2``.
    """
    if rem_singles == 0 or hard_rate < .2:
        return 0

    share = rem_singles * 2 / 3 if hard_rate > .4 else rem_singles / 3
    return sanitize_chance_output(share, min_chances=2)
def one_singles(rem_singles, ifh_rate, force_rem=False):
    """Return the chances to assign to ``single_one`` from the remaining singles.

    Args:
        rem_singles: Single chances not yet assigned to another slot.
        ifh_rate: Rate used to scale the share (presumably the infield-hit
            rate, judging by the name -- confirm against the caller).
        force_rem: When true, hand back every remaining single (rounded with
            ``mround``) regardless of ``ifh_rate``.

    Returns:
        ``mround(0)`` when nothing remains or ``ifh_rate`` is below 0.05;
        otherwise ``rem_singles`` scaled by three times ``ifh_rate`` (capped
        at 0.75), passed through ``sanitize_chance_output`` with
        ``min_chances=2``.
    """
    if force_rem:
        return mround(rem_singles)
    if rem_singles == 0 or ifh_rate < .05:
        return mround(0)

    # The multiplier never exceeds three quarters of the remaining pool.
    portion = min(ifh_rate * mround(3), 0.75)
    return sanitize_chance_output(rem_singles * portion, min_chances=2)
def bp_homeruns(all_hr, hr_rate):
    """Return the chances to assign to ``bp_homerun`` out of the home-run pool.

    Args:
        all_hr: Total home-run chances available.
        hr_rate: Rate that selects the share (the caller in this file passes
            its ``hr_per_fb`` argument).

    Returns:
        ``mround(0)`` when either argument is zero.  Otherwise 40% of
        ``all_hr`` when ``hr_rate`` is above 0.2, or 80% when it is not,
        rounded via ``mround(..., base=1.0)``.
    """
    if all_hr == 0 or hr_rate == 0:
        return mround(0)

    factor = 0.4 if hr_rate > .2 else 0.8
    return mround(all_hr * factor, base=1.0)
def triples(all_xbh, tr_count, do_count):
    """Return the chances to assign to triples out of the extra-base-hit pool.

    Args:
        all_xbh: Extra-base-hit chances available.
        tr_count: Count of triples used as the numerator of the share.
        do_count: Count of the other extra-base hits in the denominator (the
            caller in this file passes doubles plus home runs).

    Returns:
        ``mround(0)`` when ``all_xbh`` or ``tr_count`` equals ``mround(0)``;
        otherwise ``all_xbh`` times the rounded triple share, passed through
        ``sanitize_chance_output`` with ``min_chances=1``.
    """
    if all_xbh == mround(0) or tr_count == mround(0):
        return mround(0)

    # max(..., 1) keeps the division safe when the counts sum to zero.
    triple_share = mround(tr_count / max(tr_count + do_count, 1))
    return sanitize_chance_output(all_xbh * triple_share, min_chances=1)
def two_doubles(all_doubles, soft_rate):
    """Return the chances to assign to ``double_two`` out of all doubles.

    Args:
        all_doubles: Double chances available.
        soft_rate: Soft-contact rate that selects the share.

    Returns:
        ``mround(0)`` when either argument is zero.  Otherwise half of
        ``all_doubles`` when ``soft_rate`` exceeds 0.2, or a quarter when it
        does not, passed through ``sanitize_chance_output``.
    """
    if all_doubles == 0 or soft_rate == 0:
        return mround(0)

    divisor = 2 if soft_rate > .2 else 4
    return sanitize_chance_output(all_doubles / divisor)
def hit_by_pitch(other_ob, hbps, walks):
    """Return the chances to assign to hit-by-pitch out of the non-hit on-base pool.

    Args:
        other_ob: On-base chances to be split between HBP and walks.
        hbps: Count of hit-by-pitches.
        walks: Count of walks.

    Returns:
        Literal ``0`` when ``hbps`` is zero or the HBP portion of
        ``other_ob`` comes to less than one chance; otherwise that portion
        passed through ``sanitize_chance_output`` with ``rounding=1.0``.
    """
    if hbps == 0:
        return 0

    # Share of HBP among HBP + walks; max(..., 1) guards the denominator.
    hbp_chances = other_ob * mround(hbps / max(hbps + walks, 1))
    if hbp_chances < 1:
        return 0
    return sanitize_chance_output(hbp_chances, rounding=1.0)
def strikeouts(all_outs, k_rate):
    """Return the strikeout chances: ``all_outs`` scaled by ``k_rate``.

    Yields ``mround(0)`` when either argument is zero; otherwise the product
    is passed through ``sanitize_chance_output``.
    """
    if all_outs == 0 or k_rate == 0:
        return mround(0)
    return sanitize_chance_output(all_outs * k_rate)
def flyout_a(all_flyouts, hard_rate):
    """Return the chances to assign to the ``flyout_a`` slot.

    The slot gets ``mround(1.0)`` only when there are flyouts to hand out and
    ``hard_rate`` is at least 0.4; in every other case it gets ``mround(0)``.
    """
    return mround(0) if all_flyouts == 0 or hard_rate < .4 else mround(1.0)
def flyout_bq(rem_flyouts, soft_rate):
    """Return the chances to assign to the ``flyout_bq`` slot.

    Args:
        rem_flyouts: Flyout chances not yet assigned to another slot.
        soft_rate: Soft-contact rate used to scale the share.

    Returns:
        ``mround(0)`` when nothing remains or ``soft_rate`` is below 0.1;
        otherwise ``rem_flyouts`` times three times ``soft_rate`` (capped at
        ``mround(.75)``), passed through ``sanitize_chance_output``.
    """
    if rem_flyouts == 0 or soft_rate < .1:
        return mround(0)

    share = min(soft_rate * 3, mround(.75))
    return sanitize_chance_output(rem_flyouts * share)
def flyout_b(rem_flyouts, pull_rate, cent_rate):
    """Return the share of remaining flyouts for one ``flyout_*_b`` slot.

    Args:
        rem_flyouts: Flyout chances not yet assigned to another slot.
        pull_rate: Directional rate counted in full.
        cent_rate: Center rate, of which half is counted.

    Returns:
        ``mround(0)`` when ``rem_flyouts`` or ``pull_rate`` is zero (note
        that ``cent_rate`` is ignored in that case); otherwise
        ``rem_flyouts * (pull_rate + cent_rate / 2)`` passed through
        ``sanitize_chance_output``.
    """
    if rem_flyouts == 0 or pull_rate == 0:
        return mround(0)

    weight = pull_rate + cent_rate / 2
    return sanitize_chance_output(rem_flyouts * weight)
def groundball_a(all_groundouts, gidps, abs):
    """Return the chances to assign to the ``groundout_a`` slot.

    Args:
        all_groundouts: Groundout chances available.
        gidps: Count of double plays grounded into.
        abs: Count of at-bats.  The name shadows the builtin ``abs`` inside
            this function; it is kept because it is part of the signature.

    Returns:
        ``mround(0)`` when ``all_groundouts`` or ``gidps`` is zero; otherwise
        ``all_groundouts`` scaled by ``gidps ** 2.5`` (capped at ``abs``)
        over ``abs``, with the ratio rounded by ``mround`` and the product
        passed through ``sanitize_chance_output``.
    """
    if all_groundouts == 0 or gidps == 0:
        return mround(0)

    # max(abs, 1) guards the denominator for a zero at-bat count.
    gidp_share = mround(min(gidps ** 2.5, abs) / max(abs, 1))
    return sanitize_chance_output(gidp_share * all_groundouts)
def groundball_c(rem_groundouts, med_rate):
    """Return the chances to assign to the ``groundout_c`` slot.

    Args:
        rem_groundouts: Groundout chances not yet assigned to another slot.
        med_rate: Medium-contact rate that selects the share.

    Returns:
        ``mround(0)`` when nothing remains or ``med_rate`` is below 0.4.
        Otherwise the whole of ``rem_groundouts`` when ``med_rate`` exceeds
        0.6, or ``rem_groundouts * med_rate`` when it does not, passed
        through ``sanitize_chance_output``.
    """
    if rem_groundouts == 0 or med_rate < .4:
        return mround(0)

    portion = rem_groundouts if med_rate > .6 else rem_groundouts * med_rate
    return sanitize_chance_output(portion)
class BattingCardRatingsModel(pydantic.BaseModel):
    """Per-outcome chance values for one batting card and one ``vs_hand``.

    The ``calculate_*`` methods fill the outcome fields step by step from
    season counting stats, using the ``all_*`` pools and contact/direction
    rates stored on the instance.  Each step reads fields written by earlier
    steps (for example ``calculate_xbh`` consumes the ``rem_xbh`` left by
    ``calculate_singles``), so the order in which they are called matters.

    NOTE(review): ``vs_hand`` is presumably the opposing pitcher's throwing
    hand, and the ``108`` divisor in ``calculate_rate_stats`` presumably the
    total number of chances on a card -- confirm both against the callers.
    """

    # Identity
    battingcard_id: int
    bat_hand: Literal['R', 'L', 'S']
    vs_hand: Literal['R', 'L']

    # Chance pools to distribute, and running remainders
    all_hits: float = 0.0
    all_other_ob: float = 0.0
    all_outs: float = 0.0
    rem_singles: float = 0.0
    rem_xbh: float = 0.0
    rem_hr: float = 0.0
    rem_doubles: float = 0.0

    # Contact-quality and direction rates (required inputs)
    hard_rate: float
    med_rate: float
    soft_rate: float
    pull_rate: float
    center_rate: float
    slap_rate: float

    # Hit outcomes
    homerun: float = 0.0
    bp_homerun: float = 0.0
    triple: float = 0.0
    double_three: float = 0.0
    double_two: float = 0.0
    double_pull: float = 0.0
    single_two: float = 0.0
    single_one: float = 0.0
    single_center: float = 0.0
    bp_single: float = 0.0

    # Non-hit on-base outcomes
    hbp: float = 0.0
    walk: float = 0.0

    # Out outcomes
    strikeout: float = 0.0
    lineout: float = 0.0
    popout: float = 0.0
    rem_flyballs: float = 0.0
    flyout_a: float = 0.0
    flyout_bq: float = 0.0
    flyout_lf_b: float = 0.0
    flyout_rf_b: float = 0.0
    rem_groundballs: float = 0.0
    groundout_a: float = 0.0
    groundout_b: float = 0.0
    groundout_c: float = 0.0

    # Derived slash line, filled in by calculate_rate_stats()
    avg: float = 0.0
    obp: float = 0.0
    slg: float = 0.0

    def total_chances(self):
        """Return the ``mround``-ed sum of every outcome field on the card."""
        outcome_chances = (
            self.homerun, self.bp_homerun, self.triple, self.double_three,
            self.double_two, self.double_pull, self.single_two, self.single_one,
            self.single_center, self.bp_single, self.hbp, self.walk,
            self.strikeout, self.lineout, self.popout, self.flyout_a,
            self.flyout_bq, self.flyout_lf_b, self.flyout_rf_b,
            self.groundout_a, self.groundout_b, self.groundout_c,
        )
        return mround(sum(outcome_chances))

    def total_hits(self):
        """Return the ``mround``-ed sum of the ten hit-outcome fields."""
        hit_chances = (
            self.homerun, self.bp_homerun, self.triple, self.double_three,
            self.double_two, self.double_pull, self.single_two, self.single_one,
            self.single_center, self.bp_single,
        )
        return mround(sum(hit_chances))

    def rem_hits(self):
        """Return ``all_hits`` minus the hit chances already assigned (unrounded)."""
        assigned_hits = sum((
            self.homerun, self.bp_homerun, self.triple, self.double_three,
            self.double_two, self.double_pull, self.single_two, self.single_one,
            self.single_center, self.bp_single,
        ))
        return self.all_hits - assigned_hits

    def rem_outs(self):
        """Return the ``mround``-ed ``all_outs`` minus the out chances already assigned."""
        assigned_outs = sum((
            self.strikeout, self.lineout, self.popout, self.flyout_a,
            self.flyout_bq, self.flyout_lf_b, self.flyout_rf_b,
            self.groundout_a, self.groundout_b, self.groundout_c,
        ))
        return mround(self.all_outs - assigned_outs)

    def rem_other_ob(self):
        """Return ``all_other_ob`` minus the HBP and walk chances already assigned."""
        after_hbp = self.all_other_ob - self.hbp
        return after_hbp - self.walk

    def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal):
        """Distribute single chances and seed ``rem_xbh`` with what is left of ``all_hits``.

        The single pool is ``all_hits`` times 80% of the season
        singles-per-hit ratio.  It is then carved up in order: ``bp_single``,
        ``single_two``, ``single_one``, and finally ``single_center`` takes
        whatever remains.
        """
        # max(..., 1) guards against a hitless season.
        singles_share = mround((szn_singles * .8) / max(szn_hits, 1))
        tot = sanitize_chance_output(self.all_hits * singles_share)
        logger.debug(f'tot: {tot}')
        self.rem_singles = tot

        self.bp_single = bp_singles(self.rem_singles)
        self.rem_singles -= self.bp_single

        self.single_two = wh_singles(self.rem_singles, self.hard_rate)
        self.rem_singles -= self.single_two

        self.single_one = one_singles(self.rem_singles, ifh_rate)
        self.rem_singles -= self.single_one

        self.single_center = sanitize_chance_output(self.rem_singles)
        self.rem_singles -= self.single_center

        # Every hit chance not spent on a single becomes an extra-base hit.
        leftover_hits = self.all_hits
        for spent in (self.bp_single, self.single_two, self.single_one, self.single_center):
            leftover_hits -= spent
        self.rem_xbh = leftover_hits

    def calculate_xbh(self, szn_triples, szn_doubles, szn_hr, hr_per_fb: Decimal):
        """Distribute ``rem_xbh`` across triples, doubles and home runs.

        Triples are taken first, then doubles (split between ``double_two``
        and ``double_pull``), then home runs (split between ``bp_homerun``
        and ``homerun``) when the season had any.  Any chances still left are
        added to triples if the season had triples, otherwise to
        ``all_other_ob``; both fallbacks are logged at error level.
        """
        self.triple = triples(self.rem_xbh, szn_triples, szn_doubles + szn_hr)
        self.rem_xbh -= self.triple

        doubles_share = mround(szn_doubles / max(szn_hr + szn_doubles, 1))
        tot_doubles = sanitize_chance_output(self.rem_xbh * doubles_share)
        self.double_two = two_doubles(tot_doubles, self.soft_rate)
        self.double_pull = sanitize_chance_output(tot_doubles - self.double_two)
        self.rem_xbh -= mround(self.double_two + self.double_pull)

        if (self.rem_xbh > mround(0)) and szn_hr > 0:
            self.bp_homerun = bp_homeruns(self.rem_xbh, hr_per_fb)
            self.homerun = sanitize_chance_output(self.rem_xbh - self.bp_homerun, min_chances=0.5)
            self.rem_xbh -= mround(self.bp_homerun + self.homerun)

        # Redistribute whatever could not be placed above.
        if self.rem_xbh > 0:
            if szn_triples > 0:
                logger.error(f'Adding {self.rem_xbh} results to triples')
                self.triple += sanitize_chance_output(self.rem_xbh, min_chances=0.5)
            else:
                logger.error(f'Adding {self.rem_xbh} results to all other ob')
                self.all_other_ob += self.rem_xbh

    def calculate_other_ob(self, szn_bb, szn_hbp):
        """Split ``all_other_ob`` into ``hbp`` and ``walk``.

        HBP is assigned first and walks take the rest.  If the two together
        fall short of ``all_other_ob``, the shortfall is logged at error
        level and added (rounded) to ``all_outs``.
        """
        self.hbp = hit_by_pitch(self.all_other_ob, szn_hbp, szn_bb)
        self.walk = sanitize_chance_output(self.all_other_ob - self.hbp)

        assigned = self.walk + self.hbp
        if assigned < self.all_other_ob:
            rem = self.all_other_ob - self.walk - self.hbp
            logger.error(f'Adding {rem} chances to all_outs')
            self.all_outs += mround(rem)

    def calculate_strikeouts(self, szn_so, szn_ab, szn_hits):
        """Set ``strikeout`` from ``all_outs`` and the season strikeouts-per-out rate."""
        # Outs are at-bats minus hits; max(..., 1) guards the denominator.
        k_rate = szn_so / max(szn_ab - szn_hits, 1)
        self.strikeout = strikeouts(self.all_outs, k_rate)

    def calculate_other_outs(self, fb_rate, ld_rate, gb_rate, szn_gidp, szn_ab):
        """Distribute the outs remaining after strikeouts.

        Flyballs are sized from ``fb_rate`` and split across the four flyout
        slots.  The outs then remaining are sized by the line-drive share of
        line drives plus grounders and split between ``lineout`` and
        ``popout`` using a random fraction, so results are not deterministic.
        The groundout slots take what is left, with ``groundout_b`` absorbing
        the final remainder.
        """
        self.rem_flyballs = sanitize_chance_output(self.rem_outs() * mround(fb_rate))

        self.flyout_a = flyout_a(self.rem_flyballs, self.hard_rate)
        self.rem_flyballs -= self.flyout_a

        self.flyout_bq = flyout_bq(self.rem_flyballs, self.soft_rate)
        self.rem_flyballs -= self.flyout_bq

        # Only bat_hand 'R' uses pull_rate here; 'L' and 'S' both use slap_rate.
        lf_rate = self.pull_rate if self.bat_hand == 'R' else self.slap_rate
        self.flyout_lf_b = flyout_b(
            self.rem_flyballs,
            pull_rate=lf_rate,
            cent_rate=self.center_rate,
        )
        self.rem_flyballs -= self.flyout_lf_b

        self.flyout_rf_b = sanitize_chance_output(self.rem_flyballs)
        self.rem_flyballs -= self.flyout_rf_b

        if self.rem_flyballs > 0:
            # Nothing is moved explicitly; the leftover stays in rem_outs().
            logger.debug(f'Adding {self.rem_flyballs} chances to lineouts')

        outs_left = self.rem_outs()
        liner_share = mround(ld_rate / max(ld_rate + gb_rate, .01))
        tot_oneouts = sanitize_chance_output(outs_left * liner_share)
        self.lineout = sanitize_chance_output(mround(random.random()) * tot_oneouts)
        self.popout = sanitize_chance_output(tot_oneouts - self.lineout)

        # rem_outs() is re-evaluated after each assignment on purpose.
        self.groundout_a = groundball_a(self.rem_outs(), szn_gidp, szn_ab)
        self.groundout_c = groundball_c(self.rem_outs(), self.med_rate)
        self.groundout_b = self.rem_outs()

    def calculate_rate_stats(self):
        """Compute ``avg``, ``obp`` and ``slg`` from the outcome fields.

        Each stat is a chance total divided by 108 and rounded to five
        decimals.  In the slugging numerator ``bp_homerun`` is weighted 2 and
        ``bp_single`` one half, unlike ``avg``/``obp`` where they count in
        full via ``total_hits``.
        """
        hits = self.total_hits()
        self.avg = mround(hits / 108, prec=5, base=0.00001)
        self.obp = mround((hits + self.hbp + self.walk) / 108, prec=5, base=0.00001)

        total_bases = (
            self.homerun * 4 + self.bp_homerun * 2 + self.triple * 3 + self.double_three * 2 +
            self.double_two * 2 + self.double_pull * 2 + self.single_two + self.single_one +
            self.single_center + self.bp_single / 2
        )
        self.slg = mround(total_bases / 108, prec=5, base=0.00001)

    def custom_to_dict(self):
        """Refresh the rate stats and return the card's values as a plain dict.

        ``strikeout`` is passed through ``mround``; every other value is the
        field as stored.  Keys appear in the order listed below.
        """
        self.calculate_rate_stats()

        exported = (
            'battingcard_id', 'vs_hand',
            'homerun', 'bp_homerun', 'triple',
            'double_three', 'double_two', 'double_pull',
            'single_two', 'single_one', 'single_center', 'bp_single',
            'hbp', 'walk', 'strikeout', 'lineout', 'popout',
            'flyout_a', 'flyout_bq', 'flyout_lf_b', 'flyout_rf_b',
            'groundout_a', 'groundout_b', 'groundout_c',
            'pull_rate', 'center_rate', 'slap_rate',
            'avg', 'obp', 'slg',
        )
        payload = {field: getattr(self, field) for field in exported}
        # Replacing an existing key keeps its position in the dict.
        payload['strikeout'] = mround(self.strikeout)
        return payload