diff --git a/batters/calcs_batter.py b/batters/calcs_batter.py
index 0a7b928..e261db7 100644
--- a/batters/calcs_batter.py
+++ b/batters/calcs_batter.py
@@ -1,358 +1,24 @@
import random
-import pydantic
-
from creation_helpers import mround, sanitize_chance_output
-from typing import List, Literal
+from typing import List
from decimal import Decimal
from exceptions import logger
-
-class BattingCardRatingsModel(pydantic.BaseModel):
- battingcard_id: int
- bat_hand: Literal['R', 'L', 'S']
- vs_hand: Literal['R', 'L']
- all_hits: float = 0.0
- all_other_ob: float = 0.0
- all_outs: float = 0.0
- rem_singles: float = 0.0
- rem_xbh: float = 0.0
- rem_hr: float = 0.0
- rem_doubles: float = 0.0
- hard_rate: float
- med_rate: float
- soft_rate: float
- pull_rate: float
- center_rate: float
- slap_rate: float
- homerun: float = 0.0
- bp_homerun: float = 0.0
- triple: float = 0.0
- double_three: float = 0.0
- double_two: float = 0.0
- double_pull: float = 0.0
- single_two: float = 0.0
- single_one: float = 0.0
- single_center: float = 0.0
- bp_single: float = 0.0
- hbp: float = 0.0
- walk: float = 0.0
- strikeout: float = 0.0
- lineout: float = 0.0
- popout: float = 0.0
- rem_flyballs: float = 0.0
- flyout_a: float = 0.0
- flyout_bq: float = 0.0
- flyout_lf_b: float = 0.0
- flyout_rf_b: float = 0.0
- rem_groundballs: float = 0.0
- groundout_a: float = 0.0
- groundout_b: float = 0.0
- groundout_c: float = 0.0
- avg: float = 0.0
- obp: float = 0.0
- slg: float = 0.0
-
- def total_chances(self):
- return mround(sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
- self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
- self.lineout, self.popout, self.flyout_a, self.flyout_bq, self.flyout_lf_b, self.flyout_rf_b,
- self.groundout_a, self.groundout_b, self.groundout_c
- ]))
-
- def total_hits(self):
- return mround(sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
- self.single_two, self.single_one, self.single_center, self.bp_single
- ]))
-
- def rem_hits(self):
- return (self.all_hits -
- sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
- self.single_two, self.single_one, self.single_center, self.bp_single
- ]))
-
- def rem_outs(self):
- return mround(self.all_outs -
- sum([
- self.strikeout, self.lineout, self.popout, self.flyout_a, self.flyout_bq, self.flyout_lf_b,
- self.flyout_rf_b, self.groundout_a, self.groundout_b, self.groundout_c
- ]))
-
- def rem_other_ob(self):
- return self.all_other_ob - self.hbp - self.walk
-
- def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal):
- tot = sanitize_chance_output(self.all_hits * mround((szn_singles * .8) / max(szn_hits, 1)))
- logger.debug(f'tot: {tot}')
- self.rem_singles = tot
-
- self.bp_single = bp_singles(self.rem_singles)
- self.rem_singles -= self.bp_single
-
- self.single_two = wh_singles(self.rem_singles, self.hard_rate)
- self.rem_singles -= self.single_two
-
- self.single_one = one_singles(self.rem_singles, ifh_rate)
- self.rem_singles -= self.single_one
-
- self.single_center = sanitize_chance_output(self.rem_singles)
- self.rem_singles -= self.single_center
-
- self.rem_xbh = self.all_hits - self.bp_single - self.single_two - self.single_one - self.single_center
-
- def calculate_xbh(self, szn_triples, szn_doubles, szn_hr, hr_per_fb: Decimal):
- self.triple = triples(self.rem_xbh, szn_triples, szn_doubles + szn_hr)
- self.rem_xbh -= self.triple
-
- tot_doubles = sanitize_chance_output(self.rem_xbh * mround(szn_doubles / max(szn_hr + szn_doubles, 1)))
- self.double_two = two_doubles(tot_doubles, self.soft_rate)
- self.double_pull = sanitize_chance_output(tot_doubles - self.double_two)
- self.rem_xbh -= mround(self.double_two + self.double_pull)
-
- if (self.rem_xbh > mround(0)) and szn_hr > 0:
- self.bp_homerun = bp_homeruns(self.rem_xbh, hr_per_fb)
- self.homerun = sanitize_chance_output(self.rem_xbh - self.bp_homerun, min_chances=0.5)
- self.rem_xbh -= mround(self.bp_homerun + self.homerun)
-
- if szn_triples > 0 and self.rem_xbh > 0:
- logger.error(f'Adding {self.rem_xbh} results to triples')
- self.triple += sanitize_chance_output(self.rem_xbh, min_chances=0.5)
- elif self.rem_xbh > 0:
- logger.error(f'Adding {self.rem_xbh} results to all other ob')
- # print(self)
- self.all_other_ob += self.rem_xbh
-
- def calculate_other_ob(self, szn_bb, szn_hbp):
- self.hbp = hit_by_pitch(self.all_other_ob, szn_hbp, szn_bb)
- self.walk = sanitize_chance_output(self.all_other_ob - self.hbp)
-
- if self.walk + self.hbp < self.all_other_ob:
- rem = self.all_other_ob - self.walk - self.hbp
- logger.error(f'Adding {rem} chances to all_outs')
- # print(self)
- self.all_outs += mround(rem)
-
- def calculate_strikeouts(self, szn_so, szn_ab, szn_hits):
- self.strikeout = strikeouts(self.all_outs, (szn_so / max(szn_ab - szn_hits, 1)))
-
- def calculate_other_outs(self, fb_rate, ld_rate, gb_rate, szn_gidp, szn_ab):
- self.rem_flyballs = sanitize_chance_output(self.rem_outs() * mround(fb_rate))
- self.flyout_a = flyout_a(self.rem_flyballs, self.hard_rate)
- self.rem_flyballs -= self.flyout_a
-
- self.flyout_bq = flyout_bq(self.rem_flyballs, self.soft_rate)
- self.rem_flyballs -= self.flyout_bq
-
- self.flyout_lf_b = flyout_b(
- self.rem_flyballs,
- pull_rate=self.pull_rate if self.bat_hand == 'R' else self.slap_rate,
- cent_rate=self.center_rate
- )
- self.rem_flyballs -= self.flyout_lf_b
- self.flyout_rf_b = sanitize_chance_output(self.rem_flyballs)
- self.rem_flyballs -= self.flyout_rf_b
-
- if self.rem_flyballs > 0:
- logger.debug(f'Adding {self.rem_flyballs} chances to lineouts')
-
- tot_oneouts = sanitize_chance_output(self.rem_outs() * mround(ld_rate / max(ld_rate + gb_rate, .01)))
- self.lineout = sanitize_chance_output(mround(random.random()) * tot_oneouts)
- self.popout = sanitize_chance_output(tot_oneouts - self.lineout)
-
- self.groundout_a = groundball_a(self.rem_outs(), szn_gidp, szn_ab)
- self.groundout_c = groundball_c(self.rem_outs(), self.med_rate)
- self.groundout_b = self.rem_outs()
-
- def calculate_rate_stats(self):
- self.avg = mround(self.total_hits() / 108, prec=5, base=0.00001)
- self.obp = mround((self.total_hits() + self.hbp + self.walk) / 108, prec=5, base=0.00001)
- self.slg = mround((
- self.homerun * 4 + self.triple * 3 + self.single_center + self.single_two + self.single_two +
- (self.double_two + self.double_three + self.double_two + self.bp_homerun) * 2 + self.bp_single / 2) / 108, prec=5, base=0.00001)
-
- def custom_to_dict(self):
- self.calculate_rate_stats()
- return {
- 'battingcard_id': self.battingcard_id,
- 'vs_hand': self.vs_hand,
- 'homerun': self.homerun,
- 'bp_homerun': self.bp_homerun,
- 'triple': self.triple,
- 'double_three': self.double_three,
- 'double_two': self.double_two,
- 'double_pull': self.double_pull,
- 'single_two': self.single_two,
- 'single_one': self.single_one,
- 'single_center': self.single_center,
- 'bp_single': self.bp_single,
- 'hbp': self.hbp,
- 'walk': self.walk,
- 'strikeout': mround(self.strikeout),
- 'lineout': self.lineout,
- 'popout': self.popout,
- 'flyout_a': self.flyout_a,
- 'flyout_bq': self.flyout_bq,
- 'flyout_lf_b': self.flyout_lf_b,
- 'flyout_rf_b': self.flyout_rf_b,
- 'groundout_a': self.groundout_a,
- 'groundout_b': self.groundout_b,
- 'groundout_c': self.groundout_c,
- 'pull_rate': self.pull_rate,
- 'center_rate': self.center_rate,
- 'slap_rate': self.slap_rate,
- 'avg': self.avg,
- 'obp': self.obp,
- 'slg': self.slg
- }
-
-# def total_chances(chance_data):
-# sum_chances = 0
-# for key in chance_data:
-# if key not in ['id', 'player_id', 'cardset_id', 'vs_hand', 'is_prep']:
-# sum_chances += chance_data[key]
-#
-# return mround(sum_chances)
+from batters.models import BattingCardRatingsModel
+from batters.card_builder import build_batter_full_cards
-def total_singles(all_hits, szn_singles, szn_hits):
- return sanitize_chance_output(all_hits * ((szn_singles * .8) / max(szn_hits, 1)))
-
-
-def bp_singles(all_singles):
- if all_singles < 6:
- return mround(0)
- else:
- return mround(5)
-
-
-def wh_singles(rem_singles, hard_rate):
- if rem_singles == 0 or hard_rate < .2:
- return 0
- elif hard_rate > .4:
- return sanitize_chance_output(rem_singles * 2 / 3, min_chances=2)
- else:
- return sanitize_chance_output(rem_singles / 3, min_chances=2)
-
-
-def one_singles(rem_singles, ifh_rate, force_rem=False):
- if force_rem:
- return mround(rem_singles)
- elif rem_singles == 0 or ifh_rate < .05:
- return mround(0)
- else:
- return sanitize_chance_output(rem_singles * min(ifh_rate * mround(3), 0.75), min_chances=2)
-
-
-def all_homeruns(rem_hits, all_hits, hrs, hits, singles):
- if rem_hits == 0 or all_hits == 0 or hrs == 0 or hits - singles == 0:
- return 0
- else:
- return mround(min(rem_hits, all_hits * ((hrs * 1.15) / max(hits, 1))))
-
-
-def nd_homeruns(all_hr, hr_rate):
- if all_hr == 0 or hr_rate == 0:
- return mround(0)
- elif hr_rate > .2:
- return sanitize_chance_output(all_hr * .6)
- else:
- return sanitize_chance_output(all_hr * .25)
-
-
-def bp_homeruns(all_hr, hr_rate):
- if all_hr == 0 or hr_rate == 0:
- return mround(0)
- elif hr_rate > .2:
- return mround(all_hr * 0.4, base=1.0)
- else:
- return mround(all_hr * 0.8, base=1.0)
-
-
-def triples(all_xbh, tr_count, do_count):
- if all_xbh == mround(0) or tr_count == mround(0):
- return mround(0)
- else:
- return sanitize_chance_output(all_xbh * mround(tr_count / max(tr_count + do_count, 1)), min_chances=1)
-
-
-def two_doubles(all_doubles, soft_rate):
- if all_doubles == 0 or soft_rate == 0:
- return mround(0)
- elif soft_rate > .2:
- return sanitize_chance_output(all_doubles / 2)
- else:
- return sanitize_chance_output(all_doubles / 4)
-
-
-def hit_by_pitch(other_ob, hbps, walks):
- if hbps == 0 or other_ob * mround(hbps / max(hbps + walks, 1)) < 1:
- return 0
- else:
- return sanitize_chance_output(other_ob * mround(hbps / max(hbps + walks, 1)), rounding=1.0)
-
-
-def strikeouts(all_outs, k_rate):
- if all_outs == 0 or k_rate == 0:
- return mround(0)
- else:
- return sanitize_chance_output(all_outs * k_rate)
-
-
-def flyout_a(all_flyouts, hard_rate):
- if all_flyouts == 0 or hard_rate < .4:
- return mround(0)
- else:
- return mround(1.0)
-
-
-def flyout_bq(rem_flyouts, soft_rate):
- if rem_flyouts == 0 or soft_rate < .1:
- return mround(0)
- else:
- return sanitize_chance_output(rem_flyouts * min(soft_rate * 3, mround(.75)))
-
-
-def flyout_b(rem_flyouts, pull_rate, cent_rate):
- if rem_flyouts == 0 or pull_rate == 0:
- return mround(0)
- else:
- return sanitize_chance_output(rem_flyouts * (pull_rate + cent_rate / 2))
-
-
-def popouts(rem_outs, iffb_rate):
- if rem_outs == 0 or iffb_rate * rem_outs < 1:
- return 0
- else:
- return mround(rem_outs * iffb_rate)
-
-
-def groundball_a(all_groundouts, gidps, abs):
- if all_groundouts == 0 or gidps == 0:
- return mround(0)
- else:
- return sanitize_chance_output(mround(min(gidps ** 2.5, abs) / max(abs, 1)) * all_groundouts)
-
-
-def groundball_c(rem_groundouts, med_rate):
- if rem_groundouts == 0 or med_rate < .4:
- return mround(0)
- elif med_rate > .6:
- return sanitize_chance_output(rem_groundouts)
- else:
- return sanitize_chance_output(rem_groundouts * med_rate)
-
-
-def stealing(chances: int, sb2s: int, cs2s: int, sb3s: int, cs3s: int, season_pct: float):
+def stealing(
+ chances: int, sb2s: int, cs2s: int, sb3s: int, cs3s: int, season_pct: float
+):
if chances == 0 or sb2s + cs2s == 0:
return 0, 0, False, 0
total_attempts = sb2s + cs2s + sb3s + cs3s
attempt_pct = total_attempts / chances
- if attempt_pct >= .08:
+ if attempt_pct >= 0.08:
st_auto = True
else:
st_auto = False
@@ -399,7 +65,7 @@ def stealing_line(steal_data: dict):
jump_chances = round(sd[3] * 36)
if jump_chances == 0:
- good_jump = '-'
+ good_jump = "-"
elif jump_chances <= 6:
if jump_chances == 6:
good_jump = 7
@@ -414,76 +80,76 @@ def stealing_line(steal_data: dict):
elif jump_chances == 1:
good_jump = 2
elif jump_chances == 7:
- good_jump = '4,5'
+ good_jump = "4,5"
elif jump_chances == 8:
- good_jump = '4,6'
+ good_jump = "4,6"
elif jump_chances == 9:
- good_jump = '3-5'
+ good_jump = "3-5"
elif jump_chances == 10:
- good_jump = '2-5'
+ good_jump = "2-5"
elif jump_chances == 11:
- good_jump = '6,7'
+ good_jump = "6,7"
elif jump_chances == 12:
- good_jump = '4-6'
+ good_jump = "4-6"
elif jump_chances == 13:
- good_jump = '2,4-6'
+ good_jump = "2,4-6"
elif jump_chances == 14:
- good_jump = '3-6'
+ good_jump = "3-6"
elif jump_chances == 15:
- good_jump = '2-6'
+ good_jump = "2-6"
elif jump_chances == 16:
- good_jump = '2,5-6'
+ good_jump = "2,5-7"  # 1+4+5+6 = 16 of 36 outcomes, assuming labels are two-dice totals like the rest of this table
elif jump_chances == 17:
- good_jump = '3,5-6'
+ good_jump = "3,5-7"  # 2+4+5+6 = 17 of 36 outcomes (the old "3,5-6" only covered 11)
elif jump_chances == 18:
- good_jump = '4-6'
+ good_jump = "4-7"  # 3+4+5+6 = 18 of 36 outcomes (the old "4-6" duplicated the 12-chance label)
elif jump_chances == 19:
- good_jump = '2,4-7'
+ good_jump = "2,4-7"
elif jump_chances == 20:
- good_jump = '3-7'
+ good_jump = "3-7"
elif jump_chances == 21:
- good_jump = '2-7'
+ good_jump = "2-7"
elif jump_chances == 22:
- good_jump = '2-7,12'
+ good_jump = "2-7,12"
elif jump_chances == 23:
- good_jump = '2-7,11'
+ good_jump = "2-7,11"
elif jump_chances == 24:
- good_jump = '2,4-8'
+ good_jump = "2,4-8"
elif jump_chances == 25:
- good_jump = '3-8'
+ good_jump = "3-8"
elif jump_chances == 26:
- good_jump = '2-8'
+ good_jump = "2-8"
elif jump_chances == 27:
- good_jump = '2-8,12'
+ good_jump = "2-8,12"
elif jump_chances == 28:
- good_jump = '2-8,11'
+ good_jump = "2-8,11"
elif jump_chances == 29:
- good_jump = '3-9'
+ good_jump = "3-9"
elif jump_chances == 30:
- good_jump = '2-9'
+ good_jump = "2-9"
elif jump_chances == 31:
- good_jump = '2-9,12'
+ good_jump = "2-9,12"
elif jump_chances == 32:
- good_jump = '2-9,11'
+ good_jump = "2-9,11"
elif jump_chances == 33:
- good_jump = '2-10'
+ good_jump = "2-10"
elif jump_chances == 34:
- good_jump = '3-11'
+ good_jump = "3-11"
elif jump_chances == 35:
- good_jump = '2-11'
+ good_jump = "2-11"
else:
- good_jump = '2-12'
+ good_jump = "2-12"
return f'{"*" if sd[2] else ""}{good_jump}/- ({sd[1] if sd[1] else "-"}-{sd[0] if sd[0] else "-"})'
def running(extra_base_pct: str):
- if extra_base_pct == '':
+ if extra_base_pct == "":  # empty input: return 8, the same floor the final clamp enforces
return 8
try:
xb_pct = float(extra_base_pct.strip("%")) / 80
except Exception as e:
- logger.error(f'calcs_batter running - {e}')
+ logger.error(f"calcs_batter running - {e}")  # NOTE(review): the fallback xb_pct = 20 below always clamps to 17, the maximum rating - confirm this is intended for unparseable input
xb_pct = 20
return max(min(round(6 + (10 * xb_pct)), 17), 8)
@@ -491,26 +157,36 @@ def running(extra_base_pct: str):
def bunting(num_bunts: int, season_pct: float):
if num_bunts > max(round(10 * season_pct), 4):
- return 'A'
+ return "A"  # more bunts than the season-scaled threshold of 10, which never drops below 4
elif num_bunts > max(round(5 * season_pct), 2):
- return 'B'
+ return "B"  # more bunts than the season-scaled threshold of 5, which never drops below 2
elif num_bunts > 1:
- return 'C'
+ return "C"  # at least two bunts, regardless of season_pct
else:
- return 'D'
+ return "D"  # one bunt or none
-
-def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int, hr_vr: int, so_vl: int, so_vr: int):
- babip = (hits_vr + hits_vl - hr_vl - hr_vr) / max(ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vl, 1)
- if babip >= .35:
- return 'A'
- elif babip >= .3:
- return 'B'
- elif babip >= .25:
- return 'C'
+def hit_and_run(
+ ab_vl: int,
+ ab_vr: int,
+ hits_vl: int,
+ hits_vr: int,
+ hr_vl: int,
+ hr_vr: int,
+ so_vl: int,
+ so_vr: int,
+):
+ """Return a hit-and-run grade ("A"-"D") from BABIP combined over both pitcher hands.
+
+ BABIP is computed as (hits - home runs) / (at-bats - strikeouts - home runs),
+ summing the vL and vR splits. The denominator is floored at 1 so an empty
+ sample yields 0 (grade "D") instead of dividing by zero.
+
+ Grades: "A" for BABIP >= 0.35, "B" for >= 0.3, "C" for >= 0.25, otherwise "D".
+ """
+ # Home runs against BOTH hands leave the denominator; the previous expression
+ # subtracted hr_vl twice and never subtracted hr_vr.
+ babip = (hits_vr + hits_vl - hr_vl - hr_vr) / max(
+ ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vr, 1
+ )
+ if babip >= 0.35:
+ return "A"
+ elif babip >= 0.3:
+ return "B"
+ elif babip >= 0.25:
+ return "C"
else:
- return 'D'
+ return "D"
def get_batter_ratings(df_data) -> List[dict]:
@@ -518,108 +194,154 @@ def get_batter_ratings(df_data) -> List[dict]:
offense_mod = 1.2
vl = BattingCardRatingsModel(
battingcard_id=df_data.battingcard_id,
- bat_hand=df_data['bat_hand'],
- vs_hand='L',
- all_hits=sanitize_chance_output(108 * offense_mod * df_data['AVG_vL']),
- all_other_ob=sanitize_chance_output(108 * offense_mod *
- ((df_data['BB_vL'] + df_data['HBP_vL']) / df_data['PA_vL'])),
- hard_rate=df_data['Hard%_vL'],
- med_rate=df_data['Med%_vL'],
- soft_rate=df_data['Soft%_vL'],
- pull_rate=df_data['Pull%_vL'],
- center_rate=df_data['Cent%_vL'],
- slap_rate=df_data['Oppo%_vL']
+ bat_hand=df_data["bat_hand"],
+ vs_hand="L",
+ all_hits=sanitize_chance_output(108 * offense_mod * df_data["AVG_vL"]),
+ all_other_ob=sanitize_chance_output(
+ 108
+ * offense_mod
+ * ((df_data["BB_vL"] + df_data["HBP_vL"]) / df_data["PA_vL"])
+ ),
+ hard_rate=df_data["Hard%_vL"],
+ med_rate=df_data["Med%_vL"],
+ soft_rate=df_data["Soft%_vL"],
+ pull_rate=df_data["Pull%_vL"],
+ center_rate=df_data["Cent%_vL"],
+ slap_rate=df_data["Oppo%_vL"],
)
vr = BattingCardRatingsModel(
battingcard_id=df_data.battingcard_id,
- bat_hand=df_data['bat_hand'],
- vs_hand='R',
- all_hits=sanitize_chance_output(108 * offense_mod * df_data['AVG_vR']),
- all_other_ob=sanitize_chance_output(108 * offense_mod *
- ((df_data['BB_vR'] + df_data['HBP_vR']) / df_data['PA_vR'])),
- hard_rate=df_data['Hard%_vR'],
- med_rate=df_data['Med%_vR'],
- soft_rate=df_data['Soft%_vR'],
- pull_rate=df_data['Pull%_vR'],
- center_rate=df_data['Cent%_vR'],
- slap_rate=df_data['Oppo%_vR']
+ bat_hand=df_data["bat_hand"],
+ vs_hand="R",
+ all_hits=sanitize_chance_output(108 * offense_mod * df_data["AVG_vR"]),
+ all_other_ob=sanitize_chance_output(
+ 108
+ * offense_mod
+ * ((df_data["BB_vR"] + df_data["HBP_vR"]) / df_data["PA_vR"])
+ ),
+ hard_rate=df_data["Hard%_vR"],
+ med_rate=df_data["Med%_vR"],
+ soft_rate=df_data["Soft%_vR"],
+ pull_rate=df_data["Pull%_vR"],
+ center_rate=df_data["Cent%_vR"],
+ slap_rate=df_data["Oppo%_vR"],
)
- vl.all_outs = mround(108 - vl.all_hits - vl.all_other_ob) #.quantize(Decimal("0.05"))
- vr.all_outs = mround(108 - vr.all_hits - vr.all_other_ob) #.quantize(Decimal("0.05"))
+ vl.all_outs = mround(
+ 108 - vl.all_hits - vl.all_other_ob
+ ) # .quantize(Decimal("0.05"))
+ vr.all_outs = mround(
+ 108 - vr.all_hits - vr.all_other_ob
+ ) # .quantize(Decimal("0.05"))
- vl.calculate_singles(df_data['1B_vL'], df_data['H_vL'], mround(df_data['IFH%_vL']))
- vr.calculate_singles(df_data['1B_vR'], df_data['H_vR'], mround(df_data['IFH%_vR']))
+ vl.calculate_singles(df_data["1B_vL"], df_data["H_vL"], mround(df_data["IFH%_vL"]))
+ vr.calculate_singles(df_data["1B_vR"], df_data["H_vR"], mround(df_data["IFH%_vR"]))
logger.debug(
- f'vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} '
- f'/ Total: {vl.all_hits + vl.all_other_ob + vl.all_outs}'
+ f"vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} "
+ f"/ Total: {vl.all_hits + vl.all_other_ob + vl.all_outs}"
)
logger.debug(
- f'vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} '
- f'/ Total: {vr.all_hits + vr.all_other_ob + vr.all_outs}'
+ f"vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} "
+ f"/ Total: {vr.all_hits + vr.all_other_ob + vr.all_outs}"
)
- vl.calculate_xbh(df_data['3B_vL'], df_data['2B_vL'], df_data['HR_vL'], df_data['HR/FB_vL'])
- vr.calculate_xbh(df_data['3B_vR'], df_data['2B_vR'], df_data['HR_vR'], df_data['HR/FB_vR'])
+ vl.calculate_xbh(
+ df_data["3B_vL"], df_data["2B_vL"], df_data["HR_vL"], df_data["HR/FB_vL"]
+ )
+ vr.calculate_xbh(
+ df_data["3B_vR"], df_data["2B_vR"], df_data["HR_vR"], df_data["HR/FB_vR"]
+ )
- logger.debug(f'all_hits: {vl.all_hits} / sum of hits: {vl.total_chances()}')
- logger.debug(f'all_hits: {vr.all_hits} / sum of hits: {vr.total_chances()}')
+ logger.debug(f"all_hits: {vl.all_hits} / sum of hits: {vl.total_chances()}")
+ logger.debug(f"all_hits: {vr.all_hits} / sum of hits: {vr.total_chances()}")
- vl.calculate_other_ob(df_data['BB_vL'], df_data['HBP_vL'])
- vr.calculate_other_ob(df_data['BB_vR'], df_data['HBP_vR'])
+ vl.calculate_other_ob(df_data["BB_vL"], df_data["HBP_vL"])
+ vr.calculate_other_ob(df_data["BB_vR"], df_data["HBP_vR"])
- logger.debug(f'all on base: {vl.hbp + vl.walk + vl.total_hits()} / all chances: {vl.total_chances()}'
- f'{"*******ERROR ABOVE*******" if vl.hbp + vl.walk + vl.total_hits() != vl.total_chances() else ""}')
- logger.debug(f'all on base: {vr.hbp + vr.walk + vr.total_hits()} / all chances: {vr.total_chances()}'
- f'{"*******ERROR ABOVE*******" if vr.hbp + vr.walk + vr.total_hits() != vr.total_chances() else ""}')
+ logger.debug(
+ f"all on base: {vl.hbp + vl.walk + vl.total_hits()} / all chances: {vl.total_chances()}"
+ f'{"*******ERROR ABOVE*******" if vl.hbp + vl.walk + vl.total_hits() != vl.total_chances() else ""}'
+ )
+ logger.debug(
+ f"all on base: {vr.hbp + vr.walk + vr.total_hits()} / all chances: {vr.total_chances()}"
+ f'{"*******ERROR ABOVE*******" if vr.hbp + vr.walk + vr.total_hits() != vr.total_chances() else ""}'
+ )
- vl.calculate_strikeouts(df_data['SO_vL'], df_data['AB_vL'], df_data['H_vL'])
- vr.calculate_strikeouts(df_data['SO_vR'], df_data['AB_vR'], df_data['H_vR'])
+ vl.calculate_strikeouts(df_data["SO_vL"], df_data["AB_vL"], df_data["H_vL"])
+ vr.calculate_strikeouts(df_data["SO_vR"], df_data["AB_vR"], df_data["H_vR"])
- logger.debug(f'K rate vL: {round(vl.strikeout / vl.all_outs, 2)} / '
- f'K rate vR: {round(vr.strikeout / vr.all_outs, 2)}')
+ logger.debug(
+ f"K rate vL: {round(vl.strikeout / vl.all_outs, 2)} / "
+ f"K rate vR: {round(vr.strikeout / vr.all_outs, 2)}"
+ )
vl.calculate_other_outs(
- df_data['FB%_vL'], df_data['LD%_vL'], df_data['GB%_vL'], df_data['GDP_vL'], df_data['AB_vL']
+ df_data["FB%_vL"],
+ df_data["LD%_vL"],
+ df_data["GB%_vL"],
+ df_data["GDP_vL"],
+ df_data["AB_vL"],
)
vr.calculate_other_outs(
- df_data['FB%_vR'], df_data['LD%_vR'], df_data['GB%_vR'], df_data['GDP_vR'], df_data['AB_vR']
+ df_data["FB%_vR"],
+ df_data["LD%_vR"],
+ df_data["GB%_vR"],
+ df_data["GDP_vR"],
+ df_data["AB_vR"],
)
# Correct total chance errors
for x in [vl, vr]:
if x.total_chances() < 108:
diff = mround(108) - x.total_chances()
- logger.error(f'Adding {diff} strikeouts to close gap')
+ logger.error(f"Adding {diff} strikeouts to close gap")
x.strikeout += diff
elif x.total_chances() > 108:
diff = x.total_chances() - mround(108)
- logger.error(f'Have surplus of {diff} chances')
+ logger.error(f"Have surplus of {diff} chances")
if x.strikeout + 1 > diff:
- logger.error(f'Subtracting {diff} strikeouts to close gap')
+ logger.error(f"Subtracting {diff} strikeouts to close gap")
x.strikeout -= diff
elif x.lineout + 1 > diff:
- logger.error(f'Subtracting {diff} lineouts to close gap')
+ logger.error(f"Subtracting {diff} lineouts to close gap")
x.lineout -= diff
elif x.groundout_a + 1 > diff:
- logger.error(f'Subtracting {diff} gbA to close gap')
+ logger.error(f"Subtracting {diff} gbA to close gap")
x.groundout_a -= diff
elif x.groundout_b + 1 > diff:
- logger.error(f'Subtracting {diff} gbB to close gap')
+ logger.error(f"Subtracting {diff} gbB to close gap")
x.groundout_b -= diff
elif x.groundout_c + 1 > diff:
- logger.error(f'Subtracting {diff} gbC to close gap')
+ logger.error(f"Subtracting {diff} gbC to close gap")
x.groundout_c -= diff
vl_total_chances = vl.total_chances()
vr_total_chances = vr.total_chances()
if vl_total_chances != 108:
- logger.error(f'total chances for {df_data.name} come to {vl_total_chances}')
+ logger.error(f"total chances for {df_data.name} come to {vl_total_chances}")
else:
- logger.debug(f'total chances: {vl_total_chances}')
+ logger.debug(f"total chances: {vl_total_chances}")
if vr_total_chances != 108:
- logger.error(f'total chances for {df_data.name} come to {vr_total_chances}')
+ logger.error(f"total chances for {df_data.name} come to {vr_total_chances}")
else:
- logger.debug(f'total chances: {vr_total_chances}')
+ logger.debug(f"total chances: {vr_total_chances}")
- return [vl.custom_to_dict(), vr.custom_to_dict()]
+ vl_dict = vl.custom_to_dict()
+ vr_dict = vr.custom_to_dict()
+
+ try:
+ offense_col = int(df_data["offense_col"]) if "offense_col" in df_data else 1
+ player_id = (
+ int(df_data["player_id"])
+ if "player_id" in df_data
+ else abs(hash(df_data["key_bbref"])) % 10000
+ )
+ vl_card, vr_card = build_batter_full_cards(
+ vl, vr, offense_col, player_id, df_data["bat_hand"]
+ )
+ vl_dict.update(vl_card.card_output())
+ vr_dict.update(vr_card.card_output())
+ except Exception as e:
+ logger.warning(f"Card layout builder failed for {df_data.name}: {e}")
+
+ return [vl_dict, vr_dict]
diff --git a/batters/card_builder.py b/batters/card_builder.py
new file mode 100644
index 0000000..58a375b
--- /dev/null
+++ b/batters/card_builder.py
@@ -0,0 +1,802 @@
+"""
+Batter card-building algorithm, ported from database/app/card_creation.py (~lines 1357-2226).
+
+Converts BattingCardRatingsModel instances (vL and vR) into FullBattingCard objects
+that represent the physical card layout.
+"""
+import copy
+import math
+import logging
+from decimal import Decimal
+
+from card_layout import FullBattingCard, PLAY_RESULTS, PlayResult, EXACT_CHANCES, get_chances
+from batters.models import BattingCardRatingsModel
+
+logger = logging.getLogger(__name__)
+
+
+def build_batter_full_cards(
+ ratings_vl: BattingCardRatingsModel,
+ ratings_vr: BattingCardRatingsModel,
+ offense_col: int,
+ player_id: int,
+ hand: str, # player's batting hand: 'R', 'L', or 'S'
+) -> tuple:
+ """Build vL and vR FullBattingCard objects from pre-calculated ratings.
+
+ Returns (vl_card, vr_card).
+ """
+ player_binary = player_id % 2
+
+ vl = FullBattingCard(offense_col=offense_col, alt_direction=player_binary)
+ vr = FullBattingCard(offense_col=offense_col, alt_direction=player_binary)
+
+ def assign_bchances(this_card, play, chances, secondary_play=None):
+ r_data = this_card.add_result(play, chances, secondary_play)
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ else:
+ for x in EXACT_CHANCES:
+ if x < math.floor(chances):
+ r_data = this_card.add_result(play, Decimal(math.floor(chances)), secondary_play)
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ break
+ if x < chances:
+ r_data = this_card.add_result(play, x, secondary_play)
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ return 0, 0
+
+ def get_pullside_of(vs_hand):
+ if hand == 'L':
+ return 'rf'
+ elif hand == 'R':
+ return 'lf'
+ elif vs_hand == 'L':
+ return 'lf'
+ else:
+ return 'rf'
+
+ def get_preferred_mif(ratings):
+ if hand == 'L' and ratings.slap_rate > .24:
+ return 'ss'
+ elif hand == 'L' or (hand == 'R' and ratings.slap_rate > .24):
+ return '2b'
+ else:
+ return 'ss'
+
+ for card, data, vs_hand in [
+ (vl, copy.deepcopy(ratings_vl), 'L'),
+ (vr, copy.deepcopy(ratings_vr), 'R'),
+ ]:
+ logger.info(f'\n\nBeginning v{vs_hand}')
+
+ new_ratings = BattingCardRatingsModel(
+ battingcard_id=data.battingcard_id,
+ bat_hand=data.bat_hand,
+ vs_hand=vs_hand,
+ hard_rate=data.hard_rate,
+ med_rate=data.med_rate,
+ soft_rate=data.soft_rate,
+ pull_rate=data.pull_rate,
+ center_rate=data.center_rate,
+ slap_rate=data.slap_rate,
+ )
+ pull_of = get_pullside_of(vs_hand)
+ pref_mif = get_preferred_mif(data)
+
+ # BP Homerun
+ res_chances = data.bp_homerun
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(card, PLAY_RESULTS['bp-hr'], ch)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.bp_homerun += r_val[0]
+
+ # HBP
+ retries = 0
+ res_chances = data.hbp
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(card, PlayResult(full_name='HBP', short_name='HBP'), ch)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.hbp += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ # Homerun
+ retries = 0
+ res_chances = data.homerun
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_pull > 0:
+ data.double_pull += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.triple > 0:
+ data.triple += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.double_pull > (data.flyout_rf_b + data.flyout_lf_b) and data.double_pull > max(1 - ch, 0):
+ secondary = PLAY_RESULTS[f'do-{pull_of}']
+ elif data.flyout_lf_b > data.flyout_rf_b and data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-lf']
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-rf']
+ elif data.double_pull > max(1 - ch, 0):
+ secondary = PLAY_RESULTS[f'do-{pull_of}']
+ elif data.double_three > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['do***']
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['do**']
+ elif data.triple > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['tr']
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['hr'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.homerun += r_val[0]
+ if r_val[1] > 0:
+ if secondary.short_name[:4] == 'DO (':
+ data.double_pull -= r_val[1]
+ new_ratings.double_pull += r_val[1]
+ elif 'lf' in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'rf' in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif '***' in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif '**' in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+ elif 'TR' in secondary.short_name:
+ data.triple -= r_val[1]
+ new_ratings.triple += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Triple
+ retries = 0
+ res_chances = data.triple
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_pull > 0:
+ data.double_pull += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si**']
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-lf']
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-rf']
+ elif data.double_pull > max(1 - ch, 0):
+ secondary = PLAY_RESULTS[f'do-{pull_of}']
+ elif data.double_three > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['do***']
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['do**']
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['tr'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.triple += r_val[0]
+ if r_val[1] > 0:
+ if 'DO (' in secondary.short_name:
+ data.double_pull -= r_val[1]
+ new_ratings.double_pull += r_val[1]
+ elif 'lf' in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'rf' in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif '***' in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif 'SI' in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif '**' in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Double***
+ retries = 0
+ res_chances = data.double_three
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_pull > 0:
+ data.double_pull += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si**']
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-lf']
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-rf']
+ elif data.double_pull > max(1 - ch, 0):
+ secondary = PLAY_RESULTS[f'do-{pull_of}']
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['do**']
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['do***'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.double_three += r_val[0]
+ if r_val[1] > 0:
+ if 'DO (' in secondary.short_name:
+ data.double_pull -= r_val[1]
+ new_ratings.double_pull += r_val[1]
+ elif 'lf' in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'rf' in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif 'SI' in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif '**' in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Double pull-side
+ retries = 0
+ res_chances = data.double_pull
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'fly (lf) B', short_name=f'fly B')
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'fly (rf) B', short_name=f'fly b')
+ elif data.single_one > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si*']
+ elif data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si**']
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS[f'do-{pull_of}'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.double_pull += r_val[0]
+ if r_val[1] > 0:
+ if 'lf' in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'rf' in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif '***' in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif 'SI' in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif '**' in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Double**
+ retries = 0
+ res_chances = data.double_two
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si**']
+ elif data.single_center > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['si-cf']
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-lf']
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS['fly-rf']
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['do**'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.double_two += r_val[0]
+ if r_val[1] > 0:
+ if 'lf' in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'rf' in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif 'SI' in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Single**
+ retries = 0
+ res_chances = data.single_two
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.groundout_a > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) A', short_name=f'gb ({pref_mif}) A')
+ elif data.groundout_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) B', short_name=f'gb ({pref_mif}) B')
+ elif data.groundout_c > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) C', short_name=f'gb ({pref_mif}) C')
+ elif data.lineout > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'lo ({pref_mif})', short_name=f'lo ({pref_mif})')
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['si**'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.single_two += r_val[0]
+ if r_val[1] > 0:
+ if 'C' in secondary.short_name:
+ data.groundout_c -= r_val[1]
+ new_ratings.groundout_c += r_val[1]
+ elif 'B' in secondary.short_name:
+ data.groundout_b -= r_val[1]
+ new_ratings.groundout_b += r_val[1]
+ elif 'A' in secondary.short_name:
+ data.groundout_a -= r_val[1]
+ new_ratings.groundout_a += r_val[1]
+ elif 'lo' in secondary.short_name:
+ data.lineout -= r_val[1]
+ new_ratings.lineout += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Single (cf)
+ retries = 0
+ res_chances = data.single_center
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.flyout_bq > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'fly B?', short_name=f'fly B?')
+ elif data.flyout_lf_b > max(1 - ch, 0) and data.flyout_lf_b > data.flyout_rf_b:
+ secondary = PlayResult(full_name=f'fly (LF) B', short_name=f'fly B')
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'fly (RF) B', short_name=f'fly B')
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'fly (LF) B', short_name=f'fly B')
+ elif data.lineout > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'lo ({pref_mif})', short_name=f'lo ({pref_mif})')
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['si-cf'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.single_center += r_val[0]
+ if r_val[1] > 0:
+ if '?' in secondary.short_name:
+ data.flyout_bq -= r_val[1]
+ new_ratings.flyout_bq += r_val[1]
+ elif 'LF' in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif 'RF' in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif 'lo' in secondary.short_name:
+ data.lineout -= r_val[1]
+ new_ratings.lineout += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Single*
+ retries = 0
+ res_chances = data.single_one
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.groundout_c > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) C', short_name=f'gb ({pref_mif}) C')
+ elif data.groundout_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) B', short_name=f'gb ({pref_mif}) B')
+ elif data.groundout_a > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'gb ({pref_mif}) A', short_name=f'gb ({pref_mif}) A')
+ elif data.lineout > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'lo ({pref_mif})', short_name=f'lo ({pref_mif})')
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['si*'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.single_one += r_val[0]
+ if r_val[1] > 0:
+ if 'C' in secondary.short_name:
+ data.groundout_c -= r_val[1]
+ new_ratings.groundout_c += r_val[1]
+ elif 'B' in secondary.short_name:
+ data.groundout_b -= r_val[1]
+ new_ratings.groundout_b += r_val[1]
+ elif 'A' in secondary.short_name:
+ data.groundout_a -= r_val[1]
+ new_ratings.groundout_a += r_val[1]
+ elif 'lo' in secondary.short_name:
+ data.lineout -= r_val[1]
+ new_ratings.lineout += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Walk
+ retries = 0
+ res_chances = data.walk
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ if data.strikeout > max(1 - ch, 0):
+ secondary = PlayResult(full_name=f'strikeout', short_name=f'so')
+ else:
+ secondary = None
+
+ r_val = assign_bchances(card, PLAY_RESULTS['walk'], ch, secondary)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.walk += r_val[0]
+ if r_val[1] > 0:
+ data.strikeout -= r_val[1]
+ new_ratings.strikeout += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # BP Single
+ retries = 0
+ res_chances = data.bp_single
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(card, PLAY_RESULTS['bp-si'], ch)
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ res_chances -= r_val[0]
+ new_ratings.bp_single += r_val[0]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ # Special lomax result
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'lo ({pref_mif}) max', short_name=f'lo ({pref_mif}) max'), Decimal(1))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+ data.lineout -= r_val[0]
+ new_ratings.lineout += r_val[0]
+
+ # Popout
+ retries = 0
+ res_chances = data.popout
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ this_if = '2b' if pref_mif == 'ss' else 'ss'
+ r_val = assign_bchances(
+ card,
+ PlayResult(full_name=f'popout ({this_if})', short_name=f'popout ({this_if})'),
+ Decimal(math.floor(ch))
+ )
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.lineout += res_chances
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.popout += r_val[0]
+
+ # Flyout A
+ retries = 0
+ res_chances = data.flyout_a
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'fly (cf) A', short_name=f'fly (cf) A'), Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.strikeout += res_chances if data.strikeout > 2 else 0
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.flyout_a += r_val[0]
+
+ # Flyout LF B
+ retries = 0
+ res_chances = data.flyout_lf_b
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'fly (lf) B', short_name=f'fly (lf) B'), Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.strikeout += res_chances if data.strikeout > 2 else 0
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.flyout_lf_b += r_val[0]
+
+ # Flyout RF B
+ retries = 0
+ res_chances = data.flyout_rf_b
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'fly (rf) B', short_name=f'fly (rf) B'), Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.strikeout += res_chances if data.strikeout > 2 else 0
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.flyout_rf_b += r_val[0]
+
+ # Groundout A
+ count_gb = 0
+
+ def get_gb_if():
+ if count_gb % 4 == 1:
+ return pref_mif
+ elif count_gb % 4 == 2:
+ return '2b' if pref_mif == 'ss' else 'ss'
+ elif count_gb % 4 == 3:
+ return '1b' if pref_mif == '2b' else 'p'
+ else:
+ return '3b' if pref_mif == 'ss' else 'p'
+
+ retries = 0
+ res_chances = data.groundout_a
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ count_gb += 1
+ this_if = get_gb_if()
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'gb ({this_if}) A', short_name=f'gb ({this_if}) A'),
+ Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.groundout_b += res_chances
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.groundout_a += r_val[0]
+
+ # Groundout B
+ retries = 0
+ res_chances = data.groundout_b
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ count_gb += 1
+ this_if = get_gb_if()
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'gb ({this_if}) B', short_name=f'gb ({this_if}) B'),
+ Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.groundout_c += res_chances
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.groundout_b += r_val[0]
+
+ # Groundout C
+ retries = 0
+ res_chances = data.groundout_c
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ count_gb += 1
+ this_if = get_gb_if()
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'gb ({this_if}) C', short_name=f'gb ({this_if}) C'),
+ Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ data.strikeout += res_chances
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.groundout_c += r_val[0]
+
+ # Lineout
+ retries = 0
+ res_chances = data.lineout
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ this_if = '3b' if pref_mif == 'ss' else '1b'
+ r_val = assign_bchances(
+ card,
+ PlayResult(full_name=f'lineout ({this_if})', short_name=f'lineout ({this_if})'),
+ Decimal(math.floor(ch))
+ )
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.lineout += r_val[0]
+
+ # Strikeout
+ retries = 0
+ res_chances = data.strikeout
+ while res_chances >= 1:
+ if res_chances < 1 or retries > 0:
+ break
+
+ ch = get_chances(res_chances)
+ r_val = assign_bchances(
+ card, PlayResult(full_name=f'strikeout', short_name=f'strikeout'), Decimal(math.floor(ch)))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if r_val[0] == 0:
+ break
+ else:
+ res_chances -= r_val[0]
+ new_ratings.strikeout += r_val[0]
+
+ # Filler loop — fill any remaining empty card slots
+ plays = sorted(
+ [(data.strikeout, 'so'), (data.lineout, 'lo'), (data.groundout_c, 'gb'), (data.popout, 'po')],
+ key=lambda z: z[0],
+ reverse=True
+ )
+ count_filler = -1
+ while not card.is_complete():
+ count_filler += 1
+ this_play = plays[count_filler % 4]
+ if this_play[1] == 'so':
+ play_res = PlayResult(full_name=f'strikeout', short_name=f'strikeout')
+ elif this_play[1] == 'lo':
+ this_if = '3b' if pref_mif == 'ss' else '1b'
+ play_res = PlayResult(full_name=f'lineout ({this_if})', short_name=f'lineout ({this_if})')
+ elif this_play[1] == 'gb':
+ count_gb += 1
+ this_if = get_gb_if()
+ play_res = PlayResult(full_name=f'gb ({this_if}) C', short_name=f'gb ({this_if}) C')
+ else:
+ play_res = PlayResult(full_name=f'popout (c)', short_name=f'popout (c)')
+
+ logger.debug(f'Send Card Fill\n{play_res}')
+ r_raw = card.card_fill(play_res)
+ r_val = (float(r_raw[0]), float(r_raw[1]))
+ logger.debug(f'Returned batting chances: {r_val[0]} / {r_val[1]}\n')
+
+ if this_play[1] == 'so':
+ new_ratings.strikeout += r_val[0]
+ elif this_play[1] == 'lo':
+ new_ratings.lineout += r_val[0]
+ elif this_play[1] == 'gb':
+ new_ratings.groundout_c += r_val[0]
+ else:
+ new_ratings.popout += r_val[0]
+
+ new_ratings.calculate_rate_stats()
+
+ return vl, vr
diff --git a/batters/models.py b/batters/models.py
new file mode 100644
index 0000000..6623119
--- /dev/null
+++ b/batters/models.py
@@ -0,0 +1,309 @@
+import random
+
+import pydantic
+
+from creation_helpers import mround, sanitize_chance_output
+from typing import Literal
+from decimal import Decimal
+from exceptions import logger
+
+
+def bp_singles(all_singles):
+    """Return the bp_single chances for a card: 0 or 5, passed through mround."""
+    # Fewer than 6 singles get no bp-single; otherwise a flat 5 is used.
+    bp_chances = 0 if all_singles < 6 else 5
+    return mround(bp_chances)
+
+
+def wh_singles(rem_singles, hard_rate):
+    """Return the single_two share of rem_singles: none, one third, or two thirds, chosen by hard_rate."""
+    if rem_singles == 0 or hard_rate < .2:
+        return 0
+    # A hard_rate above .4 earns two thirds of the remaining singles; .2 through .4 earns one third.
+    share = rem_singles * 2 / 3 if hard_rate > .4 else rem_singles / 3
+    return sanitize_chance_output(share, min_chances=2)
+
+
+def one_singles(rem_singles, ifh_rate, force_rem=False):
+    """Return the single_one chances: all of rem_singles if force_rem, else a share of it scaled by ifh_rate."""
+    if force_rem:
+        return mround(rem_singles)
+    if rem_singles == 0 or ifh_rate < .05:
+        return mround(0)
+    return sanitize_chance_output(rem_singles * min(ifh_rate * mround(3), 0.75), min_chances=2)
+
+
+def bp_homeruns(all_hr, hr_rate):
+    """Return how many of all_hr become bp_homerun chances, rounded by mround with base=1.0."""
+    if all_hr == 0 or hr_rate == 0:
+        return mround(0)
+    # An hr_rate above .2 converts 40% of the home runs; anything lower converts 80%.
+    bp_share = 0.4 if hr_rate > .2 else 0.8
+    return mround(all_hr * bp_share, base=1.0)
+
+
+def triples(all_xbh, tr_count, do_count):
+    """Return the triple chances: all_xbh scaled by tr_count's share of tr_count + do_count (at least 1 chance)."""
+    if all_xbh == mround(0) or tr_count == mround(0):
+        return mround(0)
+    return sanitize_chance_output(all_xbh * mround(tr_count / max(tr_count + do_count, 1)), min_chances=1)
+
+
+def two_doubles(all_doubles, soft_rate):
+    """Return the double_two chances: half of all_doubles when soft_rate > .2, otherwise a quarter."""
+    if all_doubles == 0 or soft_rate == 0:
+        return mround(0)
+    # A soft_rate above .2 moves the larger share of doubles to double_two.
+    divisor = 2 if soft_rate > .2 else 4
+    return sanitize_chance_output(all_doubles / divisor)
+
+
+def hit_by_pitch(other_ob, hbps, walks):
+    """Return the hbp chances: other_ob times the hbps share of hbps + walks, or 0 when that is under 1."""
+    raw = 0 if hbps == 0 else other_ob * mround(hbps / max(hbps + walks, 1))
+    # Anything short of one whole chance (including the no-hbp case) is dropped entirely.
+    return 0 if raw < 1 else sanitize_chance_output(raw, rounding=1.0)
+
+
+def strikeouts(all_outs, k_rate):
+    """Return the strikeout chances, all_outs * k_rate, or mround(0) when either input is zero."""
+    if all_outs == 0 or k_rate == 0:
+        return mround(0)
+    return sanitize_chance_output(all_outs * k_rate)
+
+
+def flyout_a(all_flyouts, hard_rate):
+    """Return the flyout_a chances: mround(1.0) when there are flyouts and hard_rate is not below .4, else mround(0)."""
+    # No flyouts at all, or a hard_rate under .4, means no flyout_a result.
+    none_due = all_flyouts == 0 or hard_rate < .4
+    return mround(0) if none_due else mround(1.0)
+
+
+def flyout_bq(rem_flyouts, soft_rate):
+    """Return the flyout_bq chances: rem_flyouts times three times soft_rate, the factor capped at mround(.75)."""
+    if rem_flyouts == 0 or soft_rate < .1:
+        return mround(0)
+    return sanitize_chance_output(rem_flyouts * min(soft_rate * 3, mround(.75)))
+
+
+def flyout_b(rem_flyouts, pull_rate, cent_rate):
+    """Return B-flyout chances for one field: rem_flyouts weighted by pull_rate plus half of cent_rate."""
+    if rem_flyouts == 0 or pull_rate == 0:
+        return mround(0)
+    return sanitize_chance_output(rem_flyouts * (pull_rate + cent_rate / 2))
+
+
+def groundball_a(all_groundouts, gidps, abs):
+    """Return groundout_a chances: all_groundouts scaled by min(gidps ** 2.5, abs) / max(abs, 1); abs shadows the builtin."""
+    if all_groundouts == 0 or gidps == 0:
+        return mround(0)
+    return sanitize_chance_output(mround(min(gidps ** 2.5, abs) / max(abs, 1)) * all_groundouts)
+
+
+def groundball_c(rem_groundouts, med_rate):
+    """Return the groundout_c chances: none below a .4 med_rate, all of them above .6, a med_rate share between."""
+    if rem_groundouts == 0 or med_rate < .4:
+        return mround(0)
+    # Above .6 every remaining groundout is a C; from .4 to .6 only the med_rate fraction is.
+    c_chances = rem_groundouts if med_rate > .6 else rem_groundouts * med_rate
+    return sanitize_chance_output(c_chances)
+
+
+class BattingCardRatingsModel(pydantic.BaseModel):  # Per-result chance totals for one batting card vs. one pitcher hand.
+    battingcard_id: int
+    bat_hand: Literal['R', 'L', 'S']
+    vs_hand: Literal['R', 'L']
+    all_hits: float = 0.0  # hit pool that calculate_singles / calculate_xbh divide up
+    all_other_ob: float = 0.0  # pool split into hbp and walk by calculate_other_ob
+    all_outs: float = 0.0  # out pool; rem_outs() reports what is still unassigned
+    rem_singles: float = 0.0  # working remainder used inside calculate_singles
+    rem_xbh: float = 0.0  # working remainder used inside calculate_xbh
+    rem_hr: float = 0.0
+    rem_doubles: float = 0.0
+    hard_rate: float  # the six *_rate fields have no default and must be supplied
+    med_rate: float
+    soft_rate: float
+    pull_rate: float
+    center_rate: float
+    slap_rate: float
+    homerun: float = 0.0  # from here down to groundout_c: one field per card result
+    bp_homerun: float = 0.0
+    triple: float = 0.0
+    double_three: float = 0.0
+    double_two: float = 0.0
+    double_pull: float = 0.0
+    single_two: float = 0.0
+    single_one: float = 0.0
+    single_center: float = 0.0
+    bp_single: float = 0.0
+    hbp: float = 0.0
+    walk: float = 0.0
+    strikeout: float = 0.0
+    lineout: float = 0.0
+    popout: float = 0.0
+    rem_flyballs: float = 0.0  # working remainder used inside calculate_other_outs
+    flyout_a: float = 0.0
+    flyout_bq: float = 0.0
+    flyout_lf_b: float = 0.0
+    flyout_rf_b: float = 0.0
+    rem_groundballs: float = 0.0
+    groundout_a: float = 0.0
+    groundout_b: float = 0.0
+    groundout_c: float = 0.0
+    avg: float = 0.0  # avg / obp / slg are filled in by calculate_rate_stats
+    obp: float = 0.0
+    slg: float = 0.0
+
+    def total_chances(self):  # mround of the sum of all 22 result fields
+        return mround(sum([
+            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
+            self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
+            self.lineout, self.popout, self.flyout_a, self.flyout_bq, self.flyout_lf_b, self.flyout_rf_b,
+            self.groundout_a, self.groundout_b, self.groundout_c
+        ]))
+
+    def total_hits(self):  # mround of the sum of the 10 hit-result fields
+        return mround(sum([
+            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
+            self.single_two, self.single_one, self.single_center, self.bp_single
+        ]))
+
+    def rem_hits(self):  # all_hits not yet assigned to a hit result (not passed through mround, unlike rem_outs)
+        return (self.all_hits -
+                   sum([
+                       self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_pull,
+                       self.single_two, self.single_one, self.single_center, self.bp_single
+                   ]))
+
+    def rem_outs(self):  # mround of all_outs minus every out result assigned so far
+        return mround(self.all_outs -
+                      sum([
+                          self.strikeout, self.lineout, self.popout, self.flyout_a, self.flyout_bq, self.flyout_lf_b,
+                          self.flyout_rf_b, self.groundout_a, self.groundout_b, self.groundout_c
+                      ]))
+
+    def rem_other_ob(self):  # all_other_ob not yet assigned to hbp or walk
+        return self.all_other_ob - self.hbp - self.walk
+
+    def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal):  # fills the four single fields, seeds rem_xbh
+        tot = sanitize_chance_output(self.all_hits * mround((szn_singles * .8) / max(szn_hits, 1)))
+        logger.debug(f'tot: {tot}')
+        self.rem_singles = tot
+
+        self.bp_single = bp_singles(self.rem_singles)  # each step below takes its cut from rem_singles in turn
+        self.rem_singles -= self.bp_single
+
+        self.single_two = wh_singles(self.rem_singles, self.hard_rate)
+        self.rem_singles -= self.single_two
+
+        self.single_one = one_singles(self.rem_singles, ifh_rate)
+        self.rem_singles -= self.single_one
+
+        self.single_center = sanitize_chance_output(self.rem_singles)  # whatever is left goes to single_center
+        self.rem_singles -= self.single_center
+
+        self.rem_xbh = self.all_hits - self.bp_single - self.single_two - self.single_one - self.single_center
+
+    def calculate_xbh(self, szn_triples, szn_doubles, szn_hr, hr_per_fb: Decimal):  # splits rem_xbh: triple, doubles, HR
+        self.triple = triples(self.rem_xbh, szn_triples, szn_doubles + szn_hr)  # doubles + HR are passed as do_count
+        self.rem_xbh -= self.triple
+
+        tot_doubles = sanitize_chance_output(self.rem_xbh * mround(szn_doubles / max(szn_hr + szn_doubles, 1)))
+        self.double_two = two_doubles(tot_doubles, self.soft_rate)
+        self.double_pull = sanitize_chance_output(tot_doubles - self.double_two)
+        self.rem_xbh -= mround(self.double_two + self.double_pull)
+
+        if (self.rem_xbh > mround(0)) and szn_hr > 0:  # home runs are only assigned when the season had any
+            self.bp_homerun = bp_homeruns(self.rem_xbh, hr_per_fb)
+            self.homerun = sanitize_chance_output(self.rem_xbh - self.bp_homerun, min_chances=0.5)
+            self.rem_xbh -= mround(self.bp_homerun + self.homerun)
+
+        if szn_triples > 0 and self.rem_xbh > 0:  # leftover extra-base chances go to triple, else to all_other_ob
+            logger.error(f'Adding {self.rem_xbh} results to triples')
+            self.triple += sanitize_chance_output(self.rem_xbh, min_chances=0.5)
+        elif self.rem_xbh > 0:
+            logger.error(f'Adding {self.rem_xbh} results to all other ob')
+            self.all_other_ob += self.rem_xbh
+
+    def calculate_other_ob(self, szn_bb, szn_hbp):  # hbp first, walk from the rest; any shortfall moves to all_outs
+        self.hbp = hit_by_pitch(self.all_other_ob, szn_hbp, szn_bb)
+        self.walk = sanitize_chance_output(self.all_other_ob - self.hbp)
+
+        if self.walk + self.hbp < self.all_other_ob:
+            rem = self.all_other_ob - self.walk - self.hbp
+            logger.error(f'Adding {rem} chances to all_outs')
+            self.all_outs += mround(rem)
+
+    def calculate_strikeouts(self, szn_so, szn_ab, szn_hits):  # k_rate = strikeouts per at-bat that was not a hit
+        self.strikeout = strikeouts(self.all_outs, (szn_so / max(szn_ab - szn_hits, 1)))
+
+    def calculate_other_outs(self, fb_rate, ld_rate, gb_rate, szn_gidp, szn_ab):  # flyouts, lineout/popout, groundouts
+        self.rem_flyballs = sanitize_chance_output(self.rem_outs() * mround(fb_rate))
+        self.flyout_a = flyout_a(self.rem_flyballs, self.hard_rate)
+        self.rem_flyballs -= self.flyout_a
+
+        self.flyout_bq = flyout_bq(self.rem_flyballs, self.soft_rate)
+        self.rem_flyballs -= self.flyout_bq
+
+        self.flyout_lf_b = flyout_b(
+            self.rem_flyballs,
+            pull_rate=self.pull_rate if self.bat_hand == 'R' else self.slap_rate,  # 'L' and 'S' use slap_rate for LF
+            cent_rate=self.center_rate
+        )
+        self.rem_flyballs -= self.flyout_lf_b
+        self.flyout_rf_b = sanitize_chance_output(self.rem_flyballs)
+        self.rem_flyballs -= self.flyout_rf_b
+
+        if self.rem_flyballs > 0:  # log only: nothing is added to lineout here; later steps draw on rem_outs()
+            logger.debug(f'Adding {self.rem_flyballs} chances to lineouts')
+
+        tot_oneouts = sanitize_chance_output(self.rem_outs() * mround(ld_rate / max(ld_rate + gb_rate, .01)))
+        self.lineout = sanitize_chance_output(mround(random.random()) * tot_oneouts)  # random lineout/popout split
+        self.popout = sanitize_chance_output(tot_oneouts - self.lineout)
+
+        self.groundout_a = groundball_a(self.rem_outs(), szn_gidp, szn_ab)
+        self.groundout_c = groundball_c(self.rem_outs(), self.med_rate)
+        self.groundout_b = self.rem_outs()  # groundout_b absorbs every out still unassigned
+
+    def calculate_rate_stats(self):  # avg / obp / slg per 108 chances; slg weights bp_homerun as 2 and bp_single as 1/2
+        self.avg = mround(self.total_hits() / 108, prec=5, base=0.00001)
+        self.obp = mround((self.total_hits() + self.hbp + self.walk) / 108, prec=5, base=0.00001)
+        self.slg = mround((
+            self.homerun * 4 + self.bp_homerun * 2 + self.triple * 3 + self.double_three * 2 +
+            self.double_two * 2 + self.double_pull * 2 + self.single_two + self.single_one +
+            self.single_center + self.bp_single / 2) / 108, prec=5, base=0.00001)
+
+    def custom_to_dict(self):  # refreshes avg / obp / slg, then returns the exported fields as a dict
+        self.calculate_rate_stats()
+        return {
+            'battingcard_id': self.battingcard_id,
+            'vs_hand': self.vs_hand,
+            'homerun': self.homerun,
+            'bp_homerun': self.bp_homerun,
+            'triple': self.triple,
+            'double_three': self.double_three,
+            'double_two': self.double_two,
+            'double_pull': self.double_pull,
+            'single_two': self.single_two,
+            'single_one': self.single_one,
+            'single_center': self.single_center,
+            'bp_single': self.bp_single,
+            'hbp': self.hbp,
+            'walk': self.walk,
+            'strikeout': mround(self.strikeout),  # the only value passed through mround on export
+            'lineout': self.lineout,
+            'popout': self.popout,
+            'flyout_a': self.flyout_a,
+            'flyout_bq': self.flyout_bq,
+            'flyout_lf_b': self.flyout_lf_b,
+            'flyout_rf_b': self.flyout_rf_b,
+            'groundout_a': self.groundout_a,
+            'groundout_b': self.groundout_b,
+            'groundout_c': self.groundout_c,
+            'pull_rate': self.pull_rate,
+            'center_rate': self.center_rate,
+            'slap_rate': self.slap_rate,
+            'avg': self.avg,
+            'obp': self.obp,
+            'slg': self.slg
+        }
diff --git a/card_layout.py b/card_layout.py
new file mode 100644
index 0000000..863a8c0
--- /dev/null
+++ b/card_layout.py
@@ -0,0 +1,1015 @@
+"""
+Card layout models: PlayResult, CardResult, CardColumn, FullCard, FullBattingCard, FullPitchingCard.
+
+Adapted from database/app/card_creation.py for use in the card-creation pipeline.
+These models represent the actual card layout (three 2d6 columns with text results)
+as opposed to the raw rating chances stored in BattingCardRatingsModel / PitchingCardRatingsModel.
+"""
+import logging
+import math
+import re
+
+import pydantic
+
+from decimal import Decimal
+from pydantic import validator
+from typing import Optional
+
+
+# Fractional chance totals that CardColumn.add_result will try to place as a d20 split
+# on a single 2d6 slot; add_result checks membership in this list before splitting.
+# The list is in descending order: add_result's fallback scans it front to back and
+# takes the first entry that is smaller than the request by a whole number.
+EXACT_CHANCES = [
+ Decimal('5.7'), Decimal('5.4'), Decimal('5.1'), Decimal('4.8'), Decimal('4.75'), Decimal('4.5'), Decimal('4.25'),
+ Decimal('4.2'), Decimal('3.9'), Decimal('3.8'), Decimal('3.75'), Decimal('3.6'), Decimal('3.5'), Decimal('3.4'),
+ Decimal('3.3'), Decimal('3.25'), Decimal('3.2'), Decimal('2.85'), Decimal('2.8'), Decimal('2.75'), Decimal('2.7'),
+ Decimal('2.6'), Decimal('2.55'), Decimal('2.5'), Decimal('2.4'), Decimal('2.25'), Decimal('2.2'), Decimal('2.1'),
+ Decimal('1.95'), Decimal('1.9'), Decimal('1.8'), Decimal('1.75'), Decimal('1.7'), Decimal('1.65'), Decimal('1.6'),
+ Decimal('1.5'), Decimal('1.4'), Decimal('1.35'), Decimal('1.3'), Decimal('1.25'), Decimal('1.2'), Decimal('1.1'),
+ Decimal('1.05')
+]
+
+
+class PlayResult(pydantic.BaseModel):
+    """A named play outcome that can be printed on a card.
+
+    ``full_name`` is used when the play fills a slot alone and ``short_name`` when it
+    shares a slot in a d20 split.  ``is_offense`` is always derived from the first two
+    characters of ``short_name``; any value passed in by the caller is ignored.
+    """
+    full_name: str
+    short_name: str
+    is_offense: bool = True
+
+    @validator("is_offense", always=True)
+    def offense_validator(cls, v, values, **kwargs):
+        """Return True when ``short_name`` starts with one of the offensive-play prefixes."""
+        # `values` only contains fields that validated successfully, so short_name may
+        # be absent here; fall back to '' and let pydantic report the short_name error
+        # rather than raising KeyError from inside this validator.
+        short_name = values.get('short_name') or ''
+        return short_name[:2] in ['HR', 'TR', 'DO', 'SI', 'WA', 'HB', '◆B', '▼B']
+
+
+# Lookup of the stock plays by short key.  Each row is (key, full_name, short_name).
+PLAY_RESULTS = {
+    key: PlayResult(full_name=full_name, short_name=short_name)
+    for key, full_name, short_name in (
+        ('hr', 'HOMERUN', 'HR'),
+        ('bp-hr', '◆BP-HR', '◆BP-HR'),
+        ('tr', 'TRIPLE', 'TR'),
+        ('do-lf', 'DOUBLE (lf)', 'DO (lf)'),
+        ('do-cf', 'DOUBLE (cf)', 'DO (cf)'),
+        ('do-rf', 'DOUBLE (rf)', 'DO (rf)'),
+        ('do***', 'DOUBLE***', 'DO***'),
+        ('do**', 'DOUBLE**', 'DO**'),
+        ('si**', 'SINGLE**', 'SI**'),
+        ('si*', 'SINGLE*', 'SI*'),
+        ('si-cf', 'SINGLE (cf)', 'SI (cf)'),
+        ('bp-si', '▼BP-SI', '▼BP-SI'),
+        ('walk', 'WALK', 'WALK'),
+        ('fly-rf', 'fly (rf) B', 'fly (rf) B'),
+        ('fly-lf', 'fly (lf) B', 'fly (lf) B'),
+        ('fly-cf', 'fly (cf) B', 'fly (cf) B'),
+        ('fly-bq', 'fly B?', 'fly B?'),
+    )
+}
+
+
+def get_chances(total_chances, apply_limits=True) -> Decimal:
+    """Convert a raw chance value to a Decimal suitable for card slot assignment.
+
+    With ``apply_limits`` set, values above 5.5 are bucketed into whole slot sizes
+    (> 12.5 -> 6, > 10.5 -> 5, > 8.5 -> 4, > 5.5 -> 3).  Every other value is
+    returned unchanged apart from being capped at 6.
+    """
+    limit_table = ((12.5, 6), (10.5, 5), (8.5, 4), (5.5, 3))
+    for threshold, slot_size in limit_table:
+        if total_chances > threshold and apply_limits:
+            return Decimal(slot_size)
+
+    capped = float(total_chances)
+    if capped > 6.0:
+        capped = 6.0
+    # Going through str() keeps the short decimal form (e.g. 2.25) instead of the
+    # binary expansion of the float.
+    return Decimal(str(capped))
+
+
+class CardResult(pydantic.BaseModel):
+ """One 2d6 slot on a card column: a single result, or two results split by a d20 range."""
+ # result_one / result_two: text of the primary and (for d20 splits) secondary result
+ # d20_one / d20_two: d20 range labels written by assign_play, e.g. '1-16' and '17-20'
+ # bold_one / bold_two: set by assign_play when the matching play has is_offense == True
+ result_one: str = None
+ result_two: str = None
+ d20_one: str = None
+ d20_two: str = None
+ bold_one: bool = False
+ bold_two: bool = False
+
+ def __str__(self):
+ """Render the slot as text: 'Empty' while result_one is unset, otherwise result_one plus any d20 range and secondary result."""
+ res_text = f'Empty'
+ if self.result_one is not None:
+ res_text = f'{self.result_one}'
+ if self.d20_one is not None:
+ res_text += f' | {self.d20_one}'
+ if self.result_two is not None:
+ res_text += f'\n{self.result_two} | {self.d20_two}'
+ return res_text
+
+ def is_full(self):
+ """Return True once a primary result has been assigned to this slot."""
+ return self.result_one is not None
+
+ def assign_play(self, play: PlayResult, secondary_play: Optional[PlayResult] = None, d20: Optional[int] = None):
+ """Fill this slot with `play`, optionally sharing it with `secondary_play` on a d20 split.
+
+ Without `secondary_play` the slot shows play.full_name, with any run of '+'
+ collapsed to a single '+'. With `secondary_play` both plays use short_name,
+ `play` covers d20 rolls 1..d20 and `secondary_play` covers d20+1..20; `d20`
+ must be an int in that case because d20 + 1 is computed.
+ """
+ if secondary_play is None:
+ self.result_one = play.full_name
+ if '++' in play.full_name:
+ logging.warning(f'Too many plus symbols: {play.full_name}')
+ self.result_one = re.sub(r'\++', '+', play.full_name)
+
+ if play.is_offense:
+ self.bold_one = True
+ else:
+ self.result_one = play.short_name
+ self.result_two = secondary_play.short_name
+ self.d20_one = f'1-{d20}'
+ # a split at 19 leaves a single value, so print '20' rather than '20-20'
+ if d20 == 19:
+ self.d20_two = f'20'
+ else:
+ self.d20_two = f'{d20 + 1}-20'
+
+ if play.is_offense:
+ self.bold_one = True
+ if secondary_play.is_offense:
+ self.bold_two = True
+
+ logging.debug(f'this result: {self}')
+
+
+class CardColumn(pydantic.BaseModel):
+ two: CardResult = CardResult() # 1 chance
+ three: CardResult = CardResult() # 2 chances
+ four: CardResult = CardResult() # 3 chances
+ five: CardResult = CardResult() # 4 chances
+ six: CardResult = CardResult() # 5 chances
+ seven: CardResult = CardResult() # 6 chances
+ eight: CardResult = CardResult() # 5 chances
+ nine: CardResult = CardResult() # 4 chances
+ ten: CardResult = CardResult() # 3 chances
+ eleven: CardResult = CardResult() # 2 chances
+ twelve: CardResult = CardResult() # 1 chance
+ num_splits: int = 0
+ num_lomax: int = 0
+ num_plusgb: int = 0
+
+    def __str__(self):
+        """Render the column as eleven lines, one ``<roll>-<slot>`` entry per 2d6 total."""
+        slots = (self.two, self.three, self.four, self.five, self.six, self.seven, self.eight,
+                 self.nine, self.ten, self.eleven, self.twelve)
+        return '\n'.join(f'{roll}-{slot}' for roll, slot in enumerate(slots, start=2))
+
+    def get_text(self) -> dict:
+        """Render the column as three parallel HTML fragments.
+
+        Returns a dict with keys ``sixes`` (2d6 roll labels), ``results`` (result
+        text) and ``d20`` (d20 ranges).  Each fragment holds one ``<br>``-terminated
+        entry per roll from 2 to 12.  A slot with a d20 split adds a second
+        ``<br>``-separated line to all three fragments so the rows stay aligned.
+        """
+        # NOTE(review): the exact markup (<b>...</b> for bold, <br> as the line break,
+        # and whether blank() should be '&nbsp;' rather than ' ') is assumed --
+        # confirm against database/app/card_creation.py.
+        sixes = ''
+        results = ''
+        d20 = ''
+
+        def bold(text):
+            return f'<b>{text}</b>'
+
+        def blank():
+            return ' '
+
+        for count, x in enumerate(
+                [self.two, self.three, self.four, self.five, self.six, self.seven, self.eight, self.nine,
+                 self.ten, self.eleven, self.twelve], start=2):
+            if x.bold_one:
+                this_six = bold(f'{count}-')
+                this_result = bold(x.result_one)
+                this_d20 = bold(x.d20_one) if x.d20_one is not None else blank()
+            else:
+                this_six = f'{count}-'
+                this_result = f'{x.result_one}'
+                this_d20 = f'{x.d20_one}' if x.d20_one is not None else blank()
+
+            # Second line of a d20 split: the roll label stays blank so it lines up
+            # with the secondary result and its d20 range.
+            if x.result_two is not None:
+                if x.bold_two:
+                    this_six += f'<br>{bold(blank())}'
+                    this_result += f'<br>{bold(x.result_two)}'
+                    this_d20 += f'<br>{bold(x.d20_two)}'
+                else:
+                    this_six += f'<br>{blank()}'
+                    this_result += f'<br>{x.result_two}'
+                    this_d20 += f'<br>{x.d20_two}'
+
+            sixes += f'{this_six}<br>'
+            results += f'{this_result}<br>'
+            d20 += f'{this_d20}<br>'
+
+        return {'sixes': sixes, 'results': results, 'd20': d20}
+
+    def is_full(self):
+        """Return True when all eleven slots (2 through 12) have a result assigned."""
+        slots = (self.two, self.three, self.four, self.five, self.six, self.seven, self.eight,
+                 self.nine, self.ten, self.eleven, self.twelve)
+        return all(slot.is_full() for slot in slots)
+
+ def add_result(
+ self, play: PlayResult, alt_direction: int, chances: Decimal,
+ secondary_play: Optional[PlayResult] = None):
+ """Try to place `play`, worth `chances` 2d6 chances, into open slots of this column.
+
+ Whole-number requests are matched against fixed slot combinations whose chance
+ counts add up to `chances`; the first combination with every slot open is used.
+ Fractional requests listed in EXACT_CHANCES are placed as a d20 split with
+ `secondary_play` on a single open slot, for at most four splits per column
+ (tracked in num_splits). Any other fractional request is retried with the
+ first smaller EXACT_CHANCES value that differs from it by a whole number.
+ A fractional request without `secondary_play` is rounded down when it is
+ above 1 and rejected otherwise.
+
+ `alt_direction` is only logged and forwarded to the recursive retry.
+
+ Returns a tuple (chances placed for `play`, chances given to `secondary_play`)
+ on success, and a falsy value (False or None) when nothing could be placed.
+ Raises ValueError when `chances` is greater than 6.
+ """
+ if chances > Decimal(6.0):
+ logging.error(f'Cannot assign more than 6 chances per call\n'
+ f'Play: {play}\nAlt Direction: {alt_direction}\nChances: {chances}\n'
+ f'Secondary Play: {secondary_play}')
+ raise ValueError(f'Cannot assign more than 6 chances per call')
+ elif math.floor(chances) != chances and secondary_play is None:
+ if chances > Decimal(1.0):
+ chances = Decimal(math.floor(chances))
+ else:
+ logging.error(f'Must have secondary play for fractional chances; could not round down to an integer\n'
+ f'Play: {play}\nChances: {chances}\nSecondary Play: {secondary_play}')
+ return False
+
+ # Chances is whole number
+ if math.floor(chances) == chances:
+ if chances == Decimal(6):
+ if not self.seven.is_full():
+ self.seven.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.six.is_full():
+ if not self.two.is_full():
+ self.six.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.six.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.eight.is_full():
+ if not self.two.is_full():
+ self.eight.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.eight.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus two
+ if not self.five.is_full():
+ if not self.three.is_full():
+ self.five.assign_play(play)
+ self.three.assign_play(play)
+ return chances, 0
+ elif not self.eleven.is_full():
+ self.five.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ # Bulk 2, 3, 4 and 10, 11, 12
+ if not self.three.is_full() and not self.two.is_full() and not self.four.is_full():
+ self.four.assign_play(play)
+ self.three.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+
+ if not self.ten.is_full() and not self.eleven.is_full() and not self.twelve.is_full():
+ self.ten.assign_play(play)
+ self.eleven.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ if not self.nine.is_full():
+ if not self.three.is_full():
+ self.nine.assign_play(play)
+ self.three.assign_play(play)
+ return chances, 0
+ elif not self.eleven.is_full():
+ self.nine.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ if chances == Decimal(5):
+ if not self.six.is_full():
+ self.six.assign_play(play)
+ return chances, 0
+
+ if not self.eight.is_full():
+ self.eight.assign_play(play)
+ return chances, 0
+
+ # Bulk 3, 4 and 10, 11
+ if not self.three.is_full() and not self.four.is_full():
+ self.four.assign_play(play)
+ self.three.assign_play(play)
+ return chances, 0
+
+ if not self.ten.is_full() and not self.eleven.is_full():
+ self.ten.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.five.is_full():
+ if not self.two.is_full():
+ self.five.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.five.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.nine.is_full():
+ if not self.two.is_full():
+ self.nine.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.nine.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus two
+ if not self.four.is_full():
+ if not self.three.is_full():
+ self.four.assign_play(play)
+ self.three.assign_play(play)
+ return chances, 0
+ elif not self.eleven.is_full():
+ self.four.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ # Plus two
+ if not self.ten.is_full():
+ if not self.three.is_full():
+ self.ten.assign_play(play)
+ self.three.assign_play(play)
+ return chances, 0
+ elif not self.eleven.is_full():
+ self.ten.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ if chances == Decimal(4):
+ if not self.five.is_full():
+ self.five.assign_play(play)
+ return chances, 0
+
+ if not self.nine.is_full():
+ self.nine.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.four.is_full():
+ if not self.two.is_full():
+ self.four.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.four.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.ten.is_full():
+ if not self.two.is_full():
+ self.ten.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.ten.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ if not self.three.is_full() and not self.eleven.is_full():
+ self.three.assign_play(play)
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ if chances == Decimal(3):
+ if not self.four.is_full():
+ self.four.assign_play(play)
+ return chances, 0
+
+ if not self.ten.is_full():
+ self.ten.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.three.is_full():
+ if not self.two.is_full():
+ self.three.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+ elif not self.twelve.is_full():
+ self.three.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ # Plus one
+ if not self.eleven.is_full():
+ if not self.twelve.is_full():
+ self.eleven.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+ if not self.two.is_full():
+ self.eleven.assign_play(play)
+ self.two.assign_play(play)
+ return chances, 0
+
+ if chances == Decimal(2):
+ if not self.three.is_full():
+ self.three.assign_play(play)
+ return chances, 0
+
+ if not self.eleven.is_full():
+ self.eleven.assign_play(play)
+ return chances, 0
+
+ if not self.two.is_full() and not self.twelve.is_full():
+ self.two.assign_play(play)
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ if chances == Decimal(1):
+ if not self.two.is_full():
+ self.two.assign_play(play)
+ return chances, 0
+
+ if not self.twelve.is_full():
+ self.twelve.assign_play(play)
+ return chances, 0
+
+ return False
+
+ logging.debug(f'Not a whole number | Chances: {chances}')
+ # d20 splits: slot chances * (d20 / 20) is the share given to `play`; the second
+ # element of each return value is the share left for `secondary_play`.
+ if chances in EXACT_CHANCES and self.num_splits < 4 and secondary_play is not None:
+ logging.debug(f'In Exact Chances!')
+ if chances >= 3:
+ self.num_splits += 1
+ logging.debug(f'Chances is greater than 3')
+ if chances == Decimal('3.2'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.8')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.8')
+ elif chances == Decimal('3.25'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.75')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.75')
+ elif chances == Decimal('3.3') and not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 11)
+ return chances, Decimal('2.7')
+ elif chances == Decimal('3.4'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.6')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.6')
+ elif chances == Decimal('3.5'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 14)
+ return chances, Decimal('1.5')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 14)
+ return chances, Decimal('1.5')
+ elif chances == Decimal('3.6'):
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.4')
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.4')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 12)
+ return chances, Decimal('2.4')
+ elif chances == Decimal('3.75'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 15)
+ return chances, Decimal('1.25')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 15)
+ return chances, Decimal('1.25')
+ elif chances == Decimal('3.8'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.2')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.2')
+ elif chances == Decimal('3.9'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 13)
+ return chances, Decimal('2.1')
+ elif chances == Decimal('4.2'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 14)
+ return chances, Decimal('1.8')
+ elif chances == Decimal('4.25'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.75')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.75')
+ elif chances == Decimal('4.5'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.5')
+ if not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.5')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 15)
+ return chances, Decimal('1.5')
+ elif chances == Decimal('4.75'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.25')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.25')
+ elif chances == Decimal('4.8'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 16)
+ return chances, Decimal('1.2')
+ elif chances == Decimal('5.1'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.9')
+ elif chances == Decimal('5.4'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.6')
+ elif chances == Decimal('5.7'):
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.3')
+ elif chances >= 1:
+ self.num_splits += 1
+ logging.debug(f'Chances is greater than 1')
+ if chances == Decimal('1.05'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 7)
+ return chances, Decimal('1.95')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 7)
+ return chances, Decimal('1.95')
+ if chances == Decimal('1.1'):
+ if not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 11)
+ return chances, Decimal('0.9')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 11)
+ return chances, Decimal('0.9')
+ if chances == Decimal('1.2'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 6)
+ return chances, Decimal('2.8')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 6)
+ return chances, Decimal('2.8')
+ elif not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 8)
+ return chances, Decimal('1.8')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 8)
+ return chances, Decimal('1.8')
+ elif not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 12)
+ return chances, Decimal('0.8')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 12)
+ return chances, Decimal('0.8')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 4)
+ return chances, Decimal('4.8')
+ if chances == Decimal('1.25'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 5)
+ return chances, Decimal('3.75')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 5)
+ return chances, Decimal('3.75')
+ if chances == Decimal('1.3'):
+ if not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 13)
+ return chances, Decimal('0.7')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 13)
+ return chances, Decimal('0.7')
+ if chances == Decimal('1.35'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 9)
+ return chances, Decimal('1.65')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 9)
+ return chances, Decimal('1.65')
+ if chances == Decimal('1.4'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 7)
+ return chances, Decimal('2.6')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 7)
+ return chances, Decimal('2.6')
+ elif not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 14)
+ return chances, Decimal('0.6')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 14)
+ return chances, Decimal('0.6')
+ if chances == Decimal('1.5'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 6)
+ return chances, Decimal('3.5')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 6)
+ return chances, Decimal('3.5')
+ elif not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 10)
+ return chances, Decimal('1.5')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 10)
+ return chances, Decimal('1.5')
+ elif not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 15)
+ return chances, Decimal('0.5')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 15)
+ return chances, Decimal('0.5')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 5)
+ return chances, Decimal('4.5')
+ if chances == Decimal('1.6'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 8)
+ return chances, Decimal('2.4')
+ elif not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 8)
+ return chances, Decimal('2.4')
+ elif not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.4')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.4')
+ if chances == Decimal('1.65'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 11)
+ return chances, Decimal('1.35')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 11)
+ return chances, Decimal('1.35')
+ if chances == Decimal('1.7'):
+ if not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.3')
+ elif not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.3')
+ if chances == Decimal('1.75'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 7)
+ return chances, Decimal('3.25')
+ elif not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 7)
+ return chances, Decimal('3.25')
+ if chances == Decimal('1.8'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 9)
+ return chances, Decimal('2.2')
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 9)
+ return chances, Decimal('2.2')
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 12)
+ return chances, Decimal('1.2')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 12)
+ return chances, Decimal('1.2')
+ if not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.2')
+ if not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.2')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 6)
+ return chances, Decimal('4.2')
+ if chances == Decimal('1.9'):
+ # NOTE(review): a 1-13 split on four/ten (3-chance slots) is 3 * 13/20 = 1.95
+ # chances, identical to the 1.95 branch below, so the 1.1 remainder reported
+ # here overstates the secondary share by 0.05 -- confirm whether these two
+ # options are intended for 1.9.
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.1')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.1')
+ if not self.three.is_full():
+ self.three.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.1')
+ if not self.eleven.is_full():
+ self.eleven.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.1')
+ if chances == Decimal('1.95'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.05')
+ elif not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.05')
+ if chances == Decimal('2.1'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 14)
+ return chances, Decimal('0.9')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 14)
+ return chances, Decimal('0.9')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 7)
+ return chances, Decimal('3.9')
+ if chances == Decimal('2.2'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 11)
+ return chances, Decimal('1.8')
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 11)
+ return chances, Decimal('1.8')
+ if chances == Decimal('2.25'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 9)
+ return chances, Decimal('2.75')
+ if not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 9)
+ return chances, Decimal('2.75')
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 15)
+ return chances, Decimal('0.75')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 15)
+ return chances, Decimal('0.75')
+ if chances == Decimal('2.4'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 12)
+ return chances, Decimal('1.6')
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 12)
+ return chances, Decimal('1.6')
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.6')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 16)
+ return chances, Decimal('0.6')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 8)
+ return chances, Decimal('3.6')
+ if chances == Decimal('2.5'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 10)
+ return chances, Decimal('2.5')
+ if not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 10)
+ return chances, Decimal('2.5')
+ if chances == Decimal('2.55'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.45')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 17)
+ return chances, Decimal('0.45')
+ if chances == Decimal('2.6'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.4')
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 13)
+ return chances, Decimal('1.4')
+ if chances == Decimal('2.7'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.3')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 18)
+ return chances, Decimal('0.3')
+ if not self.seven.is_full():
+ self.seven.assign_play(play, secondary_play, 9)
+ return chances, Decimal('3.3')
+ if chances == Decimal('2.75'):
+ if not self.six.is_full():
+ self.six.assign_play(play, secondary_play, 11)
+ return chances, Decimal('2.25')
+ if not self.eight.is_full():
+ self.eight.assign_play(play, secondary_play, 11)
+ return chances, Decimal('2.25')
+ if chances == Decimal('2.8'):
+ if not self.five.is_full():
+ self.five.assign_play(play, secondary_play, 14)
+ return chances, Decimal('1.2')
+ if not self.nine.is_full():
+ self.nine.assign_play(play, secondary_play, 14)
+ return chances, Decimal('1.2')
+ if chances == Decimal('2.85'):
+ if not self.four.is_full():
+ self.four.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.15')
+ if not self.ten.is_full():
+ self.ten.assign_play(play, secondary_play, 19)
+ return chances, Decimal('0.15')
+ else:
+ logging.debug(f'Chances is less than 1')
+ return False
+
+ # No slot was free for this split: undo the num_splits increment made above.
+ self.num_splits -= 1
+
+ else:
+ logging.debug(f'Not a whole number and not in Exact Chances! Trying to add a subset')
+ for x in EXACT_CHANCES:
+ if x < chances and ((chances - x) == round(chances - x)):
+ logging.debug(f'Trying to add {x} chances')
+ return self.add_result(play, alt_direction, x, secondary_play)
+ logging.debug(f'Could not find a valid match')
+ return False
+
+    def total_chances(self):
+        """Return how many of the column's 36 2d6 chances sit on filled slots."""
+        weighted_slots = (
+            (self.two, 1), (self.three, 2), (self.four, 3), (self.five, 4), (self.six, 5), (self.seven, 6),
+            (self.eight, 5), (self.nine, 4), (self.ten, 3), (self.eleven, 2), (self.twelve, 1),
+        )
+        return sum(weight for slot, weight in weighted_slots if slot.is_full())
+
+    def add_fatigue(self, num_chances: int, k_only: bool = False):
+        """Append the fatigue marker ' •' to one eligible slot worth ``num_chances``.
+
+        Candidate slots are seven for 6 chances, six then eight for 5, and five then
+        nine for 4; any other ``num_chances`` marks nothing.  With ``k_only`` a slot
+        qualifies only when its result is exactly 'strikeout'.  Otherwise it must be
+        an unsplit, non-bold result with no 'X' and no existing marker.
+
+        Returns the number of chances marked, or 0 when no slot qualified.
+        """
+        def is_valid_result(this_result: CardResult):
+            text = this_result.result_one
+            if text is None:
+                # An unassigned slot has no text to mark (and cannot be searched).
+                return False
+            if k_only:
+                return text == 'strikeout'
+            return (this_result.result_two is None and not this_result.bold_one
+                    and 'X' not in text and '•' not in text)
+
+        slot_groups = (
+            (6, (self.seven,)),
+            (5, (self.six, self.eight)),
+            (4, (self.five, self.nine)),
+        )
+        for size, slots in slot_groups:
+            if num_chances != size:
+                continue
+            for slot in slots:
+                if is_valid_result(slot):
+                    slot.result_one += ' •'
+                    return size
+            break
+
+        return 0
+
+
+class FullCard(pydantic.BaseModel):
+ col_one: CardColumn = CardColumn()
+ col_two: CardColumn = CardColumn()
+ col_three: CardColumn = CardColumn()
+ offense_col: int
+ alt_direction: int = 1
+ num_plusgb: int = 0
+ num_lomax: int = 0
+ is_batter: bool = False
+
+ class Config:
+ arbitrary_types_allowed = True
+
+    def get_columns(self, is_offense: bool):
+        """Return the three columns as a (first, second, third) tuple in fill order.
+
+        The column selected by ``offense_col`` comes first for offensive plays and
+        last for everything else.  The other two follow card order starting after
+        the offense column, or the reverse of that when ``alt_direction`` is falsy.
+        """
+        ring = (self.col_one, self.col_two, self.col_three)
+        # Any offense_col other than 1 or 2 selects column three.
+        anchor = {1: 0, 2: 1}.get(self.offense_col, 2)
+        offense_column = ring[anchor]
+        following, preceding = ring[(anchor + 1) % 3], ring[(anchor + 2) % 3]
+        others = (following, preceding) if self.alt_direction else (preceding, following)
+
+        if is_offense:
+            return (offense_column, *others)
+        return (*others, offense_column)
+
+    def is_complete(self):
+        """Return True when every slot in all three columns has been assigned."""
+        return all(column.is_full() for column in (self.col_one, self.col_two, self.col_three))
+
+    def sample_output(self):
+        """Return a plain-text dump of the card: a COMPLETE / NOT COMPLETE header, then each column."""
+        header = 'COMPLETE' if self.is_complete() else 'NOT COMPLETE'
+        sections = [
+            f'Column {number}\n{column}'
+            for number, column in enumerate((self.col_one, self.col_two, self.col_three), start=1)
+        ]
+        return header + '\n' + '\n\n'.join(sections)
+
+    def add_result(self, play: PlayResult, chances: Decimal, secondary_play: Optional[PlayResult] = None):
+        """Place ``play`` on the first column that accepts it.
+
+        Columns are tried in the order given by ``get_columns`` for the play's
+        ``is_offense`` flag.  On batter cards a play whose full name contains 'gb'
+        gets a '+' suffix while ``chances + num_plusgb`` is at most 6.  After a
+        successful placement ``num_plusgb`` (name contains '+') or ``num_lomax``
+        (name contains 'max') is increased by the chances placed.
+
+        Returns the column's ``(placed, secondary)`` tuple, or False when no column
+        could take the play.
+        """
+        first, second, third = self.get_columns(is_offense=play.is_offense)
+
+        if 'gb' in play.full_name and chances + self.num_plusgb <= 6 and self.is_batter:
+            # Suffix a copy: the caller's PlayResult may be reused for later calls, and
+            # editing it in place would stack another '+' on every call.
+            play = play.copy(update={'full_name': play.full_name + '+'})
+
+        for x in [first, second, third]:
+            r_data = x.add_result(play, self.alt_direction, chances, secondary_play)
+            if r_data:
+                if '+' in play.full_name:
+                    self.num_plusgb += r_data[0]
+                elif 'max' in play.full_name:
+                    self.num_lomax += r_data[0]
+                return r_data
+
+        return False
+
+    def card_fill(self, play: PlayResult):
+        """Place ``play`` in the largest whole-chance opening available, trying 6 down to 1.
+
+        Returns the result of the first successful ``add_result`` call, or ``(0, 0)``
+        when the play could not be placed at any size.
+        """
+        for size in (6, 5, 4, 3, 2, 1):
+            placed = self.add_result(play, Decimal(size))
+            if placed:
+                return placed
+
+        return 0, 0
+
+    def card_output(self) -> dict:
+        """Return the pre-rendered card columns as 9 HTML strings suitable for direct storage/display.
+
+        Keys are ``col_<one|two|three>_<2d6|results|d20>``, taken from each column's
+        ``get_text()`` output.
+        """
+        rendered = {}
+        for label, column in (('one', self.col_one), ('two', self.col_two), ('three', self.col_three)):
+            text = column.get_text()
+            rendered[f'col_{label}_2d6'] = text['sixes']
+            rendered[f'col_{label}_results'] = text['results']
+            rendered[f'col_{label}_d20'] = text['d20']
+        return rendered
+
+    def total_chances(self):
+        """Return the number of filled 2d6 chances summed over the three columns."""
+        return sum(column.total_chances() for column in (self.col_one, self.col_two, self.col_three))
+
+    def add_fatigue(self):
+        """Mark slots worth 10 chances in total with the fatigue marker.
+
+        First one 6-chance slot is sought (strikeouts first, then any eligible
+        result), and if none is found, one 5-chance slot.  The remainder
+        (10 minus what was marked) is then sought as a single slot, again strikeouts
+        first.  Columns are visited in ``get_columns(is_offense=False)`` order.  An
+        error is logged when the total does not reach 10.
+        """
+        columns = self.get_columns(is_offense=False)
+
+        def mark_first(num_chances, k_only):
+            # Mark at most one slot: stop at the first column that accepts it.
+            for column in columns:
+                marked = column.add_fatigue(num_chances, k_only=k_only)
+                if marked:
+                    return marked
+            return 0
+
+        total_added = 0
+        for num_chances, k_only in ((6, True), (6, False), (5, True), (5, False)):
+            if total_added == 0:
+                total_added += mark_first(num_chances, k_only)
+
+        for k_only in (True, False):
+            if total_added != 10:
+                total_added += mark_first(10 - total_added, k_only)
+
+        if total_added != 10:
+            logging.error(f'FullCard add_fatigue - Could not add all fatigue results / total_added: {total_added}')
+
+
+class FullBattingCard(FullCard):
+ """FullCard for batters: is_batter defaults to True, which lets FullCard.add_result add the '+' suffix to 'gb' plays."""
+ is_batter: bool = True
+
+
+class FullPitchingCard(FullCard):
+ """FullCard for pitchers: is_batter defaults to False, so FullCard.add_result never adds the '+' suffix to 'gb' plays."""
+ is_batter: bool = False
diff --git a/docs/FULLCARD_MIGRATION_STATUS.md b/docs/FULLCARD_MIGRATION_STATUS.md
new file mode 100644
index 0000000..f9b3b67
--- /dev/null
+++ b/docs/FULLCARD_MIGRATION_STATUS.md
@@ -0,0 +1,89 @@
+# FullCard Migration Status
+
+**Branch:** `feature/fullcard-migration` (5 commits ahead of main)
+**Last Updated:** 2026-02-26
+
+## What This Branch Does
+
+Moves card-building logic (fitting continuous chances to discrete 2d6×d20 card mechanics) from the database to Python. Previously, Python sent raw continuous values and the database fitted them — meaning what you sent ≠ what got stored. Now Python builds the complete discrete card structure before POSTing.
+
+### New Files
+
+| File | Purpose |
+|------|---------|
+| `card_layout.py` | Core card models: PlayResult, CardResult, CardColumn, FullCard, FullBattingCard, FullPitchingCard. Ported from database's `card_creation.py`. Uses `col_*` key names. |
+| `batters/card_builder.py` | `build_batter_full_cards()` — takes vL/vR ratings, returns two FullBattingCard objects |
+| `pitchers/card_builder.py` | `build_pitcher_full_cards()` — same for pitchers |
+| `batters/models.py` | Extracted `BattingCardRatingsModel` from `calcs_batter.py` |
+| `pitchers/models.py` | Extracted `PitchingCardRatingsModel` from `calcs_pitcher.py` |
+| `offense_col_resolver.py` | Maps player→offense_col for retrosheet pipeline (fixed 883 silent KeyErrors) |
+| `tests/test_rate_stats_formulas.py` | Tests for extracted rating model formulas |
+
+### Integration Path
+
+```
+retrosheet_data.py → calcs_batter.py → build_batter_full_cards() → card_output() → vl_dict.update()
+ → calcs_pitcher.py → build_pitcher_full_cards() → card_output() → vr_dict.update()
+```
+
+Card builders are called inside a `try/except` in `calcs_batter.py:339` and `calcs_pitcher.py:134`. On failure, logs a warning and the card still posts without col_* layout data (backwards compatible).
+
+## Commits on Branch
+
+1. `a72abc0` — Add FullCard/CardColumn/CardResult models and card builder pipeline
+2. `39c652e` — Extract BattingCardRatingsModel and PitchingCardRatingsModel into models.py files
+3. `2bf3a6c` — Fix SLG formula drift in extracted rating models
+4. `32cadb1` — Fix two bugs in pitcher card builder dispatch logic
+5. `db38225` — Add offense_col resolver for retrosheet pipeline to fix 883 silent KeyErrors
+
+## What's Left Before Merge
+
+### 1. Database Migration (BLOCKING if you want col_* data persisted)
+
+The database repo (`paper-dynasty-database`) has a parallel `feature/fullcard-migration` branch with:
+- 9 new nullable TextFields on `BattingCardRatings` and `PitchingCardRatings` tables
+- Pydantic model updates in `routers_v2/battingcardratings.py` and `pitchingcardratings.py`
+- Migration SQL documented but **intentionally not run**
+
+Without this migration, the col_* fields in `card_output()` are computed but silently ignored by the API. The card-creation side works either way — it's a no-op until the DB accepts those fields.
+
+### 2. `live_series_update.py` Not Integrated
+
+This file has its own inline card generation — does NOT use `calcs_batter`/`calcs_pitcher`. Only the retrosheet pipeline benefits from the new card builders. Live series integration is a separate effort.
+
+### 3. No Tests for Core New Code
+
+`card_layout.py` (1015 lines), `batters/card_builder.py` (802 lines), `pitchers/card_builder.py` (776 lines) have zero test coverage. Priority targets:
+- `get_chances()` — maps continuous values to discrete EXACT_CHANCES
+- `CardColumn.add_result()` / `FullCard.card_fill()` — the filling algorithm
+- `card_output()` — serialization to col_* dict format
+- End-to-end: feed known ratings → assert expected card structure
+
+### 4. Test Failures (Pre-existing, Not From This Branch)
+
+- `test_wh_singles` — `wh_singles(12, .45)` returns `8.0` vs expected `Decimal('7.95')`. Fails on main too. The test file has a 1-line diff on this branch (import change).
+- 11 failures in `test_automated_data_fetcher.py` — mock setup issues, no diff on this branch.
+
+## Key Bugs Fixed During Development
+
+1. **Float/Decimal mismatch** — card_layout uses Decimal internally, models use float. Fix: wrap `card_fill()` outputs with `float()` in `assign_bchances()`/`assign_pchances()`.
+2. **PitchingCardRatingsModel xcheck defaults** — Non-zero defaults (xcheck_ss=7.0, etc.) corrupted accumulation. Fix: explicitly zero all xcheck fields in `new_ratings` constructor.
+3. **Pitcher dispatch logic** — Two bugs in how pitcher card builder routed plays to columns.
+4. **offense_col KeyError** — Retrosheet pipeline had no offense_col resolver, causing 883 silent failures.
+
+## Architecture Reference
+
+Full design doc: `docs/architecture/CARD_BUILDER_REDESIGN.md`
+
+Migration phases:
+- **Phase 1 (Extract & Validate)** — Done
+- **Phase 2 (Python Adoption)** — In progress (retrosheet pipeline wired up)
+- **Phase 3 (Database Simplification)** — Pending DB migration + removing fitting logic from DB
+- **Phase 4 (Enhancements)** — Future (contracts/card personalities, preview endpoint)
+
+## Decision Points for Next Session
+
+1. **Merge as-is?** Branch is safe to merge — col_* fields are computed but harmlessly ignored until DB migrates. Card generation behavior is unchanged.
+2. **Add tests first?** Recommended but not strictly required since card builders are behind try/except.
+3. **Run DB migration?** Enables end-to-end persistence. Requires deploying database branch too.
+4. **Wire up live series?** Separate PR recommended — different pipeline, different concerns.
diff --git a/offense_col_resolver.py b/offense_col_resolver.py
new file mode 100644
index 0000000..18b86b1
--- /dev/null
+++ b/offense_col_resolver.py
@@ -0,0 +1,102 @@
+"""Resolve offense_col for players in the retrosheet pipeline.
+
+Three-tier resolution:
+1. Cache hit → stored value from data-input/offense_col_cache.csv
+2. API pre-fetch → bulk-fetch all MlbPlayers, merge new entries into cache
+3. Hash fallback → deterministic MD5 of the stripped, lower-cased player name, % 3 + 1
+"""
+
+import hashlib
+import os
+
+import pandas as pd
+
+from db_calls import db_get
+from exceptions import logger
+
+CACHE_PATH = "data-input/offense_col_cache.csv"
+
+
+def hash_offense_col(player_name: str) -> int:
+    """Map a player name onto column 1, 2 or 3 via its MD5 digest.
+
+    The name is stripped and lower-cased before hashing, so surrounding
+    whitespace and letter case do not affect the result.
+    """
+    key = player_name.strip().lower().encode()
+    # Big-endian digest bytes are the same integer as int(hexdigest, 16).
+    digest_value = int.from_bytes(hashlib.md5(key).digest(), "big")
+    return digest_value % 3 + 1
+
+
+def load_cache(path: str = CACHE_PATH) -> dict[str, int]:
+    """Load {key_bbref: offense_col} from the CSV cache.
+
+    The cache is best-effort: a missing file, an empty file, or a file
+    without the key_bbref/offense_col columns yields an empty dict, and rows
+    with a blank key or blank offense_col are skipped.
+
+    Args:
+        path: Location of the CSV cache file.
+
+    Returns:
+        Mapping of key_bbref to offense_col for every usable row.
+
+    Raises:
+        ValueError: If a non-blank offense_col cell cannot be converted to int.
+    """
+    if not os.path.exists(path):
+        return {}
+    try:
+        df = pd.read_csv(path, dtype={"key_bbref": str})
+    except pd.errors.EmptyDataError:
+        logger.warning(f"offense_col_resolver: cache file {path} is empty, ignoring")
+        return {}
+    if not {"key_bbref", "offense_col"}.issubset(df.columns):
+        logger.warning(
+            f"offense_col_resolver: cache file {path} is missing expected columns, ignoring"
+        )
+        return {}
+    # Blank cells are read as NaN; casting those to int would raise.
+    df = df.dropna(subset=["key_bbref", "offense_col"])
+    return dict(zip(df["key_bbref"], df["offense_col"].astype(int)))
+
+
+def save_cache(cache: dict[str, tuple[str, int]], path: str = CACHE_PATH):
+    """Write the cache to CSV, one row per player, sorted by key_bbref.
+
+    The header row (key_bbref, player_name, offense_col) is always written,
+    even for an empty cache, so the file can be read back by column name.
+    The parent directory of ``path`` is created when it does not exist.
+
+    Args:
+        cache: Mapping of key_bbref to (player_name, offense_col).
+        path: Destination CSV file.
+    """
+    columns = ["key_bbref", "player_name", "offense_col"]
+    rows = [
+        {"key_bbref": bbref, "player_name": entry[0], "offense_col": entry[1]}
+        for bbref, entry in sorted(cache.items(), key=lambda item: item[0])
+    ]
+
+    parent = os.path.dirname(path)
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+
+    # Explicit columns keep the header present when there are no rows.
+    pd.DataFrame(rows, columns=columns).to_csv(path, index=False)
+
+
+async def resolve_offense_cols(
+    df: pd.DataFrame, api_available: bool = True
+) -> pd.DataFrame:
+    """Add an offense_col column to a stats DataFrame.
+
+    Each row is resolved from the merged cache/API lookup keyed by
+    key_bbref; rows without a match fall back to hash_offense_col() on
+    "use_name last_name".
+
+    Args:
+        df: DataFrame with key_bbref, use_name, last_name columns. The
+            column is added to this object in place.
+        api_available: If True, fetch "mlbplayers" from the API, merge the
+            entries into the cache and write the cache back to disk. A
+            failure here is logged and the existing cache is used.
+
+    Returns:
+        df with offense_col column added.
+    """
+    cache = load_cache()
+    # load_cache() only returns offense_col values; read the stored names
+    # separately so re-saving the cache does not blank them out.
+    cached_names = _load_cached_names()
+
+    # Seed full_cache from the existing file cache.
+    full_cache: dict[str, tuple[str, int]] = {
+        bbref: (cached_names.get(bbref, ""), oc) for bbref, oc in cache.items()
+    }
+
+    # Refresh from API if available.
+    if api_available:
+        try:
+            result = await db_get("mlbplayers")
+            if result and "players" in result:
+                api_count = 0
+                for p in result["players"]:
+                    bbref = p.get("key_bbref")
+                    oc = p.get("offense_col")
+                    # "or ''" also covers keys that are present but null.
+                    first_name = p.get("first_name") or ""
+                    last_name = p.get("last_name") or ""
+                    name = f"{first_name} {last_name}".strip()
+                    if bbref and oc:
+                        # Keep a previously stored name if the API has none.
+                        full_cache[bbref] = (
+                            name or cached_names.get(bbref, ""),
+                            int(oc),
+                        )
+                        api_count += 1
+                logger.info(
+                    f"offense_col_resolver: loaded {api_count} entries from API"
+                )
+                save_cache(full_cache)
+        except Exception as e:
+            # Best-effort refresh: fall back to whatever is in full_cache.
+            logger.warning(
+                f"offense_col_resolver: API fetch failed, using cache only: {e}"
+            )
+
+    # Build lookup from full_cache.
+    lookup = {k: v[1] for k, v in full_cache.items()}
+
+    # Resolve for each row.
+    def resolve_row(row):
+        bbref = row.get("key_bbref", "")
+        if bbref in lookup:
+            return lookup[bbref]
+        name = f'{row.get("use_name", "")} {row.get("last_name", "")}'.strip()
+        oc = hash_offense_col(name)
+        logger.debug(f"offense_col_resolver: hash fallback for {name} ({bbref}) → {oc}")
+        return oc
+
+    df["offense_col"] = df.apply(resolve_row, axis=1)
+    return df
+
+
+def _load_cached_names(path: str = CACHE_PATH) -> dict[str, str]:
+    """Return {key_bbref: player_name} from the CSV cache, or {} if unreadable."""
+    if not os.path.exists(path):
+        return {}
+    try:
+        # keep_default_na=False keeps blank names as "" instead of NaN.
+        names = pd.read_csv(path, dtype=str, keep_default_na=False)
+    except pd.errors.EmptyDataError:
+        return {}
+    if not {"key_bbref", "player_name"}.issubset(names.columns):
+        return {}
+    return dict(zip(names["key_bbref"], names["player_name"]))
diff --git a/pitchers/calcs_pitcher.py b/pitchers/calcs_pitcher.py
index 4950b5a..8380bdd 100644
--- a/pitchers/calcs_pitcher.py
+++ b/pitchers/calcs_pitcher.py
@@ -1,440 +1,205 @@
import math
-import pydantic
-
from creation_helpers import mround, sanitize_chance_output
-from typing import List, Literal
+from typing import List
from exceptions import logger
-
-class PitchingCardRatingsModel(pydantic.BaseModel):
- pitchingcard_id: int
- pit_hand: Literal['R', 'L']
- vs_hand: Literal['R', 'L']
- all_hits: float = 0.0
- all_other_ob: float = 0.0
- all_outs: float = 0.0
- rem_singles: float = 0.0
- rem_xbh: float = 0.0
- rem_hr: float = 0.0
- rem_doubles: float = 0.0
- hard_rate: float
- med_rate: float
- soft_rate: float
- # pull_rate: float
- # center_rate: float
- # slap_rate: float
- homerun: float = 0.0
- bp_homerun: float = 0.0
- triple: float = 0.0
- double_three: float = 0.0
- double_two: float = 0.0
- double_cf: float = 0.0
- single_two: float = 0.0
- single_one: float = 0.0
- single_center: float = 0.0
- bp_single: float = 0.0
- hbp: float = 0.0
- walk: float = 0.0
- strikeout: float = 0.0
- rem_flyballs: float = 0.0
- flyout_lf_b: float = 0.0
- flyout_cf_b: float = 0.0
- flyout_rf_b: float = 0.0
- rem_groundballs: float = 0.0
- groundout_a: float = 0.0
- groundout_b: float = 0.0
- xcheck_p: float = float(1.0)
- xcheck_c: float = float(3.0)
- xcheck_1b: float = float(2.0)
- xcheck_2b: float = float(6.0)
- xcheck_3b: float = float(3.0)
- xcheck_ss: float = float(7.0)
- xcheck_lf: float = float(2.0)
- xcheck_cf: float = float(3.0)
- xcheck_rf: float = float(2.0)
- avg: float = 0.0
- obp: float = 0.0
- slg: float = 0.0
-
- def total_chances(self):
- return mround(sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
- self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
- self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b, self.xcheck_p,
- self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss, self.xcheck_lf,
- self.xcheck_cf, self.xcheck_rf
- ]))
-
- def total_hits(self):
- return mround(sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
- self.single_two, self.single_one, self.single_center, self.bp_single
- ]))
-
- def total_ob(self):
- return mround(sum([
- self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
- self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk
- ]))
-
- def total_outs(self):
- return mround(sum([
- self.strikeout, self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b,
- self.xcheck_p, self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss,
- self.xcheck_lf, self.xcheck_cf, self.xcheck_rf
- ]))
-
- def calculate_rate_stats(self):
- self.avg = mround(self.total_hits() / 108, prec=5, base=0.00001)
- self.obp = mround((self.total_hits() + self.hbp + self.walk) / 108, prec=5, base=0.00001)
- self.slg = mround((
- self.homerun * 4 + self.triple * 3 + self.single_center + self.single_two + self.single_two +
- (self.double_two + self.double_three + self.double_two + self.bp_homerun) * 2 + self.bp_single / 2) / 108, prec=5, base=0.00001)
-
- def custom_to_dict(self):
- self.calculate_rate_stats()
- return {
- 'pitchingcard_id': self.pitchingcard_id,
- 'vs_hand': self.vs_hand,
- 'homerun': self.homerun,
- 'bp_homerun': self.bp_homerun,
- 'triple': self.triple,
- 'double_three': self.double_three,
- 'double_two': self.double_two,
- 'double_cf': self.double_cf,
- 'single_two': self.single_two,
- 'single_one': self.single_one,
- 'single_center': self.single_center,
- 'bp_single': self.bp_single,
- 'hbp': self.hbp,
- 'walk': self.walk,
- 'strikeout': self.strikeout,
- 'flyout_lf_b': self.flyout_lf_b,
- 'flyout_cf_b': self.flyout_cf_b,
- 'flyout_rf_b': self.flyout_rf_b,
- 'groundout_a': self.groundout_a,
- 'groundout_b': self.groundout_b,
- 'xcheck_p': self.xcheck_p,
- 'xcheck_c': self.xcheck_c,
- 'xcheck_1b': self.xcheck_1b,
- 'xcheck_2b': self.xcheck_2b,
- 'xcheck_3b': self.xcheck_3b,
- 'xcheck_ss': self.xcheck_ss,
- 'xcheck_lf': self.xcheck_lf,
- 'xcheck_cf': self.xcheck_cf,
- 'xcheck_rf': self.xcheck_rf,
- 'avg': self.avg,
- 'obp': self.obp,
- 'slg': self.slg
- }
-
- def calculate_singles(self, szn_hits, szn_singles):
- if szn_hits == 0:
- return
-
- tot = sanitize_chance_output(self.all_hits * (szn_singles / szn_hits))
- logger.debug(f'total singles: {tot}')
- self.rem_singles = tot
-
- self.bp_single = 5.0 if self.rem_singles >= 5 else 0.0
- self.rem_singles -= self.bp_single
-
- self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else 0.0
- self.rem_singles -= self.single_two
-
- self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else 0.0
- self.rem_singles -= self.single_one
-
- self.single_center = sanitize_chance_output(self.rem_singles)
- self.rem_singles -= self.single_center
-
- self.rem_xbh = self.all_hits - self.single_center - self.single_one - self.single_two - self.bp_single
- logger.info(f'remaining singles: {self.rem_singles} / total xbh: {self.rem_xbh}')
-
- def calculate_xbh(self, szn_doubles, szn_triples, szn_homeruns, hr_per_fb_rate):
- szn_xbh = szn_doubles + szn_triples + szn_homeruns
- if szn_xbh == 0:
- return
-
- hr_rate = mround(szn_homeruns / szn_xbh)
- tr_rate = mround(szn_triples / szn_xbh)
- do_rate = mround(szn_doubles / szn_xbh)
- logger.info(f'hr%: {hr_rate:.2f} / tr%: {tr_rate:.2f} / do%: {do_rate:.2f}')
-
- raw_do_chances = sanitize_chance_output(self.rem_xbh * do_rate)
- logger.info(f'raw do chances: {raw_do_chances}')
- self.double_two = raw_do_chances if self.soft_rate > .2 else 0.0
- self.double_cf = mround(raw_do_chances - self.double_two)
- self.rem_xbh -= mround(self.double_two + self.double_cf + self.double_three)
- logger.info(f'Double**: {self.double_two} / Double(cf): {self.double_cf} / rem xbh: {self.rem_xbh}')
-
- self.triple = sanitize_chance_output(self.rem_xbh * tr_rate)
- self.rem_xbh = mround(self.rem_xbh - self.triple)
- logger.info(f'Triple: {self.triple} / rem xbh: {self.rem_xbh}')
-
- raw_hr_chances = self.rem_xbh
- logger.info(f'raw hr chances: {raw_hr_chances}')
-
- if hr_per_fb_rate < .08:
- self.bp_homerun = sanitize_chance_output(raw_hr_chances, min_chances=1.0, rounding=1.0)
- elif hr_per_fb_rate > .28:
- self.homerun = raw_hr_chances
- elif hr_per_fb_rate > .18:
- self.bp_homerun = sanitize_chance_output(raw_hr_chances * 0.4, min_chances=1.0, rounding=1.0)
- self.homerun = self.rem_xbh - self.bp_homerun
- else:
- self.bp_homerun = sanitize_chance_output(raw_hr_chances * .75, min_chances=1.0, rounding=1.0)
- self.homerun = mround(self.rem_xbh - self.bp_homerun)
- logger.info(f'BP HR: {self.bp_homerun} / ND HR: {self.homerun}')
-
- self.rem_xbh -= (self.bp_homerun + self.homerun)
- logger.info(f'excess xbh: {self.rem_xbh}')
-
- if self.rem_xbh > 0:
- if self.triple > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to triple')
- self.triple += self.rem_xbh
- self.rem_xbh = 0.0
- elif self.double_cf > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to double(cf)')
- self.double_cf += self.rem_xbh
- self.rem_xbh = 0.0
- elif self.double_two > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to double**')
- self.double_two += self.rem_xbh
- self.rem_xbh = 0.0
- elif self.single_two > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to single**')
- self.single_two += self.rem_xbh
- self.rem_xbh = 0.0
- elif self.single_center > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to single(cf)')
- self.single_center += self.rem_xbh
- self.rem_xbh = 0.0
- elif self.single_one > 1:
- logger.info(f'Passing {self.rem_xbh} xbh to single*')
- self.single_one += self.rem_xbh
- self.rem_xbh = 0.0
- else:
- logger.info(f'Passing {self.rem_xbh} xbh to other_ob')
- self.all_other_ob += self.rem_xbh
-
- def calculate_other_ob(self, szn_walks, szn_hbp):
- if szn_walks + szn_hbp == 0:
- return
-
- this_hbp = sanitize_chance_output(self.all_other_ob * szn_hbp / (szn_walks + szn_hbp), rounding=1.0)
- logger.info(f'hbp value candidate: {this_hbp} / all_other_ob: {self.all_other_ob}')
- self.hbp = max(min(this_hbp, self.all_other_ob), 0)
- self.walk = mround(self.all_other_ob - self.hbp)
- logger.info(f'self.hbp: {self.hbp} / self.walk: {self.walk}')
-
- def calculate_strikouts(self, szn_strikeouts, szn_ab, szn_hits):
- denom = max(szn_ab - szn_hits, 1)
- raw_so = sanitize_chance_output(self.all_outs * (szn_strikeouts * 1.2) / denom)
- sum_bb_so = self.walk + raw_so
- excess = sum_bb_so - mround(math.floor(sum_bb_so))
- logger.info(f'raw_so: {raw_so} / sum_bb_so: {sum_bb_so} / excess: {excess}')
-
- self.strikeout = max(raw_so - excess - .05, 0.0)
- if self.strikeout < 0:
- logger.error(f'Strikeouts are less than zero :confusedpsyduck:')
-
- def calculate_other_outs(self, fb_pct, gb_pct, oppo_pct):
- rem_outs = 108 - self.total_chances()
-
- all_fo = sanitize_chance_output(rem_outs * fb_pct)
- if self.pit_hand == 'L':
- self.flyout_lf_b = sanitize_chance_output(all_fo * oppo_pct)
- else:
- self.flyout_rf_b = sanitize_chance_output(all_fo * oppo_pct)
- self.flyout_cf_b = all_fo - self.flyout_lf_b - self.flyout_rf_b
- rem_outs -= (self.flyout_lf_b + self.flyout_cf_b + self.flyout_rf_b)
-
- all_gb = rem_outs
- self.groundout_a = sanitize_chance_output(all_gb * self.soft_rate)
- self.groundout_b = sanitize_chance_output(all_gb - self.groundout_a)
-
- rem_chances = 108 - self.total_chances()
- logger.info(f'Remaining outs: {rem_chances}')
-
- if self.strikeout > 1:
- logger.info(f'Passing {rem_chances} outs to strikeouts')
- self.strikeout += rem_chances
- elif self.flyout_cf_b > 1:
- logger.info(f'Passing {rem_chances} outs to fly(cf)')
- self.flyout_cf_b += rem_chances
- elif self.flyout_rf_b > 1:
- logger.info(f'Passing {rem_chances} outs to fly(rf)')
- self.flyout_rf_b += rem_chances
- elif self.flyout_lf_b > 1:
- logger.info(f'Passing {rem_chances} outs to fly(lf)')
- self.flyout_lf_b += rem_chances
- elif self.groundout_a > 1:
- logger.info(f'Passing {rem_chances} outs to gbA')
- self.groundout_a += rem_chances
- elif self.single_one > 1:
- logger.info(f'Passing {rem_chances} outs to single*')
- self.single_one += rem_chances
- elif self.single_center > 1:
- logger.info(f'Passing {rem_chances} outs to single(cf)')
- self.single_center += rem_chances
- elif self.single_two > 1:
- logger.info(f'Passing {rem_chances} outs to single**')
- self.single_two += rem_chances
- elif self.double_two > 1:
- logger.info(f'Passing {rem_chances} outs to double**')
- self.double_two += rem_chances
- elif self.double_cf > 1:
- logger.info(f'Passing {rem_chances} outs to double(cf)')
- self.double_cf += rem_chances
- elif self.triple > 1:
- logger.info(f'Passing {rem_chances} outs to triple')
- self.triple += rem_chances
- elif self.homerun > 1:
- logger.info(f'Passing {rem_chances} outs to homerun')
- self.homerun += rem_chances
- else:
- raise ValueError(f'Could not complete card')
+from pitchers.models import PitchingCardRatingsModel
+from pitchers.card_builder import build_pitcher_full_cards
def get_pitcher_ratings(df_data) -> List[dict]:
# Calculate OB values with min cap (ensure scalar values for comparison)
- ob_vl = float(108 * (df_data['BB_vL'] + df_data['HBP_vL']) / df_data['TBF_vL'])
- ob_vr = float(108 * (df_data['BB_vR'] + df_data['HBP_vR']) / df_data['TBF_vR'])
+ ob_vl = float(108 * (df_data["BB_vL"] + df_data["HBP_vL"]) / df_data["TBF_vL"])
+ ob_vr = float(108 * (df_data["BB_vR"] + df_data["HBP_vR"]) / df_data["TBF_vR"])
vl = PitchingCardRatingsModel(
pitchingcard_id=df_data.pitchingcard_id,
pit_hand=df_data.pitch_hand,
- vs_hand='L',
- all_hits=sanitize_chance_output((df_data['AVG_vL'] - 0.05) * 108), # Subtracting chances from BP results
+ vs_hand="L",
+ all_hits=sanitize_chance_output(
+ (df_data["AVG_vL"] - 0.05) * 108
+ ), # Subtracting chances from BP results
all_other_ob=sanitize_chance_output(min(ob_vl, 0.8)),
- hard_rate=df_data['Hard%_vL'],
- med_rate=df_data['Med%_vL'],
- soft_rate=df_data['Soft%_vL']
+ hard_rate=df_data["Hard%_vL"],
+ med_rate=df_data["Med%_vL"],
+ soft_rate=df_data["Soft%_vL"],
)
vr = PitchingCardRatingsModel(
pitchingcard_id=df_data.pitchingcard_id,
pit_hand=df_data.pitch_hand,
- vs_hand='R',
- all_hits=sanitize_chance_output((df_data['AVG_vR'] - 0.05) * 108), # Subtracting chances from BP results
+ vs_hand="R",
+ all_hits=sanitize_chance_output(
+ (df_data["AVG_vR"] - 0.05) * 108
+ ), # Subtracting chances from BP results
all_other_ob=sanitize_chance_output(min(ob_vr, 0.8)),
- hard_rate=df_data['Hard%_vR'],
- med_rate=df_data['Med%_vR'],
- soft_rate=df_data['Soft%_vR']
+ hard_rate=df_data["Hard%_vR"],
+ med_rate=df_data["Med%_vR"],
+ soft_rate=df_data["Soft%_vR"],
)
vl.all_outs = mround(108 - vl.all_hits - vl.all_other_ob, base=0.5)
vr.all_outs = mround(108 - vr.all_hits - vr.all_other_ob, base=0.5)
logger.info(
- f'vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} '
- f'/ Total: {vl.total_chances()}'
+ f"vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} "
+ f"/ Total: {vl.total_chances()}"
)
logger.info(
- f'vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} '
- f'/ Total: {vr.total_chances()}'
+ f"vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} "
+ f"/ Total: {vr.total_chances()}"
)
- vl.calculate_singles(df_data['H_vL'], df_data['H_vL'] - df_data['2B_vL'] - df_data['3B_vL'] - df_data['HR_vL'])
- vr.calculate_singles(df_data['H_vR'], df_data['H_vR'] - df_data['2B_vR'] - df_data['3B_vR'] - df_data['HR_vR'])
+ vl.calculate_singles(
+ df_data["H_vL"],
+ df_data["H_vL"] - df_data["2B_vL"] - df_data["3B_vL"] - df_data["HR_vL"],
+ )
+ vr.calculate_singles(
+ df_data["H_vR"],
+ df_data["H_vR"] - df_data["2B_vR"] - df_data["3B_vR"] - df_data["HR_vR"],
+ )
- logger.info(f'vL: All Hits: {vl.all_hits} / BP Singles: {vl.bp_single} / Single 2: {vl.single_two} / '
- f'Single 1: {vl.single_one} / Single CF: {vl.single_center}')
- logger.info(f'vR: All Hits: {vr.all_hits} / BP Singles: {vr.bp_single} / Single 2: {vr.single_two} / '
- f'Single 1: {vr.single_one} / Single CF: {vr.single_center}')
+ logger.info(
+ f"vL: All Hits: {vl.all_hits} / BP Singles: {vl.bp_single} / Single 2: {vl.single_two} / "
+ f"Single 1: {vl.single_one} / Single CF: {vl.single_center}"
+ )
+ logger.info(
+ f"vR: All Hits: {vr.all_hits} / BP Singles: {vr.bp_single} / Single 2: {vr.single_two} / "
+ f"Single 1: {vr.single_one} / Single CF: {vr.single_center}"
+ )
- vl.calculate_xbh(df_data['2B_vL'], df_data['3B_vL'], df_data['HR_vL'], df_data['HR/FB_vL'])
- vr.calculate_xbh(df_data['2B_vR'], df_data['3B_vR'], df_data['HR_vR'], df_data['HR/FB_vR'])
+ vl.calculate_xbh(
+ df_data["2B_vL"], df_data["3B_vL"], df_data["HR_vL"], df_data["HR/FB_vL"]
+ )
+ vr.calculate_xbh(
+ df_data["2B_vR"], df_data["3B_vR"], df_data["HR_vR"], df_data["HR/FB_vR"]
+ )
- logger.debug(f'vL: All XBH: {vl.all_hits - vl.single_one - vl.single_two - vl.single_center - vl.bp_single} / '
- f'Double**: {vl.double_two} / Double(cf): {vl.double_cf} / Triple: {vl.triple} / '
- f'BP HR: {vl.bp_homerun} / ND HR: {vl.homerun}')
- logger.debug(f'vR: All XBH: {vr.all_hits - vr.single_one - vr.single_two - vr.single_center - vr.bp_single} / '
- f'Double**: {vr.double_two} / Double(cf): {vr.double_cf} / Triple: {vr.triple} / '
- f'BP HR: {vr.bp_homerun} / ND HR: {vr.homerun}')
+ logger.debug(
+ f"vL: All XBH: {vl.all_hits - vl.single_one - vl.single_two - vl.single_center - vl.bp_single} / "
+ f"Double**: {vl.double_two} / Double(cf): {vl.double_cf} / Triple: {vl.triple} / "
+ f"BP HR: {vl.bp_homerun} / ND HR: {vl.homerun}"
+ )
+ logger.debug(
+ f"vR: All XBH: {vr.all_hits - vr.single_one - vr.single_two - vr.single_center - vr.bp_single} / "
+ f"Double**: {vr.double_two} / Double(cf): {vr.double_cf} / Triple: {vr.triple} / "
+ f"BP HR: {vr.bp_homerun} / ND HR: {vr.homerun}"
+ )
- vl.calculate_other_ob(df_data['BB_vL'], df_data['HBP_vL'])
- vr.calculate_other_ob(df_data['BB_vR'], df_data['HBP_vR'])
+ vl.calculate_other_ob(df_data["BB_vL"], df_data["HBP_vL"])
+ vr.calculate_other_ob(df_data["BB_vR"], df_data["HBP_vR"])
- logger.info(f'vL: All other OB: {vl.all_other_ob} / HBP: {vl.hbp} / BB: {vl.walk} / '
- f'Total Chances: {vl.total_chances()}')
- logger.info(f'vR: All other OB: {vr.all_other_ob} / HBP: {vr.hbp} / BB: {vr.walk} / '
- f'Total Chances: {vr.total_chances()}')
+ logger.info(
+ f"vL: All other OB: {vl.all_other_ob} / HBP: {vl.hbp} / BB: {vl.walk} / "
+ f"Total Chances: {vl.total_chances()}"
+ )
+ logger.info(
+ f"vR: All other OB: {vr.all_other_ob} / HBP: {vr.hbp} / BB: {vr.walk} / "
+ f"Total Chances: {vr.total_chances()}"
+ )
vl.calculate_strikouts(
- df_data['SO_vL'], df_data['TBF_vL'] - df_data['BB_vL'] - df_data['IBB_vL'] - df_data['HBP_vL'], df_data['H_vL'])
+ df_data["SO_vL"],
+ df_data["TBF_vL"] - df_data["BB_vL"] - df_data["IBB_vL"] - df_data["HBP_vL"],
+ df_data["H_vL"],
+ )
vr.calculate_strikouts(
- df_data['SO_vR'], df_data['TBF_vR'] - df_data['BB_vR'] - df_data['IBB_vR'] - df_data['HBP_vR'], df_data['H_vR'])
+ df_data["SO_vR"],
+ df_data["TBF_vR"] - df_data["BB_vR"] - df_data["IBB_vR"] - df_data["HBP_vR"],
+ df_data["H_vR"],
+ )
- logger.info(f'vL: All Outs: {vl.all_outs} / Ks: {vl.strikeout} / Current Outs: {vl.total_outs()}')
- logger.info(f'vR: All Outs: {vr.all_outs} / Ks: {vr.strikeout} / Current Outs: {vr.total_outs()}')
+ logger.info(
+ f"vL: All Outs: {vl.all_outs} / Ks: {vl.strikeout} / Current Outs: {vl.total_outs()}"
+ )
+ logger.info(
+ f"vR: All Outs: {vr.all_outs} / Ks: {vr.strikeout} / Current Outs: {vr.total_outs()}"
+ )
- vl.calculate_other_outs(df_data['FB%_vL'], df_data['GB%_vL'], df_data['Oppo%_vL'])
- vr.calculate_other_outs(df_data['FB%_vR'], df_data['GB%_vR'], df_data['Oppo%_vR'])
+ vl.calculate_other_outs(df_data["FB%_vL"], df_data["GB%_vL"], df_data["Oppo%_vL"])
+ vr.calculate_other_outs(df_data["FB%_vR"], df_data["GB%_vR"], df_data["Oppo%_vR"])
- logger.info(f'vL: Total chances: {vl.total_chances()}')
- logger.info(f'vR: Total chances: {vr.total_chances()}')
+ logger.info(f"vL: Total chances: {vl.total_chances()}")
+ logger.info(f"vR: Total chances: {vr.total_chances()}")
- return [vl.custom_to_dict(), vr.custom_to_dict()]
+ vl_dict = vl.custom_to_dict()
+ vr_dict = vr.custom_to_dict()
+
+ try:
+ offense_col = int(df_data["offense_col"]) if "offense_col" in df_data else 1
+ player_id = (
+ int(df_data["player_id"])
+ if "player_id" in df_data
+ else abs(hash(df_data["key_bbref"])) % 10000
+ )
+ vl_card, vr_card = build_pitcher_full_cards(
+ vl, vr, offense_col, player_id, df_data["pitch_hand"]
+ )
+ vl_dict.update(vl_card.card_output())
+ vr_dict.update(vr_card.card_output())
+ except Exception as e:
+ logger.warning(f"Card layout builder failed for {df_data.name}: {e}")
+
+ return [vl_dict, vr_dict]
def total_chances(chance_data):
sum_chances = 0
for key in chance_data:
- if key not in ['id', 'player_id', 'cardset_id', 'vs_hand', 'is_prep']:
+ if key not in ["id", "player_id", "cardset_id", "vs_hand", "is_prep"]:
sum_chances += chance_data[key]
return sum_chances
def soft_rate(pct):
- if pct > .2:
- return 'high'
- elif pct < .1:
- return 'low'
+ if pct > 0.2:
+ return "high"
+ elif pct < 0.1:
+ return "low"
else:
- return 'avg'
+ return "avg"
def med_rate(pct):
- if pct > .65:
- return 'high'
- elif pct < .4:
- return 'low'
+ if pct > 0.65:
+ return "high"
+ elif pct < 0.4:
+ return "low"
else:
- return 'avg'
+ return "avg"
def hard_rate(pct):
- if pct > .4:
- return 'high'
- elif pct < .2:
- return 'low'
+ if pct > 0.4:
+ return "high"
+ elif pct < 0.2:
+ return "low"
else:
- return 'avg'
+ return "avg"
def hr_per_fb_rate(pct):
- if pct > .18:
- return 'high'
- elif pct < .08:
- return 'low'
+ if pct > 0.18:
+ return "high"
+ elif pct < 0.08:
+ return "low"
else:
- return 'avg'
+ return "avg"
def all_singles(row, hits_vl, hits_vr):
if int(row[7]) == 0:
tot_singles_vl = 0
else:
- tot_singles_vl = hits_vl * ((int(row[7]) - int(row[8]) - int(row[9]) - int(row[12]))
- / int(row[7]))
+ tot_singles_vl = hits_vl * (
+ (int(row[7]) - int(row[8]) - int(row[9]) - int(row[12])) / int(row[7])
+ )
if int(row[40]) == 0:
tot_singles_vr = 0
else:
- tot_singles_vr = hits_vr * ((int(row[40]) - int(row[41]) - int(row[42]) - int(row[45]))
- / int(row[40]))
+ tot_singles_vr = hits_vr * (
+ (int(row[40]) - int(row[41]) - int(row[42]) - int(row[45])) / int(row[40])
+ )
return mround(tot_singles_vl), mround(tot_singles_vr)
@@ -447,12 +212,12 @@ def bp_singles(singles_vl, singles_vr):
def wh_singles(rem_si_vl, rem_si_vr, hard_rate_vl, hard_rate_vr):
- if hard_rate_vl == 'low':
+ if hard_rate_vl == "low":
whs_vl = 0
else:
whs_vl = rem_si_vl / 2
- if hard_rate_vr == 'low':
+ if hard_rate_vr == "low":
whs_vr = 0
else:
whs_vr = rem_si_vr / 2
@@ -461,12 +226,12 @@ def wh_singles(rem_si_vl, rem_si_vr, hard_rate_vl, hard_rate_vr):
def one_singles(rem_si_vl, rem_si_vr, soft_rate_vl, soft_rate_vr):
- if soft_rate_vl == 'high':
+ if soft_rate_vl == "high":
oss_vl = rem_si_vl
else:
oss_vl = 0
- if soft_rate_vr == 'high':
+ if soft_rate_vr == "high":
oss_vr = rem_si_vr
else:
oss_vr = 0
@@ -475,19 +240,19 @@ def one_singles(rem_si_vl, rem_si_vr, soft_rate_vl, soft_rate_vr):
def bp_homerun(hr_vl, hr_vr, hr_rate_vl, hr_rate_vr):
- if hr_rate_vl == 'low':
+ if hr_rate_vl == "low":
bphr_vl = hr_vl
- elif hr_rate_vl == 'avg':
- bphr_vl = hr_vl * .75
+ elif hr_rate_vl == "avg":
+ bphr_vl = hr_vl * 0.75
else:
- bphr_vl = hr_vl * .4
+ bphr_vl = hr_vl * 0.4
- if hr_rate_vr == 'low':
+ if hr_rate_vr == "low":
bphr_vr = hr_vr
- elif hr_rate_vr == 'avg':
- bphr_vr = hr_vr * .75
+ elif hr_rate_vr == "avg":
+ bphr_vr = hr_vr * 0.75
else:
- bphr_vr = hr_vr * .4
+ bphr_vr = hr_vr * 0.4
return mround(bphr_vl), mround(bphr_vr)
@@ -500,8 +265,8 @@ def triples(all_xbh_vl, all_xbh_vr, triple_rate_vl, triple_rate_vr):
def two_doubles(all_doubles_vl, all_doubles_vr, soft_rate_vl, soft_rate_vr):
- two_doubles_vl = all_doubles_vl if soft_rate_vl == 'high' else 0
- two_doubles_vr = all_doubles_vr if soft_rate_vr == 'high' else 0
+ two_doubles_vl = all_doubles_vl if soft_rate_vl == "high" else 0
+ two_doubles_vr = all_doubles_vr if soft_rate_vr == "high" else 0
return mround(two_doubles_vl), mround(two_doubles_vr)
@@ -523,21 +288,21 @@ def hbps(all_ob, this_hbp_rate):
def xchecks(pos, all_chances=True):
- if pos.lower() == 'p':
+ if pos.lower() == "p":
return 1 if all_chances else 0
- elif pos.lower() == 'c':
+ elif pos.lower() == "c":
return 3 if all_chances else 2
- elif pos.lower() == '1b':
+ elif pos.lower() == "1b":
return 2 if all_chances else 1
- elif pos.lower() == '2b':
+ elif pos.lower() == "2b":
return 6 if all_chances else 5
- elif pos.lower() == '3b':
+ elif pos.lower() == "3b":
return 3 if all_chances else 2
- elif pos.lower() == 'ss':
+ elif pos.lower() == "ss":
return 7 if all_chances else 6
- elif pos.lower() == 'lf':
+ elif pos.lower() == "lf":
return 2 if all_chances else 1
- elif pos.lower() == 'cf':
+ elif pos.lower() == "cf":
return 3 if all_chances else 2
else:
return 2 if all_chances else 1
@@ -553,7 +318,7 @@ def oppo_fly(all_fly, oppo_rate):
def groundball_a(all_gb, dp_rate):
if all_gb == 0 or dp_rate == 0:
return 0
- elif dp_rate > .6:
+ elif dp_rate > 0.6:
return all_gb
else:
return mround(all_gb * (dp_rate * 1.5))
@@ -563,20 +328,22 @@ def balks(total_balks: int, innings: float, season_pct):
try:
total_balks = int(total_balks)
except ValueError:
- logger.error(f'Could not read balks: {total_balks} / setting to 0')
+ logger.error(f"Could not read balks: {total_balks} / setting to 0")
total_balks = 0
-
+
try:
innings = float(innings)
except ValueError:
- logger.error(f'Could not read innings: {innings} / setting to 0')
+ logger.error(f"Could not read innings: {innings} / setting to 0")
innings = 0
if innings == 0:
return 0
-
- numerator = (total_balks * 290 * season_pct)
- logger.info(f'total_balks: {total_balks} / season_pct {season_pct} / innings: {innings} / numerator: {numerator}')
+
+ numerator = total_balks * 290 * season_pct
+ logger.info(
+ f"total_balks: {total_balks} / season_pct {season_pct} / innings: {innings} / numerator: {numerator}"
+ )
return min(round(numerator / innings), 20)
@@ -592,19 +359,19 @@ def closer_rating(gf: int, saves: int, games: int):
if gf == 0 or games == 0 or saves == 0:
return None
- if gf / games >= .875:
+ if gf / games >= 0.875:
return 6
- elif gf / games >= .8:
+ elif gf / games >= 0.8:
return 5
- elif gf / games >= .7:
+ elif gf / games >= 0.7:
return 4
- elif gf / games >= .55:
+ elif gf / games >= 0.55:
return 3
- elif gf / games >= .4:
+ elif gf / games >= 0.4:
return 2
- elif gf / games >= .25:
+ elif gf / games >= 0.25:
return 1
- elif gf / games >= .1:
+ elif gf / games >= 0.1:
return 0
else:
return None
diff --git a/pitchers/card_builder.py b/pitchers/card_builder.py
new file mode 100644
index 0000000..07d8b20
--- /dev/null
+++ b/pitchers/card_builder.py
@@ -0,0 +1,776 @@
+import copy
+import math
+import logging
+from decimal import Decimal
+
+from card_layout import (
+ FullPitchingCard,
+ PLAY_RESULTS,
+ PlayResult,
+ EXACT_CHANCES,
+ get_chances,
+)
+from pitchers.models import PitchingCardRatingsModel
+
+logger = logging.getLogger(__name__)
+
+
+def build_pitcher_full_cards(
+ ratings_vl: PitchingCardRatingsModel,
+ ratings_vr: PitchingCardRatingsModel,
+ offense_col: int,
+ player_id: int,
+ hand: str,
+) -> tuple:
+ """Build vL and vR FullPitchingCard objects from pre-calculated ratings.
+
+ Returns (vl_card, vr_card).
+ """
+ player_binary = player_id % 2
+
+ vl = FullPitchingCard(offense_col=offense_col, alt_direction=player_binary)
+ vr = FullPitchingCard(offense_col=offense_col, alt_direction=player_binary)
+
+ def assign_pchances(this_card, play, chances, secondary_play=None):
+ r_data = this_card.add_result(play, chances, secondary_play)
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ else:
+ for x in EXACT_CHANCES + [Decimal("0.95")]:
+ if x < math.floor(chances - Decimal("0.05")):
+ r_data = this_card.add_result(
+ play, Decimal(math.floor(chances)), secondary_play
+ )
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ break
+ if x < chances and secondary_play is not None:
+ r_data = this_card.add_result(play, x, secondary_play)
+ if r_data:
+ return float(r_data[0]), float(r_data[1])
+ return 0, 0
+
+ def get_preferred_mif(ratings):
+ if hand == "L" and ratings.vs_hand == "L":
+ return "ss"
+ elif hand == "L" or (hand == "R" and ratings.vs_hand == "R"):
+ return "2b"
+ else:
+ return "ss"
+
+ for card, data, vs_hand in [
+ (vl, copy.deepcopy(ratings_vl), "L"),
+ (vr, copy.deepcopy(ratings_vr), "R"),
+ ]:
+ new_ratings = PitchingCardRatingsModel(
+ pitchingcard_id=data.pitchingcard_id,
+ pit_hand=data.pit_hand,
+ vs_hand=vs_hand,
+ hard_rate=data.hard_rate,
+ med_rate=data.med_rate,
+ soft_rate=data.soft_rate,
+ xcheck_p=0.0,
+ xcheck_c=0.0,
+ xcheck_1b=0.0,
+ xcheck_2b=0.0,
+ xcheck_3b=0.0,
+ xcheck_ss=0.0,
+ xcheck_lf=0.0,
+ xcheck_cf=0.0,
+ xcheck_rf=0.0,
+ )
+
+ res_chances = data.bp_homerun
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(card, PLAY_RESULTS["bp-hr"], ch)
+ res_chances -= r_val[0]
+ new_ratings.bp_homerun += r_val[0]
+
+ res_chances = data.hbp
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="HBP", short_name="HBP"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.hbp += r_val[0]
+ if r_val[0] == 0:
+ break
+
+ res_chances = data.xcheck_p
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="GB (p) X", short_name="GB (p) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_p += r_val[0]
+
+ res_chances = data.xcheck_c
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="CATCH X", short_name="CATCH X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_c += r_val[0]
+
+ res_chances = data.xcheck_1b
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="GB (1b) X", short_name="GB (1b) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_1b += r_val[0]
+
+ res_chances = data.xcheck_3b
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="GB (3b) X", short_name="GB (3b) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_3b += r_val[0]
+
+ res_chances = data.xcheck_rf
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="FLY (rf) X", short_name="FLY (rf) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_rf += r_val[0]
+
+ res_chances = data.xcheck_lf
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="FLY (lf) X", short_name="FLY (lf) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_lf += r_val[0]
+
+ res_chances = data.xcheck_2b
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="GB (2b) X", short_name="GB (2b) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_2b += r_val[0]
+
+ res_chances = data.xcheck_cf
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="FLY (cf) X", short_name="FLY (cf) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_cf += r_val[0]
+
+ res_chances = data.xcheck_ss
+ while res_chances > 0:
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="GB (ss) X", short_name="GB (ss) X"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.xcheck_ss += r_val[0]
+
+ res_chances = data.walk
+ while res_chances >= 1:
+ ch = get_chances(res_chances)
+ if data.strikeout > max(1 - ch, 0):
+ secondary = PlayResult(full_name="strikeout", short_name="so")
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["walk"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.walk += r_val[0]
+ if r_val[1] > 0:
+ data.strikeout -= r_val[1]
+ new_ratings.strikeout += r_val[1]
+
+ if r_val[0] == 0:
+ break
+
+ res_chances = data.homerun
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_cf > 0:
+ data.double_cf += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.triple > 0:
+ data.triple += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.double_cf > (
+ data.flyout_rf_b + data.flyout_lf_b
+ ) and data.double_cf > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do-cf"]
+ elif data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-cf"]
+ elif data.flyout_lf_b > data.flyout_rf_b and data.flyout_lf_b > max(
+ 1 - ch, 0
+ ):
+ secondary = PLAY_RESULTS["fly-lf"]
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-rf"]
+ elif data.double_cf > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do-cf"]
+ elif data.double_three > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do***"]
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do**"]
+ elif data.triple > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["tr"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["hr"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.homerun += r_val[0]
+ if r_val[1] > 0:
+ if "DO (" in secondary.short_name:
+ data.double_cf -= r_val[1]
+ new_ratings.double_cf += r_val[1]
+ elif "lf" in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "cf" in secondary.short_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "rf" in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "***" in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif "**" in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+ elif "TR" in secondary.short_name:
+ data.triple -= r_val[1]
+ new_ratings.triple += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.triple
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_cf > 0:
+ data.double_cf += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si**"]
+ elif data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-cf"]
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-lf"]
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-rf"]
+ elif data.double_cf > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do-cf"]
+ elif data.double_three > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do***"]
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do**"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["tr"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.triple += r_val[0]
+ if r_val[1] > 0:
+ if "DO (" in secondary.short_name:
+ data.double_cf -= r_val[1]
+ new_ratings.double_cf += r_val[1]
+ elif "lf" in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "cf" in secondary.short_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "rf" in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "***" in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif "SI" in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif "**" in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.double_three
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_cf > 0:
+ data.double_cf += res_chances
+ elif data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si**"]
+ elif data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-cf"]
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-lf"]
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-rf"]
+ elif data.double_cf > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do-cf"]
+ elif data.double_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["do**"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["do***"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.double_three += r_val[0]
+ if r_val[1] > 0:
+ if "DO (" in secondary.short_name:
+ data.double_cf -= r_val[1]
+ new_ratings.double_cf += r_val[1]
+ elif "lf" in secondary.short_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "cf" in secondary.short_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "rf" in secondary.short_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "SI" in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif "**" in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.double_cf
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.double_two > 0:
+ data.double_two += res_chances
+ elif data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (cf) B", short_name="fly B")
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (lf) B", short_name="fly B")
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (rf) B", short_name="fly b")
+ elif data.single_one > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si*"]
+ elif data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si**"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["do-cf"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.double_cf += r_val[0]
+ if r_val[1] > 0:
+ if "lf" in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "rf" in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "cf" in secondary.full_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "***" in secondary.short_name:
+ data.double_three -= r_val[1]
+ new_ratings.double_three += r_val[1]
+ elif "SI" in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+ elif "**" in secondary.short_name:
+ data.double_two -= r_val[1]
+ new_ratings.double_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.double_two
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_two > 0:
+ data.single_two += res_chances
+ elif data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.single_two > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si**"]
+ elif data.single_center > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["si-cf"]
+ elif data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-cf"]
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-lf"]
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-rf"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["do**"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.double_two += r_val[0]
+ if r_val[1] > 0:
+ if "lf" in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "rf" in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "cf" in secondary.full_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "SI" in secondary.short_name:
+ data.single_two -= r_val[1]
+ new_ratings.single_two += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.single_two
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_center > 0:
+ data.single_center += res_chances
+ elif data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ pref_mif = get_preferred_mif(new_ratings)
+ ch = get_chances(res_chances)
+ if data.groundout_a > max(1 - ch, 0):
+ temp_mif = get_preferred_mif(new_ratings)
+ pref_mif = "ss" if temp_mif == "2b" else "2b"
+ secondary = PlayResult(
+ full_name=f"gb ({pref_mif}) A", short_name=f"gb ({pref_mif}) A"
+ )
+ elif data.groundout_b > max(1 - ch, 0):
+ secondary = PlayResult(
+ full_name=f"gb ({pref_mif}) B", short_name=f"gb ({pref_mif}) B"
+ )
+ elif data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-cf"]
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-lf"]
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PLAY_RESULTS["fly-rf"]
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["si**"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.single_two += r_val[0]
+ if r_val[1] > 0:
+ if "lf" in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "rf" in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+ elif "cf" in secondary.full_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "B" in secondary.short_name:
+ data.groundout_b -= r_val[1]
+ new_ratings.groundout_b += r_val[1]
+ elif "A" in secondary.short_name:
+ data.groundout_a -= r_val[1]
+ new_ratings.groundout_a += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.single_center
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.single_one > 0:
+ data.single_one += res_chances
+ elif data.walk > 0:
+ data.walk += res_chances
+ break
+
+ ch = get_chances(res_chances)
+ if data.flyout_cf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (cf) B", short_name="fly B")
+ elif (
+ data.flyout_lf_b > max(1 - ch, 0)
+ and data.flyout_lf_b > data.flyout_rf_b
+ ):
+ secondary = PlayResult(full_name="fly (lf) B", short_name="fly B")
+ elif data.flyout_rf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (rf) B", short_name="fly B")
+ elif data.flyout_lf_b > max(1 - ch, 0):
+ secondary = PlayResult(full_name="fly (lf) B", short_name="fly B")
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["si-cf"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.single_center += r_val[0]
+ if r_val[1] > 0:
+ if "CF" in secondary.short_name:
+ data.flyout_cf_b -= r_val[1]
+ new_ratings.flyout_cf_b += r_val[1]
+ elif "LF" in secondary.full_name:
+ data.flyout_lf_b -= r_val[1]
+ new_ratings.flyout_lf_b += r_val[1]
+ elif "RF" in secondary.full_name:
+ data.flyout_rf_b -= r_val[1]
+ new_ratings.flyout_rf_b += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.single_one
+ retries = 0
+ while res_chances > 0:
+ if res_chances < 1 or retries > 0:
+ if data.walk > 0:
+ data.walk += res_chances
+ break
+
+ pref_mif = get_preferred_mif(new_ratings)
+ ch = get_chances(res_chances)
+ if data.groundout_b > max(1 - ch, 0):
+ secondary = PlayResult(
+ full_name=f"gb ({pref_mif}) B", short_name=f"gb ({pref_mif}) B"
+ )
+ elif data.groundout_a > max(1 - ch, 0):
+ temp_mif = get_preferred_mif(new_ratings)
+ pref_mif = "ss" if temp_mif == "2b" else "2b"
+ secondary = PlayResult(
+ full_name=f"gb ({pref_mif}) A", short_name=f"gb ({pref_mif}) A"
+ )
+ else:
+ secondary = None
+
+ r_val = assign_pchances(card, PLAY_RESULTS["si*"], ch, secondary)
+ res_chances -= r_val[0]
+ new_ratings.single_one += r_val[0]
+ if r_val[1] > 0:
+ if "B" in secondary.short_name:
+ data.groundout_b -= r_val[1]
+ new_ratings.groundout_b += r_val[1]
+ elif "A" in secondary.short_name:
+ data.groundout_a -= r_val[1]
+ new_ratings.groundout_a += r_val[1]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.bp_single
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(card, PLAY_RESULTS["bp-si"], ch)
+ res_chances -= r_val[0]
+ new_ratings.bp_single += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.strikeout
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card, PlayResult(full_name="strikeout", short_name="so"), ch
+ )
+ res_chances -= r_val[0]
+ new_ratings.strikeout += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.flyout_cf_b
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(card, PLAY_RESULTS["fly-cf"], ch)
+ res_chances -= r_val[0]
+ new_ratings.flyout_cf_b += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.flyout_lf_b
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(card, PLAY_RESULTS["fly-lf"], ch)
+ res_chances -= r_val[0]
+ new_ratings.flyout_lf_b += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.flyout_rf_b
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(card, PLAY_RESULTS["fly-rf"], ch)
+ res_chances -= r_val[0]
+ new_ratings.flyout_rf_b += r_val[0]
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.groundout_a
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+
+ temp_mif = get_preferred_mif(new_ratings)
+ pref_mif = "ss" if temp_mif == "2b" else "2b"
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card,
+ PlayResult(
+ full_name=f"gb ({pref_mif}) A", short_name=f"gb ({pref_mif}) A"
+ ),
+ ch,
+ )
+ res_chances -= r_val[0]
+ new_ratings.groundout_a += r_val[0]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ res_chances = data.groundout_b
+ retries = 0
+ while res_chances > 0:
+ if retries > 0:
+ break
+
+ pref_mif = get_preferred_mif(new_ratings)
+ ch = get_chances(res_chances)
+ r_val = assign_pchances(
+ card,
+ PlayResult(
+ full_name=f"gb ({pref_mif}) B", short_name=f"gb ({pref_mif}) B"
+ ),
+ ch,
+ )
+ res_chances -= r_val[0]
+ new_ratings.groundout_b += r_val[0]
+
+ if r_val[0] == 0:
+ retries += 1
+
+ plays = sorted(
+ [
+ (data.strikeout, "so"),
+ (data.groundout_a, "gb"),
+ (data.flyout_lf_b, "lf"),
+ (data.flyout_rf_b, "rf"),
+ ],
+ key=lambda z: z[0],
+ reverse=True,
+ )
+ count_filler = -1
+ pref_mif = get_preferred_mif(new_ratings)
+ while not card.is_complete():
+ count_filler += 1
+ this_play = plays[count_filler % 4]
+ if this_play[1] == "so":
+ play_res = PlayResult(full_name="strikeout", short_name="strikeout")
+ elif this_play[1] == "gb":
+ this_if = "3b" if pref_mif == "ss" else "1b"
+ play_res = PlayResult(
+ full_name=f"gb ({this_if}) A", short_name=f"gb ({this_if}) A"
+ )
+ elif this_play[1] == "lf":
+ play_res = PLAY_RESULTS["fly-lf"]
+ else:
+ play_res = PLAY_RESULTS["fly-rf"]
+
+ r_raw = card.card_fill(play_res)
+ r_val = (float(r_raw[0]), float(r_raw[1]))
+
+ if this_play[1] == "so":
+ new_ratings.strikeout += r_val[0]
+ elif this_play[1] == "gb":
+ new_ratings.groundout_a += r_val[0]
+ elif this_play[1] == "lf":
+ new_ratings.flyout_lf_b += r_val[0]
+ else:
+ new_ratings.flyout_rf_b += r_val[0]
+
+ card.add_fatigue()
+ new_ratings.calculate_rate_stats()
+
+ return vl, vr
diff --git a/pitchers/models.py b/pitchers/models.py
new file mode 100644
index 0000000..ba0ade4
--- /dev/null
+++ b/pitchers/models.py
@@ -0,0 +1,300 @@
+import math
+
+import pydantic
+
+from creation_helpers import mround, sanitize_chance_output
+from typing import Literal
+from exceptions import logger
+
+
+class PitchingCardRatingsModel(pydantic.BaseModel):
+ pitchingcard_id: int
+ pit_hand: Literal['R', 'L']
+ vs_hand: Literal['R', 'L']
+ all_hits: float = 0.0
+ all_other_ob: float = 0.0
+ all_outs: float = 0.0
+ rem_singles: float = 0.0
+ rem_xbh: float = 0.0
+ rem_hr: float = 0.0
+ rem_doubles: float = 0.0
+ hard_rate: float
+ med_rate: float
+ soft_rate: float
+ # pull_rate: float
+ # center_rate: float
+ # slap_rate: float
+ homerun: float = 0.0
+ bp_homerun: float = 0.0
+ triple: float = 0.0
+ double_three: float = 0.0
+ double_two: float = 0.0
+ double_cf: float = 0.0
+ single_two: float = 0.0
+ single_one: float = 0.0
+ single_center: float = 0.0
+ bp_single: float = 0.0
+ hbp: float = 0.0
+ walk: float = 0.0
+ strikeout: float = 0.0
+ rem_flyballs: float = 0.0
+ flyout_lf_b: float = 0.0
+ flyout_cf_b: float = 0.0
+ flyout_rf_b: float = 0.0
+ rem_groundballs: float = 0.0
+ groundout_a: float = 0.0
+ groundout_b: float = 0.0
+ xcheck_p: float = float(1.0)
+ xcheck_c: float = float(3.0)
+ xcheck_1b: float = float(2.0)
+ xcheck_2b: float = float(6.0)
+ xcheck_3b: float = float(3.0)
+ xcheck_ss: float = float(7.0)
+ xcheck_lf: float = float(2.0)
+ xcheck_cf: float = float(3.0)
+ xcheck_rf: float = float(2.0)
+ avg: float = 0.0
+ obp: float = 0.0
+ slg: float = 0.0
+
+ def total_chances(self):
+ return mround(sum([
+ self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
+ self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
+ self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b, self.xcheck_p,
+ self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss, self.xcheck_lf,
+ self.xcheck_cf, self.xcheck_rf
+ ]))
+
+ def total_hits(self):
+ return mround(sum([
+ self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
+ self.single_two, self.single_one, self.single_center, self.bp_single
+ ]))
+
+ def total_ob(self):
+ return mround(sum([
+ self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
+ self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk
+ ]))
+
+ def total_outs(self):
+ return mround(sum([
+ self.strikeout, self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b,
+ self.xcheck_p, self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss,
+ self.xcheck_lf, self.xcheck_cf, self.xcheck_rf
+ ]))
+
+ def calculate_rate_stats(self):
+ self.avg = mround(self.total_hits() / 108, prec=5, base=0.00001)
+ self.obp = mround((self.total_hits() + self.hbp + self.walk) / 108, prec=5, base=0.00001)
+ self.slg = mround((
+ self.homerun * 4 + self.bp_homerun * 2 + self.triple * 3 + self.double_three * 2 +
+ self.double_two * 2 + self.double_cf * 2 + self.single_two + self.single_one +
+ self.single_center + self.bp_single / 2) / 108, prec=5, base=0.00001)
+
+ def custom_to_dict(self):
+ self.calculate_rate_stats()
+ return {
+ 'pitchingcard_id': self.pitchingcard_id,
+ 'vs_hand': self.vs_hand,
+ 'homerun': self.homerun,
+ 'bp_homerun': self.bp_homerun,
+ 'triple': self.triple,
+ 'double_three': self.double_three,
+ 'double_two': self.double_two,
+ 'double_cf': self.double_cf,
+ 'single_two': self.single_two,
+ 'single_one': self.single_one,
+ 'single_center': self.single_center,
+ 'bp_single': self.bp_single,
+ 'hbp': self.hbp,
+ 'walk': self.walk,
+ 'strikeout': self.strikeout,
+ 'flyout_lf_b': self.flyout_lf_b,
+ 'flyout_cf_b': self.flyout_cf_b,
+ 'flyout_rf_b': self.flyout_rf_b,
+ 'groundout_a': self.groundout_a,
+ 'groundout_b': self.groundout_b,
+ 'xcheck_p': self.xcheck_p,
+ 'xcheck_c': self.xcheck_c,
+ 'xcheck_1b': self.xcheck_1b,
+ 'xcheck_2b': self.xcheck_2b,
+ 'xcheck_3b': self.xcheck_3b,
+ 'xcheck_ss': self.xcheck_ss,
+ 'xcheck_lf': self.xcheck_lf,
+ 'xcheck_cf': self.xcheck_cf,
+ 'xcheck_rf': self.xcheck_rf,
+ 'avg': self.avg,
+ 'obp': self.obp,
+ 'slg': self.slg
+ }
+
+ def calculate_singles(self, szn_hits, szn_singles):
+ if szn_hits == 0:
+ return
+
+ tot = sanitize_chance_output(self.all_hits * (szn_singles / szn_hits))
+ logger.debug(f'total singles: {tot}')
+ self.rem_singles = tot
+
+ self.bp_single = 5.0 if self.rem_singles >= 5 else 0.0
+ self.rem_singles -= self.bp_single
+
+ self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else 0.0
+ self.rem_singles -= self.single_two
+
+ self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else 0.0
+ self.rem_singles -= self.single_one
+
+ self.single_center = sanitize_chance_output(self.rem_singles)
+ self.rem_singles -= self.single_center
+
+ self.rem_xbh = self.all_hits - self.single_center - self.single_one - self.single_two - self.bp_single
+ logger.info(f'remaining singles: {self.rem_singles} / total xbh: {self.rem_xbh}')
+
+ def calculate_xbh(self, szn_doubles, szn_triples, szn_homeruns, hr_per_fb_rate):
+ szn_xbh = szn_doubles + szn_triples + szn_homeruns
+ if szn_xbh == 0:
+ return
+
+ hr_rate = mround(szn_homeruns / szn_xbh)
+ tr_rate = mround(szn_triples / szn_xbh)
+ do_rate = mround(szn_doubles / szn_xbh)
+ logger.info(f'hr%: {hr_rate:.2f} / tr%: {tr_rate:.2f} / do%: {do_rate:.2f}')
+
+ raw_do_chances = sanitize_chance_output(self.rem_xbh * do_rate)
+ logger.info(f'raw do chances: {raw_do_chances}')
+ self.double_two = raw_do_chances if self.soft_rate > .2 else 0.0
+ self.double_cf = mround(raw_do_chances - self.double_two)
+ self.rem_xbh -= mround(self.double_two + self.double_cf + self.double_three)
+ logger.info(f'Double**: {self.double_two} / Double(cf): {self.double_cf} / rem xbh: {self.rem_xbh}')
+
+ self.triple = sanitize_chance_output(self.rem_xbh * tr_rate)
+ self.rem_xbh = mround(self.rem_xbh - self.triple)
+ logger.info(f'Triple: {self.triple} / rem xbh: {self.rem_xbh}')
+
+ raw_hr_chances = self.rem_xbh
+ logger.info(f'raw hr chances: {raw_hr_chances}')
+
+ if hr_per_fb_rate < .08:
+ self.bp_homerun = sanitize_chance_output(raw_hr_chances, min_chances=1.0, rounding=1.0)
+ elif hr_per_fb_rate > .28:
+ self.homerun = raw_hr_chances
+ elif hr_per_fb_rate > .18:
+ self.bp_homerun = sanitize_chance_output(raw_hr_chances * 0.4, min_chances=1.0, rounding=1.0)
+ self.homerun = self.rem_xbh - self.bp_homerun
+ else:
+ self.bp_homerun = sanitize_chance_output(raw_hr_chances * .75, min_chances=1.0, rounding=1.0)
+ self.homerun = mround(self.rem_xbh - self.bp_homerun)
+ logger.info(f'BP HR: {self.bp_homerun} / ND HR: {self.homerun}')
+
+ self.rem_xbh -= (self.bp_homerun + self.homerun)
+ logger.info(f'excess xbh: {self.rem_xbh}')
+
+ if self.rem_xbh > 0:
+ if self.triple > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to triple')
+ self.triple += self.rem_xbh
+ self.rem_xbh = 0.0
+ elif self.double_cf > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to double(cf)')
+ self.double_cf += self.rem_xbh
+ self.rem_xbh = 0.0
+ elif self.double_two > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to double**')
+ self.double_two += self.rem_xbh
+ self.rem_xbh = 0.0
+ elif self.single_two > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to single**')
+ self.single_two += self.rem_xbh
+ self.rem_xbh = 0.0
+ elif self.single_center > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to single(cf)')
+ self.single_center += self.rem_xbh
+ self.rem_xbh = 0.0
+ elif self.single_one > 1:
+ logger.info(f'Passing {self.rem_xbh} xbh to single*')
+ self.single_one += self.rem_xbh
+ self.rem_xbh = 0.0
+ else:
+ logger.info(f'Passing {self.rem_xbh} xbh to other_ob')
+ self.all_other_ob += self.rem_xbh
+
+ def calculate_other_ob(self, szn_walks, szn_hbp):
+ if szn_walks + szn_hbp == 0:
+ return
+
+ this_hbp = sanitize_chance_output(self.all_other_ob * szn_hbp / (szn_walks + szn_hbp), rounding=1.0)
+ logger.info(f'hbp value candidate: {this_hbp} / all_other_ob: {self.all_other_ob}')
+ self.hbp = max(min(this_hbp, self.all_other_ob), 0)
+ self.walk = mround(self.all_other_ob - self.hbp)
+ logger.info(f'self.hbp: {self.hbp} / self.walk: {self.walk}')
+
+ def calculate_strikouts(self, szn_strikeouts, szn_ab, szn_hits):
+ denom = max(szn_ab - szn_hits, 1)
+ raw_so = sanitize_chance_output(self.all_outs * (szn_strikeouts * 1.2) / denom)
+ sum_bb_so = self.walk + raw_so
+ excess = sum_bb_so - mround(math.floor(sum_bb_so))
+ logger.info(f'raw_so: {raw_so} / sum_bb_so: {sum_bb_so} / excess: {excess}')
+
+ self.strikeout = max(raw_so - excess - .05, 0.0)
+ if self.strikeout < 0:
+ logger.error(f'Strikeouts are less than zero :confusedpsyduck:')
+
+ def calculate_other_outs(self, fb_pct, gb_pct, oppo_pct):
+ rem_outs = 108 - self.total_chances()
+
+ all_fo = sanitize_chance_output(rem_outs * fb_pct)
+ if self.pit_hand == 'L':
+ self.flyout_lf_b = sanitize_chance_output(all_fo * oppo_pct)
+ else:
+ self.flyout_rf_b = sanitize_chance_output(all_fo * oppo_pct)
+ self.flyout_cf_b = all_fo - self.flyout_lf_b - self.flyout_rf_b
+ rem_outs -= (self.flyout_lf_b + self.flyout_cf_b + self.flyout_rf_b)
+
+ all_gb = rem_outs
+ self.groundout_a = sanitize_chance_output(all_gb * self.soft_rate)
+ self.groundout_b = sanitize_chance_output(all_gb - self.groundout_a)
+
+ rem_chances = 108 - self.total_chances()
+ logger.info(f'Remaining outs: {rem_chances}')
+
+ if self.strikeout > 1:
+ logger.info(f'Passing {rem_chances} outs to strikeouts')
+ self.strikeout += rem_chances
+ elif self.flyout_cf_b > 1:
+ logger.info(f'Passing {rem_chances} outs to fly(cf)')
+ self.flyout_cf_b += rem_chances
+ elif self.flyout_rf_b > 1:
+ logger.info(f'Passing {rem_chances} outs to fly(rf)')
+ self.flyout_rf_b += rem_chances
+ elif self.flyout_lf_b > 1:
+ logger.info(f'Passing {rem_chances} outs to fly(lf)')
+ self.flyout_lf_b += rem_chances
+ elif self.groundout_a > 1:
+ logger.info(f'Passing {rem_chances} outs to gbA')
+ self.groundout_a += rem_chances
+ elif self.single_one > 1:
+ logger.info(f'Passing {rem_chances} outs to single*')
+ self.single_one += rem_chances
+ elif self.single_center > 1:
+ logger.info(f'Passing {rem_chances} outs to single(cf)')
+ self.single_center += rem_chances
+ elif self.single_two > 1:
+ logger.info(f'Passing {rem_chances} outs to single**')
+ self.single_two += rem_chances
+ elif self.double_two > 1:
+ logger.info(f'Passing {rem_chances} outs to double**')
+ self.double_two += rem_chances
+ elif self.double_cf > 1:
+ logger.info(f'Passing {rem_chances} outs to double(cf)')
+ self.double_cf += rem_chances
+ elif self.triple > 1:
+ logger.info(f'Passing {rem_chances} outs to triple')
+ self.triple += rem_chances
+ elif self.homerun > 1:
+ logger.info(f'Passing {rem_chances} outs to homerun')
+ self.homerun += rem_chances
+ else:
+ raise ValueError(f'Could not complete card')
diff --git a/retrosheet_data.py b/retrosheet_data.py
index ccf54f9..c30a35a 100644
--- a/retrosheet_data.py
+++ b/retrosheet_data.py
@@ -16,6 +16,7 @@ from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
from batters.stat_prep import DataMismatchError
from db_calls import DB_URL, db_get, db_patch, db_post, db_put, db_delete
from exceptions import log_exception, logger
+from offense_col_resolver import resolve_offense_cols, hash_offense_col
from retrosheet_transformer import load_retrosheet_csv
import batters.calcs_batter as cba
import defenders.calcs_defense as cde
@@ -31,68 +32,89 @@ cache.enable()
# )
-RETRO_FILE_PATH = 'data-input/retrosheet/'
-EVENTS_FILENAME = 'retrosheets_events_2005.csv' # Now using transformer for new format compatibility
-PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
-DATA_INPUT_FILE_PATH = 'data-input/2005 Live Cardset/'
-CARD_BASE_URL = f'{DB_URL}/v2/players/'
+RETRO_FILE_PATH = "data-input/retrosheet/"
+EVENTS_FILENAME = (
+ "retrosheets_events_2005.csv" # Now using transformer for new format compatibility
+)
+PERSONNEL_FILENAME = "retrosheets_personnel.csv"
+DATA_INPUT_FILE_PATH = "data-input/2005 Live Cardset/"
+CARD_BASE_URL = f"{DB_URL}/v2/players/"
start_time = datetime.datetime.now()
-RELEASE_DIRECTORY = f'{start_time.year}-{start_time.month}-{start_time.day}'
-PLAYER_DESCRIPTION = 'Live' # Live for Live Series
-# PLAYER_DESCRIPTION = 'May PotM' # PotM for promos
+RELEASE_DIRECTORY = f"{start_time.year}-{start_time.month}-{start_time.day}"
+PLAYER_DESCRIPTION = "Live" # Live for Live Series
+# PLAYER_DESCRIPTION = 'July PotM' # PotM for promos
PROMO_INCLUSION_RETRO_IDS = [
# AL
- # 'rodra001', # Alex Rodriguez (IF)
- # 'menck001', # Kevin Mench (OF)
- # 'colob001', # Bartolo Colon (SP)
- # 'ryanb001', # BJ Ryan (RP)
+ # 'giamj001', # Jason Giambi (IF)
+ # 'cabrm001', # Miguel Cabrera (OF)
+ # 'lackj001', # John Lackey (SP)
+ # 'rivem002', # Mariano Rivera (RP)
# NL
- # 'delgc001', # Carlos Delgado (IF)
- # 'abreb001', # Bobby Abreu (OF)
- # 'haraa001', # Aaron Harang (SP)
- # 'hofft001', # Trevor Hoffman (RP)
+ # 'furcr001', # Rafael Furcal (IF)
+ # 'jenkg001', # Geoff Jenkins (OF)
+ # 'pattj004', # John Patterson (SP)
+ # 'wagnb001', # Billy Wagner (RP)
]
-MIN_PA_VL = 20 if 'live' in PLAYER_DESCRIPTION.lower() else 1 # 1 for PotM
-MIN_PA_VR = 40 if 'live' in PLAYER_DESCRIPTION.lower() else 1 # 1 for PotM
+MIN_PA_VL = 20 if "live" in PLAYER_DESCRIPTION.lower() else 1 # 1 for PotM
+MIN_PA_VR = 40 if "live" in PLAYER_DESCRIPTION.lower() else 1 # 1 for PotM
MIN_TBF_VL = MIN_PA_VL
MIN_TBF_VR = MIN_PA_VR
-CARDSET_ID = 27 if 'live' in PLAYER_DESCRIPTION.lower() else 28 # 27: 2005 Live, 28: 2005 Promos
+CARDSET_ID = (
+ 27 if "live" in PLAYER_DESCRIPTION.lower() else 28
+) # 27: 2005 Live, 28: 2005 Promos
# Per-Update Parameters
SEASON_PCT = 81 / 162 # Through end of July (~half season)
-START_DATE = 20050403 # YYYYMMDD format - 2005 Opening Day
-# END_DATE = 20050531 # YYYYMMDD format - May PotM
-END_DATE = 20050731 # End of July 2005
+START_DATE = 20050403 # YYYYMMDD format - 2005 Opening Day
+END_DATE = 20050731 # End of July 2005
POST_DATA = True
-LAST_WEEK_RATIO = 0.0 if PLAYER_DESCRIPTION == 'Live' else 0.0
+LAST_WEEK_RATIO = 0.0 if PLAYER_DESCRIPTION == "Live" else 0.0
LAST_TWOWEEKS_RATIO = 0.0
LAST_MONTH_RATIO = 0.0
+
def date_from_int(integer_date: int) -> datetime.datetime:
- return datetime.datetime(int(str(integer_date)[:4]), int(str(integer_date)[4:6]), int(str(integer_date)[-2:]))
+ return datetime.datetime(
+ int(str(integer_date)[:4]),
+ int(str(integer_date)[4:6]),
+ int(str(integer_date)[-2:]),
+ )
-def date_math(start_date: int, operator: Literal['+', '-'], day_delta: int = 0, month_delta: int = 0, year_delta: int = 0) -> int:
+def date_math(
+ start_date: int,
+ operator: Literal["+", "-"],
+ day_delta: int = 0,
+ month_delta: int = 0,
+ year_delta: int = 0,
+) -> int:
if len(str(start_date)) != 8:
- log_exception(ValueError, 'Start date must be 8 digits long')
+ log_exception(ValueError, "Start date must be 8 digits long")
if True in [day_delta < 0, month_delta < 0, year_delta < 0]:
- log_exception(ValueError, 'Time deltas must greater than or equal to 0; use `-` operator to go back in time')
+ log_exception(
+ ValueError,
+ "Time deltas must greater than or equal to 0; use `-` operator to go back in time",
+ )
if day_delta > 28:
- log_exception(ValueError, 'Use month_delta for days > 28')
+ log_exception(ValueError, "Use month_delta for days > 28")
if month_delta > 12:
- log_exception(ValueError, 'Use year_delta for months > 12')
+ log_exception(ValueError, "Use year_delta for months > 12")
s_date = date_from_int(start_date)
if year_delta > 0:
s_date = datetime.datetime(
- s_date.year + year_delta if operator == '+' else s_date.year - year_delta,
+ s_date.year + year_delta if operator == "+" else s_date.year - year_delta,
s_date.month,
- s_date.day
+ s_date.day,
)
if month_delta > 0:
month_range = [12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
- new_index = s_date.month + month_delta if operator == '+' else s_date.month - month_delta
+ new_index = (
+ s_date.month + month_delta
+ if operator == "+"
+ else s_date.month - month_delta
+ )
new_month = month_range[(new_index % 12)]
new_year = s_date.year
@@ -101,13 +123,11 @@ def date_math(start_date: int, operator: Literal['+', '-'], day_delta: int = 0,
elif new_index < 1:
new_year -= 1
- s_date = datetime.datetime(
- new_year,
- new_month,
- s_date.day
- )
- fd = s_date + datetime.timedelta(days=day_delta if operator == '+' else day_delta * -1)
- return f'{str(fd.year).zfill(4)}{str(fd.month).zfill(2)}{str(fd.day).zfill(2)}'
+ s_date = datetime.datetime(new_year, new_month, s_date.day)
+ fd = s_date + datetime.timedelta(
+ days=day_delta if operator == "+" else day_delta * -1
+ )
+ return f"{str(fd.year).zfill(4)}{str(fd.month).zfill(2)}{str(fd.day).zfill(2)}"
def weeks_between(start_date_int: int, end_date_int: int) -> int:
@@ -118,70 +138,124 @@ def weeks_between(start_date_int: int, end_date_int: int) -> int:
async def store_defense_to_csv(season: int):
- for position in ['c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of', 'p']:
+ for position in ["c", "1b", "2b", "3b", "ss", "lf", "cf", "rf", "of", "p"]:
pos_df = cde.get_bbref_fielding_df(position, season)
- pos_df.to_csv(f'{DATA_INPUT_FILE_PATH}defense_{position}.csv')
+ pos_df.to_csv(f"{DATA_INPUT_FILE_PATH}defense_{position}.csv")
await asyncio.sleep(8)
-def get_batting_result_series(plays: pd.DataFrame, event_type: str, pitcher_hand: Literal['r', 'l'], col_name: str) -> pd.Series:
- this_series = plays[(plays.event_type == event_type) & (plays.pitcher_hand == pitcher_hand)].groupby('batter_id').count()['event_type'].astype(int).rename(col_name)
+def get_batting_result_series(
+ plays: pd.DataFrame, event_type: str, pitcher_hand: Literal["r", "l"], col_name: str
+) -> pd.Series:
+ this_series = (
+ plays[(plays.event_type == event_type) & (plays.pitcher_hand == pitcher_hand)]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename(col_name)
+ )
return this_series
-def get_pitching_result_series(plays: pd.DataFrame, event_type: str, batter_hand: Literal['r', 'l'], col_name: str) -> pd.Series:
- this_series = plays[(plays.event_type == event_type) & (plays.batter_hand == batter_hand)].groupby('pitcher_id').count()['event_type'].astype(int).rename(col_name)
+def get_pitching_result_series(
+ plays: pd.DataFrame, event_type: str, batter_hand: Literal["r", "l"], col_name: str
+) -> pd.Series:
+ this_series = (
+ plays[(plays.event_type == event_type) & (plays.batter_hand == batter_hand)]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename(col_name)
+ )
return this_series
def get_run_stat_df(input_path: str):
- run_data = pd.read_csv(f'{input_path}running.csv') #.set_index('Name-additional'))
+ run_data = pd.read_csv(f"{input_path}running.csv") # .set_index('Name-additional'))
# if 'Player' in run_data:
# run_data = run_data.rename(columns={'Player': 'Full Name'})
# if 'Name' in run_data:
# run_data = run_data.rename(columns={'Name': 'Full Name'})
- if 'Player-additional' in run_data:
- run_data = run_data.rename(columns={'Player-additional': 'key_bbref'})
- if 'Name-additional' in run_data:
- run_data = run_data.rename(columns={'Name-additional': 'key_bbref'})
-
- run_data = run_data[['key_bbref', 'Tm', 'ROE', 'XI', 'RS%', 'SBO', 'SB', 'CS', 'SB%', 'SB2', 'CS2', 'SB3', 'CS3', 'SBH', 'CSH', 'PO', 'PCS', 'OOB', 'OOB1', 'OOB2', 'OOB3', 'OOBHm', 'BT', 'XBT%', '1stS', '1stS2', '1stS3', '1stD', '1stD3', '1stDH', '2ndS', '2ndS3', '2ndSH']]
+ if "Player-additional" in run_data:
+ run_data = run_data.rename(columns={"Player-additional": "key_bbref"})
+ if "Name-additional" in run_data:
+ run_data = run_data.rename(columns={"Name-additional": "key_bbref"})
+
+ run_data = run_data[
+ [
+ "key_bbref",
+ "Tm",
+ "ROE",
+ "XI",
+ "RS%",
+ "SBO",
+ "SB",
+ "CS",
+ "SB%",
+ "SB2",
+ "CS2",
+ "SB3",
+ "CS3",
+ "SBH",
+ "CSH",
+ "PO",
+ "PCS",
+ "OOB",
+ "OOB1",
+ "OOB2",
+ "OOB3",
+ "OOBHm",
+ "BT",
+ "XBT%",
+ "1stS",
+ "1stS2",
+ "1stS3",
+ "1stD",
+ "1stD3",
+ "1stDH",
+ "2ndS",
+ "2ndS3",
+ "2ndSH",
+ ]
+ ]
run_data = run_data.fillna(0)
- return run_data.set_index('key_bbref')
+ return run_data.set_index("key_bbref")
def get_periph_stat_df(input_path: str):
- pit_data = pd.read_csv(f'{input_path}pitching.csv')
- if 'Player-additional' in pit_data:
- pit_data = pit_data.rename(columns={'Player-additional': 'key_bbref'})
- if 'Name-additional' in pit_data:
- pit_data = pit_data.rename(columns={'Name-additional': 'key_bbref'})
- if 'Team' in pit_data:
- pit_data = pit_data.rename(columns={'Team': 'Tm'})
-
- pit_data = pit_data[['key_bbref', 'Tm', 'GF', 'SHO', 'SV', 'IP', 'BK', 'WP']]
-
+ pit_data = pd.read_csv(f"{input_path}pitching.csv")
+ if "Player-additional" in pit_data:
+ pit_data = pit_data.rename(columns={"Player-additional": "key_bbref"})
+ if "Name-additional" in pit_data:
+ pit_data = pit_data.rename(columns={"Name-additional": "key_bbref"})
+ if "Team" in pit_data:
+ pit_data = pit_data.rename(columns={"Team": "Tm"})
+
+ pit_data = pit_data[["key_bbref", "Tm", "GF", "SHO", "SV", "IP", "BK", "WP"]]
+
pit_data = pit_data.fillna(0)
return pit_data
-def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -> pd.DataFrame:
- RETRO_PLAYERS = pd.read_csv(f'{RETRO_FILE_PATH}{PERSONNEL_FILENAME}')
- id_key = 'batter_id' if which == 'batters' else 'pitcher_id'
+def get_player_ids(
+ plays: pd.DataFrame, which: Literal["batters", "pitchers"]
+) -> pd.DataFrame:
+ RETRO_PLAYERS = pd.read_csv(f"{RETRO_FILE_PATH}{PERSONNEL_FILENAME}")
+ id_key = "batter_id" if which == "batters" else "pitcher_id"
players = pd.DataFrame()
- unique_players = pd.Series(plays[id_key].unique()).to_frame('id')
+ unique_players = pd.Series(plays[id_key].unique()).to_frame("id")
players = pd.merge(
left=RETRO_PLAYERS,
right=unique_players,
- how='right',
- left_on='id',
- right_on='id'
- ).rename(columns={'id': id_key})
+ how="right",
+ left_on="id",
+ right_on="id",
+ ).rename(columns={"id": id_key})
- if PLAYER_DESCRIPTION not in ['Live', '1998']:
- msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
+ if PLAYER_DESCRIPTION not in ["Live", "1998"]:
+ msg = f"Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS"
print(msg)
logger.info(msg)
# players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
@@ -189,351 +263,737 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
def get_pids(row):
# return get_all_pybaseball_ids([row[id_key]], 'retro', full_name=f'{row["use_name"]} {row["last_name"]}')
- pull = pb.playerid_reverse_lookup([row[id_key]], key_type='retro')
+ pull = pb.playerid_reverse_lookup([row[id_key]], key_type="retro")
if len(pull.values) == 0:
- print(f'Could not find id {row[id_key]} in pybaseball lookup')
- return pull.loc[0][['key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']]
-
- players = players[[id_key, 'last_name', 'use_name']]
+ print(f"Could not find id {row[id_key]} in pybaseball lookup")
+ return pull.loc[0][["key_mlbam", "key_retro", "key_bbref", "key_fangraphs"]]
+
+ players = players[[id_key, "last_name", "use_name"]]
start_time = datetime.datetime.now()
other_ids = players.apply(get_pids, axis=1)
end_time = datetime.datetime.now()
- print(f'ID lookup: {(end_time - start_time).total_seconds():.2f}s')
+ print(f"ID lookup: {(end_time - start_time).total_seconds():.2f}s")
def clean_first(row):
- return sanitize_name(row['use_name'])
+ return sanitize_name(row["use_name"])
+
def clean_last(row):
- return sanitize_name(row['last_name'])
-
- players['use_name'] = players.apply(clean_first, axis=1)
- players['last_name'] = players.apply(clean_last, axis=1)
+ return sanitize_name(row["last_name"])
+
+ players["use_name"] = players.apply(clean_first, axis=1)
+ players["last_name"] = players.apply(clean_last, axis=1)
players = pd.merge(
- left=players,
- right=other_ids,
- left_on=id_key,
- right_on='key_retro'
+ left=players, right=other_ids, left_on=id_key, right_on="key_retro"
)
players = players.set_index(id_key)
def get_bat_hand(row):
- pa_vl = plays[(plays.batter_id == row['key_retro']) & (plays.pitcher_hand == 'l')].groupby('result_batter_hand').count()['game_id'].astype(int)
- pa_vr = plays[(plays.batter_id == row['key_retro']) & (plays.pitcher_hand == 'r')].groupby('result_batter_hand').count()['game_id'].astype(int)
+ pa_vl = (
+ plays[(plays.batter_id == row["key_retro"]) & (plays.pitcher_hand == "l")]
+ .groupby("result_batter_hand")
+ .count()["game_id"]
+ .astype(int)
+ )
+ pa_vr = (
+ plays[(plays.batter_id == row["key_retro"]) & (plays.pitcher_hand == "r")]
+ .groupby("result_batter_hand")
+ .count()["game_id"]
+ .astype(int)
+ )
- l_vs_l = 0 if 'l' not in pa_vl else pa_vl['l']
- l_vs_r = 0 if 'l' not in pa_vr else pa_vr['l']
- r_vs_l = 0 if 'r' not in pa_vl else pa_vl['r']
- r_vs_r = 0 if 'r' not in pa_vr else pa_vr['r']
+ l_vs_l = 0 if "l" not in pa_vl else pa_vl["l"]
+ l_vs_r = 0 if "l" not in pa_vr else pa_vr["l"]
+ r_vs_l = 0 if "r" not in pa_vl else pa_vl["r"]
+ r_vs_r = 0 if "r" not in pa_vr else pa_vr["r"]
# If player ONLY batted from one side (zero PAs from other side), classify as single-handed
if sum([l_vs_l, l_vs_r]) == 0 and sum([r_vs_l, r_vs_r]) > 0:
- return 'R'
+ return "R"
elif sum([l_vs_l, l_vs_r]) > 0 and sum([r_vs_l, r_vs_r]) == 0:
- return 'L'
+ return "L"
# If player batted from both sides (even if limited sample), they're a switch hitter
# This correctly identifies switch hitters regardless of total PA count
if sum([l_vs_l, l_vs_r]) > 0 and sum([r_vs_l, r_vs_r]) > 0:
- return 'S'
+ return "S"
# Fallback for edge cases (shouldn't reach here in normal flow)
if sum([l_vs_l, l_vs_r]) > sum([r_vs_l, r_vs_r]):
- return 'L'
+ return "L"
else:
- return 'R'
-
+ return "R"
+
def get_pitch_hand(row):
- first_event = plays.drop_duplicates('pitcher_id').loc[plays.pitcher_id == row['key_retro'], 'pitcher_hand']
+ first_event = plays.drop_duplicates("pitcher_id").loc[
+ plays.pitcher_id == row["key_retro"], "pitcher_hand"
+ ]
return first_event.item()
- if which == 'batters':
- players['bat_hand'] = players.apply(get_bat_hand, axis=1)
- elif which == 'pitchers':
- players['pitch_hand'] = players.apply(get_pitch_hand, axis=1)
+ if which == "batters":
+ players["bat_hand"] = players.apply(get_bat_hand, axis=1)
+ elif which == "pitchers":
+ players["pitch_hand"] = players.apply(get_pitch_hand, axis=1)
return players
-def get_base_batting_df(file_path: str, start_date: int, end_date: int) -> list[pd.DataFrame, pd.DataFrame]:
+def get_base_batting_df(
+ file_path: str, start_date: int, end_date: int
+) -> list[pd.DataFrame, pd.DataFrame]:
all_plays = load_retrosheet_csv(file_path)
- all_plays['date'] = all_plays['game_id'].str[3:-1].astype(int)
- date_plays = all_plays[(all_plays.date >= start_date) & (all_plays.date <= end_date)]
+ all_plays["date"] = all_plays["game_id"].str[3:-1].astype(int)
+ date_plays = all_plays[
+ (all_plays.date >= start_date) & (all_plays.date <= end_date)
+ ]
- all_player_ids = get_player_ids(all_plays, 'batters')
+ all_player_ids = get_player_ids(all_plays, "batters")
- pal_series = date_plays[(date_plays.batter_event == 't') & (date_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL')
+ pal_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.pitcher_hand == "l")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("PA_vL")
+ )
bs = pd.concat([all_player_ids, pal_series], axis=1)
- par_series = date_plays[(date_plays.batter_event == 't') & (date_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR')
+ par_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.pitcher_hand == "r")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("PA_vR")
+ )
bs = pd.concat([bs, par_series], axis=1)
- abl_series = date_plays[(date_plays.ab == 't') & (date_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('AB_vL')
+ abl_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.pitcher_hand == "l")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vL")
+ )
bs = pd.concat([bs, abl_series], axis=1)
- abr_series = date_plays[(date_plays.ab == 't') & (date_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('AB_vR')
+ abr_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.pitcher_hand == "r")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vR")
+ )
bs = pd.concat([bs, abr_series], axis=1)
- core_df = bs.dropna().query(f'PA_vL >= {MIN_PA_VL} & PA_vR >= {MIN_PA_VR}')
- if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
+ core_df = bs.dropna().query(f"PA_vL >= {MIN_PA_VL} & PA_vR >= {MIN_PA_VR}")
+ if (
+ LAST_WEEK_RATIO == 0.0
+ and LAST_TWOWEEKS_RATIO == 0.0
+ and LAST_MONTH_RATIO == 0.0
+ ):
return [date_plays, core_df]
base_num_weeks = weeks_between(start_date, end_date)
if LAST_WEEK_RATIO > 0:
- new_start = date_math(end_date, '-', day_delta=7)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", day_delta=7)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_WEEK_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
-
+
if LAST_TWOWEEKS_RATIO > 0:
- new_start = date_math(end_date, '-', day_delta=14)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", day_delta=14)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_TWOWEEKS_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
-
+
if LAST_MONTH_RATIO > 0:
- new_start = date_math(end_date, '-', month_delta=1)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", month_delta=1)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_MONTH_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
- core_df = core_df.drop(columns=['PA_vL', 'PA_vR', 'AB_vL', 'AB_vR'])
+ core_df = core_df.drop(columns=["PA_vL", "PA_vR", "AB_vL", "AB_vR"])
- pal_series = date_plays[(date_plays.batter_event == 't') & (date_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL')
- core_df['PA_vL'] = pal_series
+ pal_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.pitcher_hand == "l")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("PA_vL")
+ )
+ core_df["PA_vL"] = pal_series
- par_series = date_plays[(date_plays.batter_event == 't') & (date_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR')
- core_df['PA_vR'] = par_series
+ par_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.pitcher_hand == "r")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("PA_vR")
+ )
+ core_df["PA_vR"] = par_series
- abl_series = date_plays[(date_plays.ab == 't') & (date_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('AB_vL')
- core_df['AB_vL'] = abl_series
+ abl_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.pitcher_hand == "l")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vL")
+ )
+ core_df["AB_vL"] = abl_series
- abr_series = date_plays[(date_plays.ab == 't') & (date_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('AB_vR')
- core_df['AB_vR'] = abr_series
+ abr_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.pitcher_hand == "r")]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vR")
+ )
+ core_df["AB_vR"] = abr_series
return [date_plays, core_df]
-def get_base_pitching_df(file_path: str, start_date: int, end_date: int) -> list[pd.DataFrame, pd.DataFrame]:
+def get_base_pitching_df(
+ file_path: str, start_date: int, end_date: int
+) -> list[pd.DataFrame, pd.DataFrame]:
all_plays = load_retrosheet_csv(file_path)
- all_plays['date'] = all_plays['game_id'].str[3:-1].astype(int)
- date_plays = all_plays[(all_plays.date >= start_date) & (all_plays.date <= end_date)]
+ all_plays["date"] = all_plays["game_id"].str[3:-1].astype(int)
+ date_plays = all_plays[
+ (all_plays.date >= start_date) & (all_plays.date <= end_date)
+ ]
- ps = get_player_ids(all_plays, 'pitchers')
+ ps = get_player_ids(all_plays, "pitchers")
- tbfl_series = date_plays[(date_plays.batter_event == 't') & (date_plays.batter_hand == 'l')].groupby('pitcher_id').count()['event_type'].astype(int).rename('TBF_vL')
+ tbfl_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.batter_hand == "l")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("TBF_vL")
+ )
ps = pd.concat([ps, tbfl_series], axis=1)
- tbfr_series = date_plays[(date_plays.batter_event == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('TBF_vR')
+ tbfr_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.batter_hand == "r")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("TBF_vR")
+ )
ps = pd.concat([ps, tbfr_series], axis=1)
- abl_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'l')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vL')
+ abl_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.batter_hand == "l")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vL")
+ )
ps = pd.concat([ps, abl_series], axis=1)
- abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
+ abr_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.batter_hand == "r")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vR")
+ )
ps = pd.concat([ps, abr_series], axis=1)
- if PLAYER_DESCRIPTION in ['Live', '1998']:
- core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
+ if PLAYER_DESCRIPTION in ["Live", "1998"]:
+ core_df = ps.dropna().query(f"TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}")
else:
core_df = ps.dropna()
- if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
+ if (
+ LAST_WEEK_RATIO == 0.0
+ and LAST_TWOWEEKS_RATIO == 0.0
+ and LAST_MONTH_RATIO == 0.0
+ ):
return [date_plays, core_df]
base_num_weeks = weeks_between(start_date, end_date)
if LAST_WEEK_RATIO > 0:
- new_start = date_math(end_date, '-', day_delta=7)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", day_delta=7)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_WEEK_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
-
+
if LAST_TWOWEEKS_RATIO > 0:
- new_start = date_math(end_date, '-', day_delta=14)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", day_delta=14)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_TWOWEEKS_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
-
+
if LAST_MONTH_RATIO > 0:
- new_start = date_math(end_date, '-', month_delta=1)
- week_plays = date_plays[(date_plays.date >= int(new_start)) & (date_plays.date <= end_date)]
+ new_start = date_math(end_date, "-", month_delta=1)
+ week_plays = date_plays[
+ (date_plays.date >= int(new_start)) & (date_plays.date <= end_date)
+ ]
copies = round(base_num_weeks * LAST_MONTH_RATIO)
for x in range(copies):
date_plays = pd.concat([date_plays, week_plays], ignore_index=True)
- core_df = core_df.drop(columns=['TBF_vL', 'TBF_vR', 'AB_vL', 'AB_vR'])
+ core_df = core_df.drop(columns=["TBF_vL", "TBF_vR", "AB_vL", "AB_vR"])
- tbfl_series = date_plays[(date_plays.batter_event == 't') & (date_plays.batter_hand == 'l')].groupby('pitcher_id').count()['event_type'].astype(int).rename('TBF_vL')
- core_df['TBF_vL'] = tbfl_series
+ tbfl_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.batter_hand == "l")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("TBF_vL")
+ )
+ core_df["TBF_vL"] = tbfl_series
- tbfr_series = date_plays[(date_plays.batter_event == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('TBF_vR')
- core_df['TBF_vR'] = tbfr_series
+ tbfr_series = (
+ date_plays[(date_plays.batter_event == "t") & (date_plays.batter_hand == "r")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("TBF_vR")
+ )
+ core_df["TBF_vR"] = tbfr_series
- abl_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'l')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vL')
- core_df['AB_vL'] = abl_series
+ abl_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.batter_hand == "l")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vL")
+ )
+ core_df["AB_vL"] = abl_series
- abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
- core_df['AB_vR'] = abr_series
+ abr_series = (
+ date_plays[(date_plays.ab == "t") & (date_plays.batter_hand == "r")]
+ .groupby("pitcher_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("AB_vR")
+ )
+ core_df["AB_vR"] = abr_series
return [date_plays, core_df]
def get_med_vL(row):
- high = 0.9 - row['Hard%_vL']
- low = (row['SLG_vL'] - row['AVG_vL']) * 1.5
- return round(max(min(high, low),0.1), 5)
+ high = 0.9 - row["Hard%_vL"]
+ low = (row["SLG_vL"] - row["AVG_vL"]) * 1.5
+ return round(max(min(high, low), 0.1), 5)
+
+
def get_med_vR(row):
- high = 0.9 - row['Hard%_vR']
- low = (row['SLG_vR'] - row['AVG_vR']) * 1.5
- return round(max(min(high, low),0.1), 5)
+ high = 0.9 - row["Hard%_vR"]
+ low = (row["SLG_vR"] - row["AVG_vR"]) * 1.5
+ return round(max(min(high, low), 0.1), 5)
-def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -> pd.DataFrame:
+def get_batting_stats_by_date(
+ retro_file_path, start_date: int, end_date: int
+) -> pd.DataFrame:
start = datetime.datetime.now()
- all_plays, batting_stats = get_base_batting_df(retro_file_path, start_date, end_date)
- print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ all_plays, batting_stats = get_base_batting_df(
+ retro_file_path, start_date, end_date
+ )
+ print(
+ f"Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
start = datetime.datetime.now()
- all_player_ids = batting_stats['key_retro']
- logging.info(f'all_player_ids: {all_player_ids}')
- all_plays = all_plays[all_plays['batter_id'].isin(all_player_ids)]
- print(f'Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ all_player_ids = batting_stats["key_retro"]
+ logging.info(f"all_player_ids: {all_player_ids}")
+ all_plays = all_plays[all_plays["batter_id"].isin(all_player_ids)]
+ print(f"Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s")
# Basic counting stats
start = datetime.datetime.now()
for event_type, vs_hand, col_name in [
- ('home run', 'r', 'HR_vR'),
- ('home run', 'l', 'HR_vL'),
- ('single', 'r', '1B_vR'),
- ('single', 'l', '1B_vL'),
- ('double', 'r', '2B_vR'),
- ('double', 'l', '2B_vL'),
- ('triple', 'r', '3B_vR'),
- ('triple', 'l', '3B_vL'),
- ('walk', 'r', 'BB_vR'),
- ('walk', 'l', 'BB_vL'),
- ('strikeout', 'r', 'SO_vR'),
- ('strikeout', 'l', 'SO_vL'),
- ('hit by pitch', 'r', 'HBP_vR'),
- ('hit by pitch', 'l', 'HBP_vL')
+ ("home run", "r", "HR_vR"),
+ ("home run", "l", "HR_vL"),
+ ("single", "r", "1B_vR"),
+ ("single", "l", "1B_vL"),
+ ("double", "r", "2B_vR"),
+ ("double", "l", "2B_vL"),
+ ("triple", "r", "3B_vR"),
+ ("triple", "l", "3B_vL"),
+ ("walk", "r", "BB_vR"),
+ ("walk", "l", "BB_vL"),
+ ("strikeout", "r", "SO_vR"),
+ ("strikeout", "l", "SO_vL"),
+ ("hit by pitch", "r", "HBP_vR"),
+ ("hit by pitch", "l", "HBP_vL"),
]:
- this_series = get_batting_result_series(all_plays, event_type, vs_hand, col_name)
+ this_series = get_batting_result_series(
+ all_plays, event_type, vs_hand, col_name
+ )
batting_stats[col_name] = this_series
- print(f'Count basic stats: {(datetime.datetime.now() - start).total_seconds():.2f}s')
-
+ print(
+ f"Count basic stats: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
+
# Bespoke counting stats
start = datetime.datetime.now()
+
def get_fb_vl(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'f') & (all_plays.pitcher_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "f")
+ & (all_plays.pitcher_hand == "l")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_fb_vr(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'f') & (all_plays.pitcher_hand == 'r')].count()['event_type'].astype(int)
-
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "f")
+ & (all_plays.pitcher_hand == "r")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_gb_vl(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'G') & (all_plays.pitcher_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "G")
+ & (all_plays.pitcher_hand == "l")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_gb_vr(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'G') & (all_plays.pitcher_hand == 'r')].count()['event_type'].astype(int)
-
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "G")
+ & (all_plays.pitcher_hand == "r")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_ld_vl(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'l') & (all_plays.pitcher_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "l")
+ & (all_plays.pitcher_hand == "l")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_ld_vr(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batted_ball_type == 'l') & (all_plays.pitcher_hand == 'r')].count()['event_type'].astype(int)
-
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "l")
+ & (all_plays.pitcher_hand == "r")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_gdp_vl(row):
- dp = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l') & (all_plays.dp == 't')].count()['event_type'].astype(int)
- tp = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l') & (all_plays.tp == 't')].count()['event_type'].astype(int)
+ dp = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batter_event == "t")
+ & (all_plays.pitcher_hand == "l")
+ & (all_plays.dp == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+ tp = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batter_event == "t")
+ & (all_plays.pitcher_hand == "l")
+ & (all_plays.tp == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
return dp + tp
+
def get_gdp_vr(row):
- dp = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r') & (all_plays.dp == 't')].count()['event_type'].astype(int)
- tp = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r') & (all_plays.tp == 't')].count()['event_type'].astype(int)
+ dp = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batter_event == "t")
+ & (all_plays.pitcher_hand == "r")
+ & (all_plays.dp == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+ tp = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.batter_event == "t")
+ & (all_plays.pitcher_hand == "r")
+ & (all_plays.tp == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
return dp + tp
-
+
def get_bunt(row):
- return all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.bunt == 't')].count()['event_type'].astype(int)
-
- batting_stats['FB_vL'] = batting_stats.apply(get_fb_vl, axis=1)
- batting_stats['FB_vR'] = batting_stats.apply(get_fb_vr, axis=1)
+ return (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"]) & (all_plays.bunt == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
- batting_stats['GB_vL'] = batting_stats.apply(get_gb_vl, axis=1)
- batting_stats['GB_vR'] = batting_stats.apply(get_gb_vr, axis=1)
+ batting_stats["FB_vL"] = batting_stats.apply(get_fb_vl, axis=1)
+ batting_stats["FB_vR"] = batting_stats.apply(get_fb_vr, axis=1)
- batting_stats['LD_vL'] = batting_stats.apply(get_ld_vl, axis=1)
- batting_stats['LD_vR'] = batting_stats.apply(get_ld_vr, axis=1)
+ batting_stats["GB_vL"] = batting_stats.apply(get_gb_vl, axis=1)
+ batting_stats["GB_vR"] = batting_stats.apply(get_gb_vr, axis=1)
- batting_stats['GDP_vL'] = batting_stats.apply(get_gdp_vl, axis=1)
- batting_stats['GDP_vR'] = batting_stats.apply(get_gdp_vr, axis=1)
+ batting_stats["LD_vL"] = batting_stats.apply(get_ld_vl, axis=1)
+ batting_stats["LD_vR"] = batting_stats.apply(get_ld_vr, axis=1)
- batting_stats['Bunts'] = batting_stats.apply(get_bunt, axis=1)
- print(f'Custom counting stats: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ batting_stats["GDP_vL"] = batting_stats.apply(get_gdp_vl, axis=1)
+ batting_stats["GDP_vR"] = batting_stats.apply(get_gdp_vr, axis=1)
+
+ batting_stats["Bunts"] = batting_stats.apply(get_bunt, axis=1)
+ print(
+ f"Custom counting stats: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
# Infield Hit %
- ifh_vl = all_plays[(all_plays.hit_val.str.contains('1|2|3')) & (all_plays.pitcher_hand == 'l') & (all_plays.hit_location.str.contains('1|2|3|4|5|6')) & (~all_plays.hit_location.str.contains('D', na=False))].groupby('batter_id').count()['event_type'].astype(int).rename('ifh_vL')
- ifh_vr = all_plays[(all_plays.hit_val.str.contains('1|2|3')) & (all_plays.pitcher_hand == 'r') & (all_plays.hit_location.str.contains('1|2|3|4|5|6')) & (~all_plays.hit_location.str.contains('D', na=False))].groupby('batter_id').count()['event_type'].astype(int).rename('ifh_vR')
-
- batting_stats['ifh_vL'] = ifh_vl
- batting_stats['ifh_vR'] = ifh_vr
+ ifh_vl = (
+ all_plays[
+ (all_plays.hit_val.str.contains("1|2|3"))
+ & (all_plays.pitcher_hand == "l")
+ & (all_plays.hit_location.str.contains("1|2|3|4|5|6"))
+ & (~all_plays.hit_location.str.contains("D", na=False))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("ifh_vL")
+ )
+ ifh_vr = (
+ all_plays[
+ (all_plays.hit_val.str.contains("1|2|3"))
+ & (all_plays.pitcher_hand == "r")
+ & (all_plays.hit_location.str.contains("1|2|3|4|5|6"))
+ & (~all_plays.hit_location.str.contains("D", na=False))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("ifh_vR")
+ )
+
+ batting_stats["ifh_vL"] = ifh_vl
+ batting_stats["ifh_vR"] = ifh_vr
def get_pull_vl(row):
- pull_loc = '5|7' if row['bat_hand'] != 'L' else '3|9'
- x = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.pitcher_hand == 'l') & (all_plays.hit_location.str.contains(pull_loc))].count()['event_type'].astype(int)
+ pull_loc = "5|7" if row["bat_hand"] != "L" else "3|9"
+ x = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.pitcher_hand == "l")
+ & (all_plays.hit_location.str.contains(pull_loc))
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
return x
+
def get_pull_vr(row):
- pull_loc = '5|7' if row['bat_hand'] == 'R' else '3|9'
- x = all_plays[(all_plays.batter_id == row['key_retro']) & (all_plays.pitcher_hand == 'r') & (all_plays.hit_location.str.contains(pull_loc))].count()['event_type'].astype(int)
+ pull_loc = "5|7" if row["bat_hand"] == "R" else "3|9"
+ x = (
+ all_plays[
+ (all_plays.batter_id == row["key_retro"])
+ & (all_plays.pitcher_hand == "r")
+ & (all_plays.hit_location.str.contains(pull_loc))
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
return x
# Bespoke Queries
- batting_stats['pull_vL'] = batting_stats.apply(get_pull_vl, axis=1)
- batting_stats['pull_vR'] = batting_stats.apply(get_pull_vr, axis=1)
+ batting_stats["pull_vL"] = batting_stats.apply(get_pull_vl, axis=1)
+ batting_stats["pull_vR"] = batting_stats.apply(get_pull_vr, axis=1)
- center_vl = all_plays[(all_plays.pitcher_hand == 'l') & (all_plays.hit_location.str.contains('1|4|6|8'))].groupby('batter_id').count()['event_type'].astype(int).rename('center_vl')
- center_vr = all_plays[(all_plays.pitcher_hand == 'r') & (all_plays.hit_location.str.contains('1|4|6|8'))].groupby('batter_id').count()['event_type'].astype(int).rename('center_vr')
+ center_vl = (
+ all_plays[
+ (all_plays.pitcher_hand == "l")
+ & (all_plays.hit_location.str.contains("1|4|6|8"))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("center_vl")
+ )
+ center_vr = (
+ all_plays[
+ (all_plays.pitcher_hand == "r")
+ & (all_plays.hit_location.str.contains("1|4|6|8"))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("center_vr")
+ )
- batting_stats['center_vL'] = center_vl
- batting_stats['center_vR'] = center_vr
+ batting_stats["center_vL"] = center_vl
+ batting_stats["center_vR"] = center_vr
- oppo_vl = all_plays[(all_plays.pitcher_hand == 'l') & (all_plays.hit_location.str.contains('5|7'))].groupby('batter_id').count()['event_type'].astype(int).rename('oppo_vL')
- oppo_vr = all_plays[(all_plays.pitcher_hand == 'r') & (all_plays.hit_location.str.contains('5|7'))].groupby('batter_id').count()['event_type'].astype(int).rename('oppo_vR')
+ oppo_vl = (
+ all_plays[
+ (all_plays.pitcher_hand == "l")
+ & (all_plays.hit_location.str.contains("5|7"))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("oppo_vL")
+ )
+ oppo_vr = (
+ all_plays[
+ (all_plays.pitcher_hand == "r")
+ & (all_plays.hit_location.str.contains("5|7"))
+ ]
+ .groupby("batter_id")
+ .count()["event_type"]
+ .astype(int)
+ .rename("oppo_vR")
+ )
+
+ batting_stats["oppo_vL"] = oppo_vl
+ batting_stats["oppo_vR"] = oppo_vr
- batting_stats['oppo_vL'] = oppo_vl
- batting_stats['oppo_vR'] = oppo_vr
-
# fill na to 0 following counting stats
batting_stats = batting_stats.fillna(0)
# Calculated Fields
start = datetime.datetime.now()
- batting_stats['H_vL'] = batting_stats['1B_vL'] + batting_stats['2B_vL'] + batting_stats['3B_vL'] + batting_stats['HR_vL']
- batting_stats['H_vR'] = batting_stats['1B_vR'] + batting_stats['2B_vR'] + batting_stats['3B_vR'] + batting_stats['HR_vR']
+ batting_stats["H_vL"] = (
+ batting_stats["1B_vL"]
+ + batting_stats["2B_vL"]
+ + batting_stats["3B_vL"]
+ + batting_stats["HR_vL"]
+ )
+ batting_stats["H_vR"] = (
+ batting_stats["1B_vR"]
+ + batting_stats["2B_vR"]
+ + batting_stats["3B_vR"]
+ + batting_stats["HR_vR"]
+ )
- batting_stats['AVG_vL'] = round(batting_stats['H_vL'] / batting_stats['AB_vL'], 5)
- batting_stats['AVG_vR'] = round(batting_stats['H_vR'] / batting_stats['AB_vR'], 5)
+ batting_stats["AVG_vL"] = round(batting_stats["H_vL"] / batting_stats["AB_vL"], 5)
+ batting_stats["AVG_vR"] = round(batting_stats["H_vR"] / batting_stats["AB_vR"], 5)
- batting_stats['OBP_vL'] = round((batting_stats['H_vL'] + batting_stats['BB_vL'] + batting_stats['HBP_vL']) / batting_stats['PA_vL'], 5)
- batting_stats['OBP_vR'] = round((batting_stats['H_vR'] + batting_stats['BB_vR'] + batting_stats['HBP_vR']) / batting_stats['PA_vR'], 5)
+ batting_stats["OBP_vL"] = round(
+ (batting_stats["H_vL"] + batting_stats["BB_vL"] + batting_stats["HBP_vL"])
+ / batting_stats["PA_vL"],
+ 5,
+ )
+ batting_stats["OBP_vR"] = round(
+ (batting_stats["H_vR"] + batting_stats["BB_vR"] + batting_stats["HBP_vR"])
+ / batting_stats["PA_vR"],
+ 5,
+ )
- batting_stats['SLG_vL'] = round((batting_stats['1B_vL'] + batting_stats['2B_vL'] * 2 + batting_stats['3B_vL'] * 3 + batting_stats['HR_vL'] * 4) / batting_stats['AB_vL'], 5)
- batting_stats['SLG_vR'] = round((batting_stats['1B_vR'] + batting_stats['2B_vR'] * 2 + batting_stats['3B_vR'] * 3 + batting_stats['HR_vR'] * 4) / batting_stats['AB_vR'], 5)
+ batting_stats["SLG_vL"] = round(
+ (
+ batting_stats["1B_vL"]
+ + batting_stats["2B_vL"] * 2
+ + batting_stats["3B_vL"] * 3
+ + batting_stats["HR_vL"] * 4
+ )
+ / batting_stats["AB_vL"],
+ 5,
+ )
+ batting_stats["SLG_vR"] = round(
+ (
+ batting_stats["1B_vR"]
+ + batting_stats["2B_vR"] * 2
+ + batting_stats["3B_vR"] * 3
+ + batting_stats["HR_vR"] * 4
+ )
+ / batting_stats["AB_vR"],
+ 5,
+ )
- batting_stats['HR/FB_vL'] = round(batting_stats['HR_vL'] / batting_stats['FB_vL'], 5)
- batting_stats['HR/FB_vR'] = round(batting_stats['HR_vR'] / batting_stats['FB_vR'], 5)
+ batting_stats["HR/FB_vL"] = round(
+ batting_stats["HR_vL"] / batting_stats["FB_vL"], 5
+ )
+ batting_stats["HR/FB_vR"] = round(
+ batting_stats["HR_vR"] / batting_stats["FB_vR"], 5
+ )
- batting_stats['FB%_vL'] = round(batting_stats['FB_vL'] / (batting_stats['FB_vL'] + batting_stats['GB_vL'] + batting_stats['LD_vL']), 5)
- batting_stats['FB%_vR'] = round(batting_stats['FB_vR'] / (batting_stats['FB_vR'] + batting_stats['GB_vR'] + batting_stats['LD_vR']), 5)
+ batting_stats["FB%_vL"] = round(
+ batting_stats["FB_vL"]
+ / (batting_stats["FB_vL"] + batting_stats["GB_vL"] + batting_stats["LD_vL"]),
+ 5,
+ )
+ batting_stats["FB%_vR"] = round(
+ batting_stats["FB_vR"]
+ / (batting_stats["FB_vR"] + batting_stats["GB_vR"] + batting_stats["LD_vR"]),
+ 5,
+ )
- batting_stats['GB%_vL'] = round(batting_stats['GB_vL'] / (batting_stats['FB_vL'] + batting_stats['GB_vL'] + batting_stats['LD_vL']), 5)
- batting_stats['GB%_vR'] = round(batting_stats['GB_vR'] / (batting_stats['FB_vR'] + batting_stats['GB_vR'] + batting_stats['LD_vR']), 5)
+ batting_stats["GB%_vL"] = round(
+ batting_stats["GB_vL"]
+ / (batting_stats["FB_vL"] + batting_stats["GB_vL"] + batting_stats["LD_vL"]),
+ 5,
+ )
+ batting_stats["GB%_vR"] = round(
+ batting_stats["GB_vR"]
+ / (batting_stats["FB_vR"] + batting_stats["GB_vR"] + batting_stats["LD_vR"]),
+ 5,
+ )
- batting_stats['LD%_vL'] = round(batting_stats['LD_vL'] / (batting_stats['FB_vL'] + batting_stats['GB_vL'] + batting_stats['LD_vL']), 5)
- batting_stats['LD%_vR'] = round(batting_stats['LD_vR'] / (batting_stats['FB_vR'] + batting_stats['GB_vR'] + batting_stats['LD_vR']), 5)
+ batting_stats["LD%_vL"] = round(
+ batting_stats["LD_vL"]
+ / (batting_stats["FB_vL"] + batting_stats["GB_vL"] + batting_stats["LD_vL"]),
+ 5,
+ )
+ batting_stats["LD%_vR"] = round(
+ batting_stats["LD_vR"]
+ / (batting_stats["FB_vR"] + batting_stats["GB_vR"] + batting_stats["LD_vR"]),
+ 5,
+ )
- batting_stats['Hard%_vL'] = round(0.2 + batting_stats['SLG_vL'] - batting_stats['AVG_vL'], 5)
- batting_stats['Hard%_vR'] = round(0.2 + batting_stats['SLG_vR'] - batting_stats['AVG_vR'], 5)
+ batting_stats["Hard%_vL"] = round(
+ 0.2 + batting_stats["SLG_vL"] - batting_stats["AVG_vL"], 5
+ )
+ batting_stats["Hard%_vR"] = round(
+ 0.2 + batting_stats["SLG_vR"] - batting_stats["AVG_vR"], 5
+ )
# def get_med_vL(row):
# high = 0.9 - row['Hard%_vL']
@@ -544,148 +1004,367 @@ def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -
# low = (row['SLG_vR'] - row['AVG_vR']) * 1.5
# return round(max(min(high, low),0.1), 5)
- batting_stats['Med%_vL'] = batting_stats.apply(get_med_vL, axis=1)
- batting_stats['Med%_vR'] = batting_stats.apply(get_med_vR, axis=1)
+ batting_stats["Med%_vL"] = batting_stats.apply(get_med_vL, axis=1)
+ batting_stats["Med%_vR"] = batting_stats.apply(get_med_vR, axis=1)
- batting_stats['Soft%_vL'] = round(1 - batting_stats['Hard%_vL'] - batting_stats['Med%_vL'], 5)
- batting_stats['Soft%_vR'] = round(1 - batting_stats['Hard%_vR'] - batting_stats['Med%_vR'], 5)
+ batting_stats["Soft%_vL"] = round(
+ 1 - batting_stats["Hard%_vL"] - batting_stats["Med%_vL"], 5
+ )
+ batting_stats["Soft%_vR"] = round(
+ 1 - batting_stats["Hard%_vR"] - batting_stats["Med%_vR"], 5
+ )
- batting_stats['IFH%_vL'] = round(batting_stats['ifh_vL'] / batting_stats['H_vL'], 5)
- batting_stats['IFH%_vR'] = round(batting_stats['ifh_vR'] / batting_stats['H_vR'], 5)
+ batting_stats["IFH%_vL"] = round(batting_stats["ifh_vL"] / batting_stats["H_vL"], 5)
+ batting_stats["IFH%_vR"] = round(batting_stats["ifh_vR"] / batting_stats["H_vR"], 5)
- pull_val = round(batting_stats['pull_vL'] / (batting_stats['pull_vL'] + batting_stats['center_vL'] + batting_stats['oppo_vL']), 5)
- batting_stats['Pull%_vL'] = pull_val.clip(0.1, 0.6)
- pull_val = round(batting_stats['pull_vR'] / (batting_stats['pull_vR'] + batting_stats['center_vR'] + batting_stats['oppo_vR']), 5)
- batting_stats['Pull%_vR'] = pull_val.clip(0.1, 0.6)
+ pull_val = round(
+ batting_stats["pull_vL"]
+ / (
+ batting_stats["pull_vL"]
+ + batting_stats["center_vL"]
+ + batting_stats["oppo_vL"]
+ ),
+ 5,
+ )
+ batting_stats["Pull%_vL"] = pull_val.clip(0.1, 0.6)
+ pull_val = round(
+ batting_stats["pull_vR"]
+ / (
+ batting_stats["pull_vR"]
+ + batting_stats["center_vR"]
+ + batting_stats["oppo_vR"]
+ ),
+ 5,
+ )
+ batting_stats["Pull%_vR"] = pull_val.clip(0.1, 0.6)
- cent_val = round(batting_stats['center_vL'] / (batting_stats['pull_vL'] + batting_stats['center_vL'] + batting_stats['oppo_vL']), 5)
- batting_stats['Cent%_vL'] = cent_val.clip(0.1, 0.6)
- cent_val = round(batting_stats['center_vL'] / (batting_stats['pull_vR'] + batting_stats['center_vR'] + batting_stats['oppo_vR']), 5)
- batting_stats['Cent%_vR'] = cent_val.clip(0.1, 0.6)
+ cent_val = round(
+ batting_stats["center_vL"]
+ / (
+ batting_stats["pull_vL"]
+ + batting_stats["center_vL"]
+ + batting_stats["oppo_vL"]
+ ),
+ 5,
+ )
+ batting_stats["Cent%_vL"] = cent_val.clip(0.1, 0.6)
+ cent_val = round(
+ batting_stats["center_vR"]  # vs-R numerator, matching the vs-R denominator below (was center_vL)
+ / (
+ batting_stats["pull_vR"]
+ + batting_stats["center_vR"]
+ + batting_stats["oppo_vR"]
+ ),
+ 5,
+ )
+ batting_stats["Cent%_vR"] = cent_val.clip(0.1, 0.6)
- batting_stats['Oppo%_vL'] = round(1 - batting_stats['Pull%_vL'] - batting_stats['Cent%_vL'], 5)
- batting_stats['Oppo%_vR'] = round(1 - batting_stats['Pull%_vR'] - batting_stats['Cent%_vR'], 5)
+ batting_stats["Oppo%_vL"] = round(
+ 1 - batting_stats["Pull%_vL"] - batting_stats["Cent%_vL"], 5
+ )
+ batting_stats["Oppo%_vR"] = round(
+ 1 - batting_stats["Pull%_vR"] - batting_stats["Cent%_vR"], 5
+ )
batting_stats = batting_stats.fillna(0)
- print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ print(
+ f"Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
return batting_stats
-def get_pitching_stats_by_date(retro_file_path, start_date: int, end_date: int) -> pd.DataFrame:
+def get_pitching_stats_by_date(
+ retro_file_path, start_date: int, end_date: int
+) -> pd.DataFrame:
start = datetime.datetime.now()
- all_plays, pitching_stats = get_base_pitching_df(retro_file_path, start_date, end_date)
- print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ all_plays, pitching_stats = get_base_pitching_df(
+ retro_file_path, start_date, end_date
+ )
+ print(
+ f"Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
start = datetime.datetime.now()
- all_player_ids = pitching_stats['key_retro']
- all_plays = all_plays[all_plays['pitcher_id'].isin(all_player_ids)]
- print(f'Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ all_player_ids = pitching_stats["key_retro"]
+ all_plays = all_plays[all_plays["pitcher_id"].isin(all_player_ids)]
+ print(f"Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s")
# Basic counting stats
start = datetime.datetime.now()
for event_type, vs_hand, col_name in [
- ('home run', 'r', 'HR_vR'),
- ('home run', 'l', 'HR_vL'),
- ('single', 'r', '1B_vR'),
- ('single', 'l', '1B_vL'),
- ('double', 'r', '2B_vR'),
- ('double', 'l', '2B_vL'),
- ('triple', 'r', '3B_vR'),
- ('triple', 'l', '3B_vL'),
- ('walk', 'r', 'BB_vR'),
- ('walk', 'l', 'BB_vL'),
- ('strikeout', 'r', 'SO_vR'),
- ('strikeout', 'l', 'SO_vL'),
- ('hit by pitch', 'r', 'HBP_vR'),
- ('hit by pitch', 'l', 'HBP_vL'),
- ('intentional walk', 'l', 'IBB_vL'),
- ('intentional walk', 'r', 'IBB_vR')
+ ("home run", "r", "HR_vR"),
+ ("home run", "l", "HR_vL"),
+ ("single", "r", "1B_vR"),
+ ("single", "l", "1B_vL"),
+ ("double", "r", "2B_vR"),
+ ("double", "l", "2B_vL"),
+ ("triple", "r", "3B_vR"),
+ ("triple", "l", "3B_vL"),
+ ("walk", "r", "BB_vR"),
+ ("walk", "l", "BB_vL"),
+ ("strikeout", "r", "SO_vR"),
+ ("strikeout", "l", "SO_vL"),
+ ("hit by pitch", "r", "HBP_vR"),
+ ("hit by pitch", "l", "HBP_vL"),
+ ("intentional walk", "l", "IBB_vL"),
+ ("intentional walk", "r", "IBB_vR"),
]:
- this_series = get_pitching_result_series(all_plays, event_type, vs_hand, col_name)
+ this_series = get_pitching_result_series(
+ all_plays, event_type, vs_hand, col_name
+ )
pitching_stats[col_name] = this_series
- print(f'Count basic stats: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ print(
+ f"Count basic stats: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
pitching_stats = pitching_stats.fillna(0)
-
+
# Bespoke counting stats
start = datetime.datetime.now()
+
def get_fb_vl(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'f') & (all_plays.batter_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "f")
+ & (all_plays.batter_hand == "l")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_fb_vr(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'f') & (all_plays.batter_hand == 'r')].count()['event_type'].astype(int)
-
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "f")
+ & (all_plays.batter_hand == "r")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_gb_vl(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'G') & (all_plays.batter_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "G")
+ & (all_plays.batter_hand == "l")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_gb_vr(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'G') & (all_plays.batter_hand == 'r')].count()['event_type'].astype(int)
-
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "G")
+ & (all_plays.batter_hand == "r")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_ld_vl(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'l') & (all_plays.pitcher_hand == 'l')].count()['event_type'].astype(int)
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "l")
+ & (all_plays.batter_hand == "l")  # batter-hand split, same as get_fb_vl/get_gb_vl (was pitcher_hand)
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+
def get_ld_vr(row):
- return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'l') & (all_plays.pitcher_hand == 'r')].count()['event_type'].astype(int)
-
- pitching_stats['FB_vL'] = pitching_stats.apply(get_fb_vl, axis=1)
- pitching_stats['FB_vR'] = pitching_stats.apply(get_fb_vr, axis=1)
+ return (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batted_ball_type == "l")
+ & (all_plays.batter_hand == "r")  # batter-hand split, same as get_fb_vr/get_gb_vr (was pitcher_hand)
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
- pitching_stats['GB_vL'] = pitching_stats.apply(get_gb_vl, axis=1)
- pitching_stats['GB_vR'] = pitching_stats.apply(get_gb_vr, axis=1)
+ pitching_stats["FB_vL"] = pitching_stats.apply(get_fb_vl, axis=1)
+ pitching_stats["FB_vR"] = pitching_stats.apply(get_fb_vr, axis=1)
- pitching_stats['LD_vL'] = pitching_stats.apply(get_ld_vl, axis=1)
- pitching_stats['LD_vR'] = pitching_stats.apply(get_ld_vr, axis=1)
+ pitching_stats["GB_vL"] = pitching_stats.apply(get_gb_vl, axis=1)
+ pitching_stats["GB_vR"] = pitching_stats.apply(get_gb_vr, axis=1)
- pitching_stats['H_vL'] = pitching_stats['1B_vL'] + pitching_stats['2B_vL'] + pitching_stats['3B_vL'] + pitching_stats['HR_vL']
- pitching_stats['H_vR'] = pitching_stats['1B_vR'] + pitching_stats['2B_vR'] + pitching_stats['3B_vR'] + pitching_stats['HR_vR']
+ pitching_stats["LD_vL"] = pitching_stats.apply(get_ld_vl, axis=1)
+ pitching_stats["LD_vR"] = pitching_stats.apply(get_ld_vr, axis=1)
- print(f'Custom counting stats: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ pitching_stats["H_vL"] = (
+ pitching_stats["1B_vL"]
+ + pitching_stats["2B_vL"]
+ + pitching_stats["3B_vL"]
+ + pitching_stats["HR_vL"]
+ )
+ pitching_stats["H_vR"] = (
+ pitching_stats["1B_vR"]
+ + pitching_stats["2B_vR"]
+ + pitching_stats["3B_vR"]
+ + pitching_stats["HR_vR"]
+ )
+
+ print(
+ f"Custom counting stats: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
# Calculated Fields
"""
Oppo%_vL & R
"""
start = datetime.datetime.now()
- pitching_stats['AVG_vL'] = round(pitching_stats['H_vL'] / pitching_stats['AB_vL'], 5)
- pitching_stats['AVG_vR'] = round(pitching_stats['H_vR'] / pitching_stats['AB_vR'], 5)
+ pitching_stats["AVG_vL"] = round(
+ pitching_stats["H_vL"] / pitching_stats["AB_vL"], 5
+ )
+ pitching_stats["AVG_vR"] = round(
+ pitching_stats["H_vR"] / pitching_stats["AB_vR"], 5
+ )
- pitching_stats['OBP_vL'] = round((pitching_stats['H_vL'] + pitching_stats['BB_vL'] + pitching_stats['HBP_vL'] + pitching_stats['IBB_vL']) / pitching_stats['TBF_vL'], 5)
- pitching_stats['OBP_vR'] = round((pitching_stats['H_vR'] + pitching_stats['BB_vR'] + pitching_stats['HBP_vR'] + pitching_stats['IBB_vR']) / pitching_stats['TBF_vR'], 5)
+ pitching_stats["OBP_vL"] = round(
+ (
+ pitching_stats["H_vL"]
+ + pitching_stats["BB_vL"]
+ + pitching_stats["HBP_vL"]
+ + pitching_stats["IBB_vL"]
+ )
+ / pitching_stats["TBF_vL"],
+ 5,
+ )
+ pitching_stats["OBP_vR"] = round(
+ (
+ pitching_stats["H_vR"]
+ + pitching_stats["BB_vR"]
+ + pitching_stats["HBP_vR"]
+ + pitching_stats["IBB_vR"]
+ )
+ / pitching_stats["TBF_vR"],
+ 5,
+ )
- pitching_stats['SLG_vL'] = round((pitching_stats['1B_vL'] + pitching_stats['2B_vL'] * 2 + pitching_stats['3B_vL'] * 3 + pitching_stats['HR_vL'] * 4) / pitching_stats['AB_vL'], 5)
- pitching_stats['SLG_vR'] = round((pitching_stats['1B_vR'] + pitching_stats['2B_vR'] * 2 + pitching_stats['3B_vR'] * 3 + pitching_stats['HR_vR'] * 4) / pitching_stats['AB_vR'], 5)
+ pitching_stats["SLG_vL"] = round(
+ (
+ pitching_stats["1B_vL"]
+ + pitching_stats["2B_vL"] * 2
+ + pitching_stats["3B_vL"] * 3
+ + pitching_stats["HR_vL"] * 4
+ )
+ / pitching_stats["AB_vL"],
+ 5,
+ )
+ pitching_stats["SLG_vR"] = round(
+ (
+ pitching_stats["1B_vR"]
+ + pitching_stats["2B_vR"] * 2
+ + pitching_stats["3B_vR"] * 3
+ + pitching_stats["HR_vR"] * 4
+ )
+ / pitching_stats["AB_vR"],
+ 5,
+ )
- pitching_stats['HR/FB_vL'] = round(pitching_stats['HR_vL'] / pitching_stats['FB_vL'], 5)
- pitching_stats['HR/FB_vR'] = round(pitching_stats['HR_vR'] / pitching_stats['FB_vR'], 5)
+ pitching_stats["HR/FB_vL"] = round(
+ pitching_stats["HR_vL"] / pitching_stats["FB_vL"], 5
+ )
+ pitching_stats["HR/FB_vR"] = round(
+ pitching_stats["HR_vR"] / pitching_stats["FB_vR"], 5
+ )
- pitching_stats['Hard%_vL'] = round(0.2 + pitching_stats['SLG_vL'] - pitching_stats['AVG_vL'], 5)
- pitching_stats['Hard%_vR'] = round(0.2 + pitching_stats['SLG_vR'] - pitching_stats['AVG_vR'], 5)
+ pitching_stats["Hard%_vL"] = round(
+ 0.2 + pitching_stats["SLG_vL"] - pitching_stats["AVG_vL"], 5
+ )
+ pitching_stats["Hard%_vR"] = round(
+ 0.2 + pitching_stats["SLG_vR"] - pitching_stats["AVG_vR"], 5
+ )
- pitching_stats['Med%_vL'] = pitching_stats.apply(get_med_vL, axis=1)
- pitching_stats['Med%_vR'] = pitching_stats.apply(get_med_vR, axis=1)
+ pitching_stats["Med%_vL"] = pitching_stats.apply(get_med_vL, axis=1)
+ pitching_stats["Med%_vR"] = pitching_stats.apply(get_med_vR, axis=1)
- pitching_stats['Soft%_vL'] = round(1 - pitching_stats['Hard%_vL'] - pitching_stats['Med%_vL'], 5)
- pitching_stats['Soft%_vR'] = round(1 - pitching_stats['Hard%_vR'] - pitching_stats['Med%_vR'], 5)
+ pitching_stats["Soft%_vL"] = round(
+ 1 - pitching_stats["Hard%_vL"] - pitching_stats["Med%_vL"], 5
+ )
+ pitching_stats["Soft%_vR"] = round(
+ 1 - pitching_stats["Hard%_vR"] - pitching_stats["Med%_vR"], 5
+ )
- pitching_stats['FB%_vL'] = round(pitching_stats['FB_vL'] / (pitching_stats['FB_vL'] + pitching_stats['GB_vL'] + pitching_stats['LD_vL']), 5)
- pitching_stats['FB%_vR'] = round(pitching_stats['FB_vR'] / (pitching_stats['FB_vR'] + pitching_stats['GB_vR'] + pitching_stats['LD_vR']), 5)
+ pitching_stats["FB%_vL"] = round(
+ pitching_stats["FB_vL"]
+ / (pitching_stats["FB_vL"] + pitching_stats["GB_vL"] + pitching_stats["LD_vL"]),
+ 5,
+ )
+ pitching_stats["FB%_vR"] = round(
+ pitching_stats["FB_vR"]
+ / (pitching_stats["FB_vR"] + pitching_stats["GB_vR"] + pitching_stats["LD_vR"]),
+ 5,
+ )
- pitching_stats['GB%_vL'] = round(pitching_stats['GB_vL'] / (pitching_stats['FB_vL'] + pitching_stats['GB_vL'] + pitching_stats['LD_vL']), 5)
- pitching_stats['GB%_vR'] = round(pitching_stats['GB_vR'] / (pitching_stats['FB_vR'] + pitching_stats['GB_vR'] + pitching_stats['LD_vR']), 5)
+ pitching_stats["GB%_vL"] = round(
+ pitching_stats["GB_vL"]
+ / (pitching_stats["FB_vL"] + pitching_stats["GB_vL"] + pitching_stats["LD_vL"]),
+ 5,
+ )
+ pitching_stats["GB%_vR"] = round(
+ pitching_stats["GB_vR"]
+ / (pitching_stats["FB_vR"] + pitching_stats["GB_vR"] + pitching_stats["LD_vR"]),
+ 5,
+ )
def get_oppo_vl(row):
- count = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'l') & (all_plays.hit_location.str.contains('5|7'))].count()['event_type'].astype(int)
- denom = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'l') & (all_plays.batter_event == 't')].count()['event_type'].astype(int)
- return round(count / denom, 5)
- def get_oppo_vr(row):
- count = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'r') & (all_plays.hit_location.str.contains('3|9'))].count()['event_type'].astype(int)
- denom = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'r') & (all_plays.batter_event == 't')].count()['event_type'].astype(int)
+ count = (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batter_hand == "l")
+ & (all_plays.hit_location.str.contains("5|7"))
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+ denom = (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batter_hand == "l")
+ & (all_plays.batter_event == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
return round(count / denom, 5)
- pitching_stats['Oppo%_vL'] = pitching_stats.apply(get_oppo_vl, axis=1)
- pitching_stats['Oppo%_vR'] = pitching_stats.apply(get_oppo_vr, axis=1)
+ def get_oppo_vr(row):
+ count = (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batter_hand == "r")
+ & (all_plays.hit_location.str.contains("3|9"))
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+ denom = (
+ all_plays[
+ (all_plays.pitcher_id == row["key_retro"])
+ & (all_plays.batter_hand == "r")
+ & (all_plays.batter_event == "t")
+ ]
+ .count()["event_type"]
+ .astype(int)
+ )
+ return round(count / denom, 5)
+
+ pitching_stats["Oppo%_vL"] = pitching_stats.apply(get_oppo_vl, axis=1)
+ pitching_stats["Oppo%_vR"] = pitching_stats.apply(get_oppo_vr, axis=1)
pitching_stats = pitching_stats.fillna(0)
- print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s')
+ print(
+ f"Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s"
+ )
return pitching_stats
@@ -693,74 +1372,94 @@ def get_pitching_stats_by_date(retro_file_path, start_date: int, end_date: int)
def calc_batting_cards(bs: pd.DataFrame, season_pct: float) -> pd.DataFrame:
def create_batting_card(row):
steal_data = cba.stealing(
- chances=int(row['SBO']),
- sb2s=int(row['SB2']),
- cs2s=int(row['CS2']),
- sb3s=int(row['SB3']),
- cs3s=int(row['CS3']),
- season_pct=1.0
+ chances=int(row["SBO"]),
+ sb2s=int(row["SB2"]),
+ cs2s=int(row["CS2"]),
+ sb3s=int(row["SB3"]),
+ cs3s=int(row["CS3"]),
+ season_pct=1.0,
+ )
+ y = pd.DataFrame(
+ {
+ "key_bbref": [row["key_bbref"]],
+ "steal_low": [steal_data[0]],
+ "steal_high": [steal_data[1]],
+ "steal_auto": [steal_data[2]],
+ "steal_jump": [steal_data[3]],
+ "hit_and_run": [
+ cba.hit_and_run(
+ row["AB_vL"],
+ row["AB_vR"],
+ row["H_vL"],
+ row["H_vR"],
+ row["HR_vL"],
+ row["HR_vR"],
+ row["SO_vL"],
+ row["SO_vR"],
+ )
+ ],
+ "bunt": [cba.bunting(row["Bunts"], season_pct)],
+ "running": [cba.running(row["XBT%"])],
+ "hand": [row["bat_hand"]],
+ }
)
- y = pd.DataFrame({
- 'key_bbref': [row['key_bbref']],
- 'steal_low': [steal_data[0]],
- 'steal_high': [steal_data[1]],
- 'steal_auto': [steal_data[2]],
- 'steal_jump': [steal_data[3]],
- 'hit_and_run': [cba.hit_and_run(
- row['AB_vL'], row['AB_vR'], row['H_vL'], row['H_vR'],
- row['HR_vL'], row['HR_vR'], row['SO_vL'], row['SO_vR']
- )],
- 'bunt': [cba.bunting(row['Bunts'], season_pct)],
- 'running': [cba.running(row['XBT%'])],
- 'hand': [row['bat_hand']],
- })
return y.loc[0]
all_cards = bs.apply(create_batting_card, axis=1)
- all_cards = all_cards.set_index('key_bbref')
+ all_cards = all_cards.set_index("key_bbref")
return all_cards
def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
def create_pitching_card(row):
- pow_data = cde.pow_ratings(row['IP'], row['GS'], row['G'])
- y = pd.DataFrame({
- "key_bbref": [row['key_bbref']],
- "balk": [cpi.balks(row['BK'], row['IP'], season_pct)],
- "wild_pitch": [cpi.wild_pitches(row['WP'], row['IP'], season_pct)],
- "hold": [cde.hold_pitcher(str(row['caught_stealing_perc']), int(row['pickoffs']), season_pct)],
- "starter_rating": [pow_data[0]],
- "relief_rating": [pow_data[1]],
- "closer_rating": [cpi.closer_rating(int(row['GF']), int(row['SV']), int(row['G']))],
- "batting": [f'#1W{row["pitch_hand"].upper()}-C']
- })
+ pow_data = cde.pow_ratings(row["IP"], row["GS"], row["G"])
+ y = pd.DataFrame(
+ {
+ "key_bbref": [row["key_bbref"]],
+ "balk": [cpi.balks(row["BK"], row["IP"], season_pct)],
+ "wild_pitch": [cpi.wild_pitches(row["WP"], row["IP"], season_pct)],
+ "hold": [
+ cde.hold_pitcher(
+ str(row["caught_stealing_perc"]),
+ int(row["pickoffs"]),
+ season_pct,
+ )
+ ],
+ "starter_rating": [pow_data[0]],
+ "relief_rating": [pow_data[1]],
+ "closer_rating": [
+ cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
+ ],
+ "batting": [f'#1W{row["pitch_hand"].upper()}-C'],
+ }
+ )
return y.loc[0]
-
+
all_cards = ps.apply(create_pitching_card, axis=1)
- all_cards = all_cards.set_index('key_bbref')
+ all_cards = all_cards.set_index("key_bbref")
return all_cards
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
def create_batting_rating(row):
- if row['key_bbref'] == 'galaran01':
+ if row["key_bbref"] == "galaran01":
pass
ratings = cba.get_batter_ratings(row)
- ops_vl = ratings[0]['obp'] + ratings[0]['slg']
- ops_vr = ratings[1]['obp'] + ratings[1]['slg']
+ ops_vl = ratings[0]["obp"] + ratings[0]["slg"]
+ ops_vr = ratings[1]["obp"] + ratings[1]["slg"]
total_ops = (ops_vl + ops_vr + min(ops_vr, ops_vl)) / 3
-
+
def calc_cost(total_ops, base_cost, base_ops, max_delta) -> int:
delta = ((total_ops - base_ops) / 0.1) * 2
if delta < 1:
delta = (max_delta * (1 - (total_ops / base_ops))) * -0.1
-
+
final_cost = base_cost + (max_delta * delta)
return round(final_cost)
-
+
if total_ops >= 1.2:
rarity_id = 99
cost = calc_cost(total_ops, base_cost=2400, base_ops=1.215, max_delta=810)
@@ -780,31 +1479,33 @@ def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
rarity_id = 5
cost = calc_cost(total_ops, base_cost=10, base_ops=0.61, max_delta=8)
- x = pd.DataFrame({
- 'key_bbref': [row['key_bbref']],
- 'ratings_vL': [ratings[0]],
- 'ratings_vR': [ratings[1]],
- 'ops_vL': ops_vl,
- 'ops_vR': ops_vr,
- 'total_ops': total_ops,
- 'rarity_id': rarity_id,
- 'cost': cost
- })
+ x = pd.DataFrame(
+ {
+ "key_bbref": [row["key_bbref"]],
+ "ratings_vL": [ratings[0]],
+ "ratings_vR": [ratings[1]],
+ "ops_vL": ops_vl,
+ "ops_vR": ops_vr,
+ "total_ops": total_ops,
+ "rarity_id": rarity_id,
+ "cost": cost,
+ }
+ )
return x.loc[0]
all_ratings = bs.apply(create_batting_rating, axis=1)
- all_ratings = all_ratings.set_index('key_bbref')
+ all_ratings = all_ratings.set_index("key_bbref")
return all_ratings
def calc_pitcher_ratings(ps: pd.DataFrame) -> pd.DataFrame:
def create_pitching_rating(row):
- row['pitchingcard_id'] = row['key_fangraphs']
- row['pitch_hand'] = row['pitch_hand'].upper()
+ row["pitchingcard_id"] = row["key_fangraphs"]
+ row["pitch_hand"] = row["pitch_hand"].upper()
ratings = cpi.get_pitcher_ratings(row)
- ops_vl = ratings[0]['obp'] + ratings[0]['slg']
- ops_vr = ratings[1]['obp'] + ratings[1]['slg']
+ ops_vl = ratings[0]["obp"] + ratings[0]["slg"]
+ ops_vr = ratings[1]["obp"] + ratings[1]["slg"]
total_ops = (ops_vl + ops_vr + min(ops_vr, ops_vl)) / 3
def calc_cost(total_ops, base_cost, base_ops, max_delta) -> int:
@@ -815,8 +1516,8 @@ def calc_pitcher_ratings(ps: pd.DataFrame) -> pd.DataFrame:
final_cost = base_cost + (max_delta * delta)
return round(final_cost)
-
- if row['starter_rating'] > 3:
+
+ if row["starter_rating"] > 3:
if total_ops <= 0.4:
rarity_id = 99
cost = calc_cost(total_ops, 2400, 0.38, 810)
@@ -854,285 +1555,357 @@ def calc_pitcher_ratings(ps: pd.DataFrame) -> pd.DataFrame:
else:
rarity_id = 5
cost = calc_cost(total_ops, 10, 0.7, 8)
-
- x = pd.DataFrame({
- 'key_bbref': [row['key_bbref']],
- 'ratings_vL': [ratings[0]],
- 'ratings_vR': [ratings[1]],
- 'ops_vL': ops_vl,
- 'ops_vR': ops_vr,
- 'total_ops': total_ops,
- 'rarity_id': rarity_id,
- 'cost': cost
- })
+
+ x = pd.DataFrame(
+ {
+ "key_bbref": [row["key_bbref"]],
+ "ratings_vL": [ratings[0]],
+ "ratings_vR": [ratings[1]],
+ "ops_vL": ops_vl,
+ "ops_vR": ops_vr,
+ "total_ops": total_ops,
+ "rarity_id": rarity_id,
+ "cost": cost,
+ }
+ )
return x.loc[0]
all_ratings = ps.apply(create_pitching_rating, axis=1)
- all_ratings = all_ratings.set_index('key_bbref')
+ all_ratings = all_ratings.set_index("key_bbref")
return all_ratings
def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
- df_c = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_c.csv').set_index('key_bbref')
- df_1b = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_1b.csv').set_index('key_bbref')
- df_2b = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_2b.csv').set_index('key_bbref')
- df_3b = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_3b.csv').set_index('key_bbref')
- df_ss = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_ss.csv').set_index('key_bbref')
- df_lf = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_lf.csv').set_index('key_bbref')
- df_cf = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_cf.csv').set_index('key_bbref')
- df_rf = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_rf.csv').set_index('key_bbref')
- df_of = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_of.csv').set_index('key_bbref')
+ df_c = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_c.csv").set_index("key_bbref")
+ df_1b = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_1b.csv").set_index("key_bbref")
+ df_2b = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_2b.csv").set_index("key_bbref")
+ df_3b = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_3b.csv").set_index("key_bbref")
+ df_ss = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_ss.csv").set_index("key_bbref")
+ df_lf = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_lf.csv").set_index("key_bbref")
+ df_cf = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_cf.csv").set_index("key_bbref")
+ df_rf = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_rf.csv").set_index("key_bbref")
+ df_of = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_of.csv").set_index("key_bbref")
season_pct = 1.0
all_pos = []
def process_pos(row):
no_data = True
- for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
- if row['key_bbref'] in pos_df.index:
- logger.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
+ for pos_df, position in [
+ (df_1b, "1b"),
+ (df_2b, "2b"),
+ (df_3b, "3b"),
+ (df_ss, "ss"),
+ ]:
+ if row["key_bbref"] in pos_df.index:
+ logger.info(
+ f'Running {position} stats for {row["use_name"]} {row["last_name"]}'
+ )
try:
- if 'bis_runs_total' in pos_df.columns:
- average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
- int(pos_df.at[row["key_bbref"], 'bis_runs_total']) +
- min(
- int(pos_df.at[row["key_bbref"], 'tz_runs_total']),
- int(pos_df.at[row["key_bbref"], 'bis_runs_total'])
- )) / 3
- else:
- average_range = pos_df.at[row["key_bbref"], 'tz_runs_total']
-
- if float(pos_df.at[row["key_bbref"], 'Inn_def']) >= 10.0:
- all_pos.append({
- "key_bbref": row['key_bbref'],
- "position": position.upper(),
- "innings": float(pos_df.at[row["key_bbref"], 'Inn_def']),
- "range": cde.get_if_range(
- pos_code=position,
- tz_runs=round(average_range),
- r_dp=0,
- season_pct=season_pct
- ),
- "error": cde.get_any_error(
- pos_code=position,
- errors=int(pos_df.at[row["key_bbref"], 'E_def']),
- chances=int(pos_df.at[row["key_bbref"], 'chances']),
- season_pct=season_pct
+ if "bis_runs_total" in pos_df.columns:
+ average_range = (
+ int(pos_df.at[row["key_bbref"], "tz_runs_total"])
+ + int(pos_df.at[row["key_bbref"], "bis_runs_total"])
+ + min(
+ int(pos_df.at[row["key_bbref"], "tz_runs_total"]),
+ int(pos_df.at[row["key_bbref"], "bis_runs_total"]),
)
- })
+ ) / 3
+ else:
+ average_range = pos_df.at[row["key_bbref"], "tz_runs_total"]
+
+ if float(pos_df.at[row["key_bbref"], "Inn_def"]) >= 10.0:
+ all_pos.append(
+ {
+ "key_bbref": row["key_bbref"],
+ "position": position.upper(),
+ "innings": float(
+ pos_df.at[row["key_bbref"], "Inn_def"]
+ ),
+ "range": cde.get_if_range(
+ pos_code=position,
+ tz_runs=round(average_range),
+ r_dp=0,
+ season_pct=season_pct,
+ ),
+ "error": cde.get_any_error(
+ pos_code=position,
+ errors=int(pos_df.at[row["key_bbref"], "E_def"]),
+ chances=int(pos_df.at[row["key_bbref"], "chances"]),
+ season_pct=season_pct,
+ ),
+ }
+ )
no_data = False
except Exception as e:
- logger.info(f'Infield position failed: {e}')
+ logger.info(f"Infield position failed: {e}")
of_arms = []
of_payloads = []
- for pos_df, position in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]:
+ for pos_df, position in [(df_lf, "lf"), (df_cf, "cf"), (df_rf, "rf")]:
if row["key_bbref"] in pos_df.index:
try:
- if 'bis_runs_total' in pos_df.columns:
- average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
- int(pos_df.at[row["key_bbref"], 'bis_runs_total']) +
- min(
- int(pos_df.at[row["key_bbref"], 'tz_runs_total']),
- int(pos_df.at[row["key_bbref"], 'bis_runs_total'])
- )) / 3
- else:
- average_range = pos_df.at[row["key_bbref"], 'tz_runs_total']
-
- if float(pos_df.at[row["key_bbref"], 'Inn_def']) >= 10.0:
- of_payloads.append({
- "key_bbref": row['key_bbref'],
- "position": position.upper(),
- "innings": float(pos_df.at[row["key_bbref"], 'Inn_def']),
- "range": cde.get_of_range(
- pos_code=position,
- tz_runs=round(average_range),
- season_pct=season_pct
+ if "bis_runs_total" in pos_df.columns:
+ average_range = (
+ int(pos_df.at[row["key_bbref"], "tz_runs_total"])
+ + int(pos_df.at[row["key_bbref"], "bis_runs_total"])
+ + min(
+ int(pos_df.at[row["key_bbref"], "tz_runs_total"]),
+ int(pos_df.at[row["key_bbref"], "bis_runs_total"]),
)
- })
- of_run_rating = 'bis_runs_outfield' if 'bis_runs_outfield' in pos_df.columns else 'tz_runs_total'
+ ) / 3
+ else:
+ average_range = pos_df.at[row["key_bbref"], "tz_runs_total"]
+
+ if float(pos_df.at[row["key_bbref"], "Inn_def"]) >= 10.0:
+ of_payloads.append(
+ {
+ "key_bbref": row["key_bbref"],
+ "position": position.upper(),
+ "innings": float(
+ pos_df.at[row["key_bbref"], "Inn_def"]
+ ),
+ "range": cde.get_of_range(
+ pos_code=position,
+ tz_runs=round(average_range),
+ season_pct=season_pct,
+ ),
+ }
+ )
+ of_run_rating = (
+ "bis_runs_outfield"
+ if "bis_runs_outfield" in pos_df.columns
+ else "tz_runs_total"
+ )
of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
no_data = False
except Exception as e:
- logger.info(f'Outfield position failed: {e}')
+ logger.info(f"Outfield position failed: {e}")
- if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
+ if (
+ row["key_bbref"] in df_of.index
+ and len(of_arms) > 0
+ and len(of_payloads) > 0
+ ):
try:
error_rating = cde.get_any_error(
pos_code=position,
- errors=int(df_of.at[row["key_bbref"], 'E_def']),
- chances=int(df_of.at[row["key_bbref"], 'chances']),
- season_pct=season_pct
+ errors=int(df_of.at[row["key_bbref"], "E_def"]),
+ chances=int(df_of.at[row["key_bbref"], "chances"]),
+ season_pct=season_pct,
)
arm_rating = cde.arm_outfield(of_arms)
for f in of_payloads:
- f['error'] = error_rating
- f['arm'] = arm_rating
+ f["error"] = error_rating
+ f["arm"] = arm_rating
all_pos.append(f)
no_data = False
except Exception as e:
- logger.info(f'Outfield position failed: {e}')
+ logger.info(f"Outfield position failed: {e}")
if row["key_bbref"] in df_c.index:
try:
- run_rating = 'bis_runs_catcher_sb' if 'bis_runs_catcher_sb' in df_c else 'tz_runs_catcher'
-
- if df_c.at[row["key_bbref"], 'SB'] + df_c.at[row["key_bbref"], 'CS'] == 0:
+ run_rating = (
+ "bis_runs_catcher_sb"
+ if "bis_runs_catcher_sb" in df_c
+ else "tz_runs_catcher"
+ )
+
+ if (
+ df_c.at[row["key_bbref"], "SB"] + df_c.at[row["key_bbref"], "CS"]
+ == 0
+ ):
arm_rating = 3
else:
arm_rating = cde.arm_catcher(
- cs_pct=df_c.at[row["key_bbref"], 'caught_stealing_perc'],
+ cs_pct=df_c.at[row["key_bbref"], "caught_stealing_perc"],
raa=int(df_c.at[row["key_bbref"], run_rating]),
- season_pct=season_pct
+ season_pct=season_pct,
)
- if float(df_c.at[row["key_bbref"], 'Inn_def']) >= 10.0:
- all_pos.append({
- "key_bbref": row['key_bbref'],
- "position": 'C',
- "innings": float(df_c.at[row["key_bbref"], 'Inn_def']),
- "range": cde.range_catcher(
- rs_value=int(df_c.at[row["key_bbref"], 'tz_runs_catcher']),
- season_pct=season_pct
- ),
- "error": cde.get_any_error(
- pos_code='c',
- errors=int(df_c.at[row["key_bbref"], 'E_def']),
- chances=int(df_c.at[row["key_bbref"], 'chances']),
- season_pct=season_pct
- ),
- "arm": arm_rating,
- "pb": cde.pb_catcher(
- pb=int(df_c.at[row["key_bbref"], 'PB']),
- innings=int(float(df_c.at[row["key_bbref"], 'Inn_def'])),
- season_pct=season_pct
- ),
- "overthrow": cde.ot_catcher(
- errors=int(df_c.at[row["key_bbref"], 'E_def']),
- chances=int(df_c.at[row["key_bbref"], 'chances']),
- season_pct=season_pct
- )
- })
+ if float(df_c.at[row["key_bbref"], "Inn_def"]) >= 10.0:
+ all_pos.append(
+ {
+ "key_bbref": row["key_bbref"],
+ "position": "C",
+ "innings": float(df_c.at[row["key_bbref"], "Inn_def"]),
+ "range": cde.range_catcher(
+ rs_value=int(
+ df_c.at[row["key_bbref"], "tz_runs_catcher"]
+ ),
+ season_pct=season_pct,
+ ),
+ "error": cde.get_any_error(
+ pos_code="c",
+ errors=int(df_c.at[row["key_bbref"], "E_def"]),
+ chances=int(df_c.at[row["key_bbref"], "chances"]),
+ season_pct=season_pct,
+ ),
+ "arm": arm_rating,
+ "pb": cde.pb_catcher(
+ pb=int(df_c.at[row["key_bbref"], "PB"]),
+ innings=int(
+ float(df_c.at[row["key_bbref"], "Inn_def"])
+ ),
+ season_pct=season_pct,
+ ),
+ "overthrow": cde.ot_catcher(
+ errors=int(df_c.at[row["key_bbref"], "E_def"]),
+ chances=int(df_c.at[row["key_bbref"], "chances"]),
+ season_pct=season_pct,
+ ),
+ }
+ )
no_data = False
except Exception as e:
- logger.info(f'Catcher position failed: {e}')
+ logger.info(f"Catcher position failed: {e}")
if no_data:
- all_pos.append({
- "key_bbref": row['key_bbref'],
- "position": 'DH',
- "innings": row['PA_vL'] + row['PA_vR']
- })
+ all_pos.append(
+ {
+ "key_bbref": row["key_bbref"],
+ "position": "DH",
+ "innings": row["PA_vL"] + row["PA_vR"],
+ }
+ )
bs.apply(process_pos, axis=1)
pos_df = pd.DataFrame(all_pos)
- pos_df = pos_df.set_index('key_bbref')
+ pos_df = pos_df.set_index("key_bbref")
return pos_df
def calc_pitcher_defense(ps: pd.DataFrame) -> pd.DataFrame:
- df_p = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_p.csv').set_index('key_bbref')
+ df_p = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_p.csv").set_index("key_bbref")
all_pos = []
def process_def(row):
- if 'bis_runs_total' in df_p:
- range_val = cde.range_pitcher(rs_value=int(df_p.at[row['key_bbref'], 'bis_runs_total']))
- else:
- range_val = cde.range_pitcher(rf_per9_value=df_p.at[row['key_bbref'], 'range_factor_per_nine'])
-
- if row['key_bbref'] in df_p.index:
- all_pos.append({
- 'key_bbref': row['key_bbref'],
- 'position': 'P',
- 'innings': float(df_p.at[row['key_bbref'], 'Inn_def']),
- 'range': range_val,
- 'error': cde.get_any_error(
- pos_code='p',
- errors=int(df_p.at[row["key_bbref"], 'E_def']),
- chances=int(df_p.at[row["key_bbref"], 'chances']),
- season_pct=1.0
- )
- })
+ if "bis_runs_total" in df_p:
+ range_val = cde.range_pitcher(
+ rs_value=int(df_p.at[row["key_bbref"], "bis_runs_total"])
+ )
else:
- all_pos.append({
- "key_bbref": int(row['key_bbref']),
- "position": 'P',
- "innings": 1,
- "range": 5,
- "error": 51
- })
+ range_val = cde.range_pitcher(
+ rf_per9_value=df_p.at[row["key_bbref"], "range_factor_per_nine"]
+ )
+
+ if row["key_bbref"] in df_p.index:
+ all_pos.append(
+ {
+ "key_bbref": row["key_bbref"],
+ "position": "P",
+ "innings": float(df_p.at[row["key_bbref"], "Inn_def"]),
+ "range": range_val,
+ "error": cde.get_any_error(
+ pos_code="p",
+ errors=int(df_p.at[row["key_bbref"], "E_def"]),
+ chances=int(df_p.at[row["key_bbref"], "chances"]),
+ season_pct=1.0,
+ ),
+ }
+ )
+ else:
+ all_pos.append(
+ {
+ "key_bbref": int(row["key_bbref"]),
+ "position": "P",
+ "innings": 1,
+ "range": 5,
+ "error": 51,
+ }
+ )
ps.apply(process_def, axis=1)
pos_df = pd.DataFrame(all_pos)
- pos_df = pos_df.set_index('key_bbref')
+ pos_df = pos_df.set_index("key_bbref")
return pos_df
-async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
+async def get_or_post_players(
+ bstat_df: pd.DataFrame = None,
+ bat_rat_df: pd.DataFrame = None,
+ def_rat_df: pd.DataFrame = None,
+ pstat_df: pd.DataFrame = None,
+ pit_rat_df: pd.DataFrame = None,
+) -> pd.DataFrame:
all_players = []
- player_deltas = [['player_id', 'player_name', 'old-cost', 'new-cost', 'old-rarity', 'new-rarity']]
- new_players = [['player_id', 'player_name', 'cost', 'rarity', 'pos1']]
+ player_deltas = [
+ ["player_id", "player_name", "old-cost", "new-cost", "old-rarity", "new-rarity"]
+ ]
+ new_players = [["player_id", "player_name", "cost", "rarity", "pos1"]]
async def player_search(bbref_id: str):
- p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
- if p_query['count'] > 0:
- return p_query['players'][0]
+ p_query = await db_get(
+ "players", params=[("bbref_id", bbref_id), ("cardset_id", CARDSET_ID)]
+ )
+ if p_query["count"] > 0:
+ return p_query["players"][0]
else:
return None
-
+
async def mlb_search_or_post(retro_id: int):
- mlb_query = await db_get('mlbplayers', params=[('key_retro', retro_id)])
- if mlb_query['count'] > 0:
- return mlb_query['players'][0]
+ mlb_query = await db_get("mlbplayers", params=[("key_retro", retro_id)])
+ if mlb_query["count"] > 0:
+ return mlb_query["players"][0]
else:
mlb_player = await db_post(
- 'mlbplayers/one',
+ "mlbplayers/one",
payload={
- 'first_name': row['use_name'],
- 'last_name': row['last_name'],
- 'key_mlbam': row['key_mlbam'],
- 'key_fangraphs': row['key_fangraphs'],
- 'key_bbref': row['key_bbref'],
- 'key_retro': row['key_retro']
- }
+ "first_name": row["use_name"],
+ "last_name": row["last_name"],
+ "key_mlbam": row["key_mlbam"],
+ "key_fangraphs": row["key_fangraphs"],
+ "key_bbref": row["key_bbref"],
+ "key_retro": row["key_retro"],
+ "offense_col": int(
+ row.get(
+ "offense_col",
+ hash_offense_col(f"{row['use_name']} {row['last_name']}"),
+ )
+ ),
+ },
)
return mlb_player
def new_player_payload(row, ratings_df: pd.DataFrame):
return {
- 'p_name': f'{row["use_name"]} {row["last_name"]}',
- 'cost': f'{ratings_df.loc[row['key_bbref']]["cost"]}',
- 'image': f'change-me',
- 'mlbclub': CLUB_LIST[row['Tm']],
- 'franchise': FRANCHISE_LIST[row['Tm']],
- 'cardset_id': CARDSET_ID,
- 'set_num': int(float(row['key_fangraphs'])),
- 'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
- 'description': PLAYER_DESCRIPTION,
- 'bbref_id': row['key_bbref'],
- 'fangr_id': int(float(row['key_fangraphs'])),
- 'mlbplayer_id': mlb_player['id']
+ "p_name": f'{row["use_name"]} {row["last_name"]}',
+ "cost": f'{ratings_df.loc[row['key_bbref']]["cost"]}',
+ "image": f"change-me",
+ "mlbclub": CLUB_LIST[row["Tm"]],
+ "franchise": FRANCHISE_LIST[row["Tm"]],
+ "cardset_id": CARDSET_ID,
+ "set_num": int(float(row["key_fangraphs"])),
+ "rarity_id": int(ratings_df.loc[row["key_bbref"]]["rarity_id"]),
+ "description": PLAYER_DESCRIPTION,
+ "bbref_id": row["key_bbref"],
+ "fangr_id": int(float(row["key_fangraphs"])),
+ "mlbplayer_id": mlb_player["id"],
}
def get_player_record_pos(def_rat_df: pd.DataFrame, row) -> list[str]:
all_pos = [None, None, None, None, None, None, None, None]
try:
count = 0
- all_pos_df = def_rat_df.loc[row['key_bbref']].sort_values(by='innings', ascending=False)
+ all_pos_df = def_rat_df.loc[row["key_bbref"]].sort_values(
+ by="innings", ascending=False
+ )
for index, pos_row in all_pos_df.iterrows():
all_pos[count] = pos_row.position
count += 1
except KeyError:
- logger.info(f'No positions found for {row['use_name']} {row['last_name']}')
- all_pos[0] = 'DH'
+ logger.info(f"No positions found for {row['use_name']} {row['last_name']}")
+ all_pos[0] = "DH"
except TypeError:
- logger.info(f'Only one position found for {row['use_name']} {row['last_name']}')
- all_pos[0] = def_rat_df.loc[row['key_bbref']].position
-
+ logger.info(
+ f"Only one position found for {row['use_name']} {row['last_name']}"
+ )
+ all_pos[0] = def_rat_df.loc[row["key_bbref"]].position
+
return all_pos
dev_count = 0
@@ -1141,185 +1914,257 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
if dev_count < 0:
break
- p_search = await player_search(row['key_bbref'])
+ p_search = await player_search(row["key_bbref"])
if p_search is not None:
- if 'id' in p_search:
- player_id = p_search['id']
+ if "id" in p_search:
+ player_id = p_search["id"]
else:
- player_id = p_search['player_id']
+ player_id = p_search["player_id"]
# Update positions for existing players too
all_pos = get_player_record_pos(def_rat_df, row)
patch_params = [
- ('cost', f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'),
- ('rarity_id', int(bat_rat_df.loc[row['key_bbref']]['rarity_id'])),
- ('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
+ ("cost", f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'),
+ ("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
+ (
+ "image",
+ f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
+ ),
]
# Add position updates - set all 8 slots to clear any old positions
for x in enumerate(all_pos):
- patch_params.append((f'pos_{x[0] + 1}', x[1]))
+ patch_params.append((f"pos_{x[0] + 1}", x[1]))
- new_player = await db_patch('players', object_id=player_id, params=patch_params)
- new_player['bbref_id'] = row['key_bbref']
+ new_player = await db_patch(
+ "players", object_id=player_id, params=patch_params
+ )
+ new_player["bbref_id"] = row["key_bbref"]
all_players.append(new_player)
- player_deltas.append([
- new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
- ])
+ player_deltas.append(
+ [
+ new_player["player_id"],
+ new_player["p_name"],
+ p_search["cost"],
+ new_player["cost"],
+ p_search["rarity"]["name"],
+ new_player["rarity"]["name"],
+ ]
+ )
else:
- mlb_player = await mlb_search_or_post(row['key_retro'])
-
+ mlb_player = await mlb_search_or_post(row["key_retro"])
+
player_payload = new_player_payload(row, bat_rat_df)
-
- all_pos = get_player_record_pos(def_rat_df, row)
+
+ all_pos = get_player_record_pos(def_rat_df, row)
for x in enumerate(all_pos):
- player_payload[f'pos_{x[0] + 1}'] = x[1]
+ player_payload[f"pos_{x[0] + 1}"] = x[1]
- new_player = await db_post('players', payload=player_payload)
+ new_player = await db_post("players", payload=player_payload)
- if 'id' in new_player:
- player_id = new_player['id']
+ if "id" in new_player:
+ player_id = new_player["id"]
else:
- player_id = new_player['player_id']
+ player_id = new_player["player_id"]
- new_player = await db_patch('players', object_id=player_id, params=[('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')])
- if 'paperdex' in new_player:
- del new_player['paperdex']
+ new_player = await db_patch(
+ "players",
+ object_id=player_id,
+ params=[
+ (
+ "image",
+ f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
+ )
+ ],
+ )
+ if "paperdex" in new_player:
+ del new_player["paperdex"]
# all_bbref_ids.append(row['key_bbref'])
# all_player_ids.append(player_id)
- new_player['bbref_id'] = row['key_bbref']
+ new_player["bbref_id"] = row["key_bbref"]
all_players.append(new_player)
- new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
+ new_players.append(
+ [
+ new_player["player_id"],
+ new_player["p_name"],
+ new_player["cost"],
+ new_player["rarity"]["name"],
+ new_player["pos_1"],
+ ]
+ )
dev_count += 1
elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
- starter_index = pstat_df.columns.get_loc('starter_rating')
- closer_index = pstat_df.columns.get_loc('closer_rating')
+ starter_index = pstat_df.columns.get_loc("starter_rating")
+ closer_index = pstat_df.columns.get_loc("closer_rating")
for index, row in pstat_df.iterrows():
if dev_count < 0:
break
- p_search = await player_search(row['key_bbref'])
+ p_search = await player_search(row["key_bbref"])
if p_search is not None:
- if 'id' in p_search:
- player_id = p_search['id']
+ if "id" in p_search:
+ player_id = p_search["id"]
else:
- player_id = p_search['player_id']
+ player_id = p_search["player_id"]
# Determine pitcher positions based on ratings
patch_params = [
- ('cost', f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'),
- ('rarity_id', int(pit_rat_df.loc[row['key_bbref']]['rarity_id'])),
- ('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
+ ("cost", f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'),
+ ("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
+ (
+ "image",
+ f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
+ ),
]
- player_index = pstat_df.index[pstat_df['key_bbref'] == row['key_bbref']].tolist()
+ player_index = pstat_df.index[
+ pstat_df["key_bbref"] == row["key_bbref"]
+ ].tolist()
stat_row = pstat_df.iloc[player_index]
starter_rating = stat_row.iat[0, starter_index]
if starter_rating >= 4:
- patch_params.append(('pos_1', 'SP'))
+ patch_params.append(("pos_1", "SP"))
# Clear other position slots
for i in range(2, 9):
- patch_params.append((f'pos_{i}', None))
+ patch_params.append((f"pos_{i}", None))
else:
- patch_params.append(('pos_1', 'RP'))
+ patch_params.append(("pos_1", "RP"))
closer_rating = stat_row.iat[0, closer_index]
if not pd.isna(closer_rating):
- patch_params.append(('pos_2', 'CP'))
+ patch_params.append(("pos_2", "CP"))
# Clear remaining position slots
for i in range(3, 9):
- patch_params.append((f'pos_{i}', None))
+ patch_params.append((f"pos_{i}", None))
else:
# Clear remaining position slots
for i in range(2, 9):
- patch_params.append((f'pos_{i}', None))
+ patch_params.append((f"pos_{i}", None))
- new_player = await db_patch('players', object_id=player_id, params=patch_params)
- new_player['bbref_id'] = row['key_bbref']
+ new_player = await db_patch(
+ "players", object_id=player_id, params=patch_params
+ )
+ new_player["bbref_id"] = row["key_bbref"]
all_players.append(new_player)
- player_deltas.append([
- new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
- ])
+ player_deltas.append(
+ [
+ new_player["player_id"],
+ new_player["p_name"],
+ p_search["cost"],
+ new_player["cost"],
+ p_search["rarity"]["name"],
+ new_player["rarity"]["name"],
+ ]
+ )
else:
- mlb_player = await mlb_search_or_post(row['key_retro'])
-
+ mlb_player = await mlb_search_or_post(row["key_retro"])
+
player_payload = new_player_payload(row, pit_rat_df)
- player_index = pstat_df.index[pstat_df['key_bbref'] == row['key_bbref']].tolist()
+ player_index = pstat_df.index[
+ pstat_df["key_bbref"] == row["key_bbref"]
+ ].tolist()
stat_row = pstat_df.iloc[player_index]
-
+
starter_rating = stat_row.iat[0, starter_index]
if starter_rating >= 4:
- player_payload['pos_1'] = 'SP'
+ player_payload["pos_1"] = "SP"
else:
- player_payload['pos_1'] = 'RP'
+ player_payload["pos_1"] = "RP"
closer_rating = stat_row.iat[0, closer_index]
if not pd.isna(closer_rating):
- player_payload['pos_2'] = 'CP'
+ player_payload["pos_2"] = "CP"
- new_player = await db_post('players', payload=player_payload)
+ new_player = await db_post("players", payload=player_payload)
- if 'id' in new_player:
- player_id = new_player['id']
+ if "id" in new_player:
+ player_id = new_player["id"]
else:
- player_id = new_player['player_id']
+ player_id = new_player["player_id"]
- new_player = await db_patch('players', object_id=player_id, params=[('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')])
- if 'paperdex' in new_player:
- del new_player['paperdex']
+ new_player = await db_patch(
+ "players",
+ object_id=player_id,
+ params=[
+ (
+ "image",
+ f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
+ )
+ ],
+ )
+ if "paperdex" in new_player:
+ del new_player["paperdex"]
- new_player['bbref_id'] = row['key_bbref']
+ new_player["bbref_id"] = row["key_bbref"]
all_players.append(new_player)
- new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
-
+ new_players.append(
+ [
+ new_player["player_id"],
+ new_player["p_name"],
+ new_player["cost"],
+ new_player["rarity"]["name"],
+ new_player["pos_1"],
+ ]
+ )
+
dev_count += 1
else:
- raise KeyError(f'Could not get players - not enough stat DFs were supplied')
+ raise KeyError(f"Could not get players - not enough stat DFs were supplied")
- pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv')
- pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv')
+ pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
+ f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv'
+ )
+ pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
+ f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv'
+ )
- players_df = pd.DataFrame(all_players).set_index('bbref_id')
+ players_df = pd.DataFrame(all_players).set_index("bbref_id")
return players_df
async def post_batting_cards(cards_df: pd.DataFrame):
all_cards = []
- cards_df.apply(lambda x: all_cards.append({
- 'player_id': int(x["player_id"]),
- 'steal_low': x['steal_low'],
- 'steal_high': x['steal_high'],
- 'steal_auto': x['steal_auto'],
- 'steal_jump': x['steal_jump'],
- 'bunting': x['bunt'],
- 'hit_and_run': x['hit_and_run'],
- 'running': x['running'],
- 'hand': x['hand']
- }), axis=1)
- resp = await db_put('battingcards', payload={'cards': all_cards}, timeout=6)
+ cards_df.apply(
+ lambda x: all_cards.append(
+ {
+ "player_id": int(x["player_id"]),
+ "steal_low": x["steal_low"],
+ "steal_high": x["steal_high"],
+ "steal_auto": x["steal_auto"],
+ "steal_jump": x["steal_jump"],
+ "bunting": x["bunt"],
+ "hit_and_run": x["hit_and_run"],
+ "running": x["running"],
+ "hand": x["hand"],
+ }
+ ),
+ axis=1,
+ )
+ resp = await db_put("battingcards", payload={"cards": all_cards}, timeout=6)
if resp is not None:
pass
else:
- log_exception(ValueError, 'Unable to post batting cards')
-
- bc_query = await db_get('battingcards', params=[('cardset_id', CARDSET_ID)])
- if bc_query['count'] > 0:
- bc_data = bc_query['cards']
-
+ log_exception(ValueError, "Unable to post batting cards")
+
+ bc_query = await db_get("battingcards", params=[("cardset_id", CARDSET_ID)])
+ if bc_query["count"] > 0:
+ bc_data = bc_query["cards"]
+
for line in bc_data:
- line['player_id'] = line['player']['player_id']
- line['key_bbref'] = line['player']['bbref_id']
- line['battingcard_id'] = line['id']
+ line["player_id"] = line["player"]["player_id"]
+ line["key_bbref"] = line["player"]["bbref_id"]
+ line["battingcard_id"] = line["id"]
return pd.DataFrame(bc_data)
else:
- log_exception(ValueError, 'Unable to pull newly posted batting cards')
+ log_exception(ValueError, "Unable to pull newly posted batting cards")
async def post_pitching_cards(cards_df: pd.DataFrame):
all_cards = []
+
def get_closer_rating(raw_rating):
try:
if pd.isnull(raw_rating):
@@ -1329,83 +2174,96 @@ async def post_pitching_cards(cards_df: pd.DataFrame):
except AttributeError:
return None
- cards_df.apply(lambda x: all_cards.append({
- 'player_id': int(x['player_id']),
- 'balk': x['balk'],
- 'wild_pitch': x['wild_pitch'],
- 'hold': x['hold'],
- 'starter_rating': x['starter_rating'],
- 'relief_rating': x['relief_rating'],
- 'closer_rating': get_closer_rating(x['closer_rating']),
- 'batting': x['batting'],
- 'hand': x['pitch_hand'].upper()
- }), axis=1)
- resp = await db_put('pitchingcards', payload={'cards': all_cards}, timeout=6)
+ cards_df.apply(
+ lambda x: all_cards.append(
+ {
+ "player_id": int(x["player_id"]),
+ "balk": x["balk"],
+ "wild_pitch": x["wild_pitch"],
+ "hold": x["hold"],
+ "starter_rating": x["starter_rating"],
+ "relief_rating": x["relief_rating"],
+ "closer_rating": get_closer_rating(x["closer_rating"]),
+ "batting": x["batting"],
+ "hand": x["pitch_hand"].upper(),
+ }
+ ),
+ axis=1,
+ )
+ resp = await db_put("pitchingcards", payload={"cards": all_cards}, timeout=6)
if resp is not None:
pass
else:
- log_exception(ValueError, 'Unable to post pitcher cards')
-
- pc_query = await db_get('pitchingcards', params=[('cardset_id', CARDSET_ID)])
- if pc_query['count'] > 0:
- pc_data = pc_query['cards']
- if PLAYER_DESCRIPTION.lower() not in ['live', '1998']:
- pc_data = [x for x in pc_query['cards'] if x['player']['mlbplayer']['key_retro'] in PROMO_INCLUSION_RETRO_IDS]
+ log_exception(ValueError, "Unable to post pitcher cards")
+
+ pc_query = await db_get("pitchingcards", params=[("cardset_id", CARDSET_ID)])
+ if pc_query["count"] > 0:
+ pc_data = pc_query["cards"]
+ if PLAYER_DESCRIPTION.lower() not in ["live", "1998"]:
+ pc_data = [
+ x
+ for x in pc_query["cards"]
+ if x["player"]["mlbplayer"]["key_retro"] in PROMO_INCLUSION_RETRO_IDS
+ ]
for line in pc_data:
- line['player_id'] = line['player']['player_id']
- line['key_bbref'] = line['player']['bbref_id']
- line['pitchingcard_id'] = line['id']
+ line["player_id"] = line["player"]["player_id"]
+ line["key_bbref"] = line["player"]["bbref_id"]
+ line["pitchingcard_id"] = line["id"]
return pd.DataFrame(pc_data)
else:
- log_exception(ValueError, 'Unable to pull newly posted pitcher cards')
+ log_exception(ValueError, "Unable to pull newly posted pitcher cards")
async def post_batting_ratings(ratings_df: pd.DataFrame):
all_ratings = []
def append_ratings(row):
- vl = row['ratings_vL']
- vl['player_id'] = row['player_id']
- vl['battingcard_id'] = row['battingcard_id']
+ vl = row["ratings_vL"]
+ vl["player_id"] = row["player_id"]
+ vl["battingcard_id"] = row["battingcard_id"]
- vr = row['ratings_vR']
- vr['player_id'] = row['player_id']
- vr['battingcard_id'] = row['battingcard_id']
+ vr = row["ratings_vR"]
+ vr["player_id"] = row["player_id"]
+ vr["battingcard_id"] = row["battingcard_id"]
all_ratings.append(vl)
all_ratings.append(vr)
ratings_df.apply(append_ratings, axis=1)
- resp = await db_put('battingcardratings', payload={'ratings': all_ratings}, timeout=6)
+ resp = await db_put(
+ "battingcardratings", payload={"ratings": all_ratings}, timeout=6
+ )
if resp is not None:
return True
else:
- log_exception(ValueError, 'Unable to post batting ratings')
-
+ log_exception(ValueError, "Unable to post batting ratings")
+
async def post_pitching_ratings(ratings_df: pd.DataFrame):
all_ratings = []
def append_ratings(row):
- vl = row['ratings_vL']
- vl['player_id'] = row['player_id']
- vl['pitchingcard_id'] = row['pitchingcard_id']
+ vl = row["ratings_vL"]
+ vl["player_id"] = row["player_id"]
+ vl["pitchingcard_id"] = row["pitchingcard_id"]
- vr = row['ratings_vR']
- vr['player_id'] = row['player_id']
- vr['pitchingcard_id'] = row['pitchingcard_id']
+ vr = row["ratings_vR"]
+ vr["player_id"] = row["player_id"]
+ vr["pitchingcard_id"] = row["pitchingcard_id"]
all_ratings.append(vl)
all_ratings.append(vr)
ratings_df.apply(append_ratings, axis=1)
- resp = await db_put('pitchingcardratings', payload={'ratings': all_ratings}, timeout=6)
+ resp = await db_put(
+ "pitchingcardratings", payload={"ratings": all_ratings}, timeout=6
+ )
if resp is not None:
return True
else:
- log_exception(ValueError, 'Unable to post pitching ratings')
+ log_exception(ValueError, "Unable to post pitching ratings")
async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
@@ -1413,58 +2271,63 @@ async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
# (e.g., DH positions from buggy runs where outfielders had no defensive positions)
# Only delete on the first call (batters), not the second call (pitchers)
if delete_existing:
- player_ids = pos_df['player_id'].unique().tolist()
- logger.info(f'Deleting existing cardpositions for {len(player_ids)} players in current run')
- existing_positions = await db_get('cardpositions', params=[('cardset_id', CARDSET_ID)])
- if existing_positions and existing_positions.get('count', 0) > 0:
+ player_ids = pos_df["player_id"].unique().tolist()
+ logger.info(
+ f"Deleting existing cardpositions for {len(player_ids)} players in current run"
+ )
+ existing_positions = await db_get(
+ "cardpositions", params=[("cardset_id", CARDSET_ID)]
+ )
+ if existing_positions and existing_positions.get("count", 0) > 0:
deleted_count = 0
- for pos in existing_positions['positions']:
+ for pos in existing_positions["positions"]:
# Only delete positions for players being processed in this run
- if pos['player']['player_id'] in player_ids:
+ if pos["player"]["player_id"] in player_ids:
try:
- await db_delete('cardpositions', object_id=pos['id'], timeout=1)
+ await db_delete("cardpositions", object_id=pos["id"], timeout=1)
deleted_count += 1
except Exception as e:
- logger.warning(f'Failed to delete cardposition {pos["id"]}: {e}')
- logger.info(f'Deleted {deleted_count} positions for players in current run')
+ logger.warning(
+ f'Failed to delete cardposition {pos["id"]}: {e}'
+ )
+ logger.info(f"Deleted {deleted_count} positions for players in current run")
all_pos = []
def append_positions(row):
clean_row = row.dropna()
new_val = clean_row.to_dict()
- new_val['player_id'] = int(row['player_id'])
+ new_val["player_id"] = int(row["player_id"])
all_pos.append(new_val)
+
pos_df.apply(append_positions, axis=1)
- resp = await db_put('cardpositions', payload={'positions': all_pos}, timeout=6)
+ resp = await db_put("cardpositions", payload={"positions": all_pos}, timeout=6)
if resp is not None:
return True
else:
- log_exception(ValueError, 'Unable to post positions')
+ log_exception(ValueError, "Unable to post positions")
-async def post_batter_data(bs: pd.DataFrame, bc: pd.DataFrame, br: pd.DataFrame, dr: pd.DataFrame) -> int:
+async def post_batter_data(
+ bs: pd.DataFrame, bc: pd.DataFrame, br: pd.DataFrame, dr: pd.DataFrame
+) -> int:
all_players = await get_or_post_players(bstat_df=bs, bat_rat_df=br, def_rat_df=dr)
-
+
# Post Batting Cards
bc = pd.merge(
- left=bc,
- right=all_players,
- how='left',
- left_on='key_bbref',
- right_on='bbref_id'
+ left=bc, right=all_players, how="left", left_on="key_bbref", right_on="bbref_id"
)
bc = await post_batting_cards(bc)
-
+
# Post Batting Ratings
# Only merge the columns we need to avoid corrupting dict columns in br
br = pd.merge(
left=br,
- right=bc[['key_bbref', 'player_id', 'battingcard_id']],
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ right=bc[["key_bbref", "player_id", "battingcard_id"]],
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
br = await post_batting_ratings(br)
@@ -1472,23 +2335,21 @@ async def post_batter_data(bs: pd.DataFrame, bc: pd.DataFrame, br: pd.DataFrame,
dr = pd.merge(
left=dr,
right=all_players,
- how='right', # 'left',
- left_on='key_bbref',
- right_on='bbref_id'
+ how="right", # 'left',
+ left_on="key_bbref",
+ right_on="bbref_id",
)
await post_positions(dr, delete_existing=True) # Delete on first call (batters)
return len(all_players)
-async def post_pitcher_data(ps: pd.DataFrame, pc: pd.DataFrame, pr: pd.DataFrame, dr: pd.DataFrame) -> int:
+async def post_pitcher_data(
+ ps: pd.DataFrame, pc: pd.DataFrame, pr: pd.DataFrame, dr: pd.DataFrame
+) -> int:
all_players = await get_or_post_players(pstat_df=ps, pit_rat_df=pr, def_rat_df=dr)
ps = pd.merge(
- left=all_players,
- right=ps,
- how='left',
- left_on='bbref_id',
- right_on='key_bbref'
+ left=all_players, right=ps, how="left", left_on="bbref_id", right_on="key_bbref"
)
# Post Pitching Cards
@@ -1498,35 +2359,41 @@ async def post_pitcher_data(ps: pd.DataFrame, pc: pd.DataFrame, pr: pd.DataFrame
# Only merge the columns we need to avoid corrupting dict columns in pr
pr = pd.merge(
left=pr,
- right=pc[['key_bbref', 'player_id', 'pitchingcard_id']],
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ right=pc[["key_bbref", "player_id", "pitchingcard_id"]],
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
pr = await post_pitching_ratings(pr)
# Post Positions
dr = pd.merge(
- left=all_players,
- right=dr,
- how='left',
- left_on='bbref_id',
- right_on='key_bbref'
+ left=all_players, right=dr, how="left", left_on="bbref_id", right_on="key_bbref"
)
- await post_positions(dr, delete_existing=False) # Don't delete on second call (pitchers)
+ await post_positions(
+ dr, delete_existing=False
+ ) # Don't delete on second call (pitchers)
return len(all_players)
-
-async def run_batters(data_input_path: str, start_date: int, end_date: int, post_data: bool = False, season_pct: float = 1.0):
- print(f'Running the batter calcs...')
+
+async def run_batters(
+ data_input_path: str,
+ start_date: int,
+ end_date: int,
+ post_data: bool = False,
+ season_pct: float = 1.0,
+):
+ print(f"Running the batter calcs...")
# batter_start = datetime.datetime.now()
# Get batting stats
- batting_stats = get_batting_stats_by_date(f'{RETRO_FILE_PATH}{EVENTS_FILENAME}', start_date=start_date, end_date=end_date)
+ batting_stats = get_batting_stats_by_date(
+ f"{RETRO_FILE_PATH}{EVENTS_FILENAME}", start_date=start_date, end_date=end_date
+ )
bs_len = len(batting_stats)
- # end_calc = datetime.datetime.now()
+ # end_calc = datetime.datetime.now()
# print(f'Combined batting stats: {(end_calc - batter_start).total_seconds():.2f}s\n')
running_start = datetime.datetime.now()
@@ -1536,74 +2403,91 @@ async def run_batters(data_input_path: str, start_date: int, end_date: int, post
batting_stats = pd.merge(
left=batting_stats,
right=running_stats,
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
# Handle players who played for multiple teams - keep only highest-level combined totals
# Players traded during season have multiple rows: one per team + one combined (2TM, 3TM, etc.)
# Prefer: 3TM > 2TM > TOT > individual teams
- duplicated_mask = batting_stats['key_bbref'].duplicated(keep=False)
+ duplicated_mask = batting_stats["key_bbref"].duplicated(keep=False)
if duplicated_mask.any():
# Sort by Tm (descending) to prioritize higher-numbered combined totals (3TM > 2TM)
# Then drop duplicates, keeping only the first (highest priority) row per player
- batting_stats = batting_stats.sort_values('Tm', ascending=False)
- batting_stats = batting_stats.drop_duplicates(subset='key_bbref', keep='first')
+ batting_stats = batting_stats.sort_values("Tm", ascending=False)
+ batting_stats = batting_stats.drop_duplicates(subset="key_bbref", keep="first")
logger.info("Removed team-specific rows for traded batters")
bs_len = len(batting_stats) # Update length after removing duplicates
end_calc = datetime.datetime.now()
- print(f'Running stats: {(end_calc - running_start).total_seconds():.2f}s')
+ print(f"Running stats: {(end_calc - running_start).total_seconds():.2f}s")
if len(batting_stats) != bs_len:
- raise DataMismatchError(f'retrosheet_data - run_batters - We started with {bs_len} batting lines and have {len(batting_stats)} after merging with running_stats')
-
+ raise DataMismatchError(
+ f"retrosheet_data - run_batters - We started with {bs_len} batting lines and have {len(batting_stats)} after merging with running_stats"
+ )
+
+ # Resolve offense_col for card layout builder
+ batting_stats = await resolve_offense_cols(batting_stats, api_available=post_data)
+
# Calculate batting cards
card_start = datetime.datetime.now()
all_batting_cards = calc_batting_cards(batting_stats, season_pct)
card_end = datetime.datetime.now()
- print(f'Create batting cards: {(card_end - card_start).total_seconds():.2f}s')
+ print(f"Create batting cards: {(card_end - card_start).total_seconds():.2f}s")
# Calculate batting ratings
rating_start = datetime.datetime.now()
- batting_stats['battingcard_id'] = batting_stats['key_fangraphs']
+ batting_stats["battingcard_id"] = batting_stats["key_fangraphs"]
all_batting_ratings = calc_batter_ratings(batting_stats)
rating_end = datetime.datetime.now()
- print(f'Create batting ratings: {(rating_end - rating_start).total_seconds():.2f}s')
+ print(f"Create batting ratings: {(rating_end - rating_start).total_seconds():.2f}s")
# Calculate defense ratings
defense_start = datetime.datetime.now()
all_defense_ratings = calc_positions(batting_stats)
defense_end = datetime.datetime.now()
- print(f'Create defense ratings: {(defense_end - defense_start).total_seconds():.2f}s')
+ print(
+ f"Create defense ratings: {(defense_end - defense_start).total_seconds():.2f}s"
+ )
# Post all data
if post_data:
- print(f'Posting player data...')
+ print(f"Posting player data...")
post_start = datetime.datetime.now()
- num_players = await post_batter_data(batting_stats, all_batting_cards, all_batting_ratings, all_defense_ratings)
+ num_players = await post_batter_data(
+ batting_stats, all_batting_cards, all_batting_ratings, all_defense_ratings
+ )
post_end = datetime.datetime.now()
- print(f'Post player data: {(post_end - post_start).total_seconds()}s')
+ print(f"Post player data: {(post_end - post_start).total_seconds()}s")
- post_msg = f'Posted {num_players} players to the database'
+ post_msg = f"Posted {num_players} players to the database"
logger.info(post_msg)
print(post_msg)
else:
- post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
+ post_msg = f"{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database"
logger.warning(post_msg)
print(post_msg)
return batting_stats
-async def run_pitchers(data_input_path: str, start_date: int, end_date: int, post_data: bool = False, season_pct: float = 1.0):
+async def run_pitchers(
+ data_input_path: str,
+ start_date: int,
+ end_date: int,
+ post_data: bool = False,
+ season_pct: float = 1.0,
+):
# Get pitching stats
- pitching_stats = get_pitching_stats_by_date(f'{RETRO_FILE_PATH}{EVENTS_FILENAME}', start_date=start_date, end_date=end_date)
+ pitching_stats = get_pitching_stats_by_date(
+ f"{RETRO_FILE_PATH}{EVENTS_FILENAME}", start_date=start_date, end_date=end_date
+ )
# Get peripheral stats
start_time = datetime.datetime.now()
@@ -1612,42 +2496,47 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
pitching_stats = pd.merge(
left=pitching_stats,
right=periph_stats,
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
# Handle players who played for multiple teams - keep only highest-level combined totals
# Players traded during season have multiple rows: one per team + one combined (2TM, 3TM, etc.)
# Prefer: 3TM > 2TM > TOT > individual teams
- duplicated_mask = pitching_stats['key_bbref'].duplicated(keep=False)
+ duplicated_mask = pitching_stats["key_bbref"].duplicated(keep=False)
if duplicated_mask.any():
# Sort by Tm (descending) to prioritize higher-numbered combined totals (3TM > 2TM)
# Then drop duplicates, keeping only the first (highest priority) row per player
- pitching_stats = pitching_stats.sort_values('Tm', ascending=False)
- pitching_stats = pitching_stats.drop_duplicates(subset='key_bbref', keep='first')
+ pitching_stats = pitching_stats.sort_values("Tm", ascending=False)
+ pitching_stats = pitching_stats.drop_duplicates(
+ subset="key_bbref", keep="first"
+ )
logger.info(f"Removed team-specific rows for traded players")
end_time = datetime.datetime.now()
- print(f'Peripheral stats: {(end_time - start_time).total_seconds():.2f}s')
+ print(f"Peripheral stats: {(end_time - start_time).total_seconds():.2f}s")
# Calculate defense ratings
start_time = datetime.datetime.now()
- df_p = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_p.csv').set_index('key_bbref')
+ df_p = pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_p.csv").set_index("key_bbref")
# Drop 'Tm' from defense data to avoid column name conflicts (we already have it from periph_stats)
- if 'Tm' in df_p.columns:
- df_p = df_p.drop(columns=['Tm'])
+ if "Tm" in df_p.columns:
+ df_p = df_p.drop(columns=["Tm"])
pitching_stats = pd.merge(
left=pitching_stats,
right=df_p,
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
pitching_stats = pitching_stats.fillna(0)
all_defense_ratings = calc_pitcher_defense(pitching_stats)
end_time = datetime.datetime.now()
- print(f'Defense stats: {(end_time - start_time).total_seconds():.2f}s')
+ print(f"Defense stats: {(end_time - start_time).total_seconds():.2f}s")
+
+ # Resolve offense_col for card layout builder
+ pitching_stats = await resolve_offense_cols(pitching_stats, api_available=post_data)
# Calculate pitching cards
start_time = datetime.datetime.now()
@@ -1655,43 +2544,48 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
pitching_stats = pd.merge(
left=pitching_stats,
right=all_pitching_cards,
- how='left',
- left_on='key_bbref',
- right_on='key_bbref'
+ how="left",
+ left_on="key_bbref",
+ right_on="key_bbref",
)
end_time = datetime.datetime.now()
- print(f'Pit cards: {(end_time - start_time).total_seconds():.2f}s')
+ print(f"Pit cards: {(end_time - start_time).total_seconds():.2f}s")
# Calculate pitching card ratings
start_time = datetime.datetime.now()
all_pitching_ratings = calc_pitcher_ratings(pitching_stats)
end_time = datetime.datetime.now()
- print(f'Pit ratings: {(end_time - start_time).total_seconds():.2f}s')
+ print(f"Pit ratings: {(end_time - start_time).total_seconds():.2f}s")
# Post all data
if post_data:
- print(f'\nPosting player data...')
+ print(f"\nPosting player data...")
post_start = datetime.datetime.now()
- num_players = await post_pitcher_data(pitching_stats, all_pitching_cards, all_pitching_ratings, all_defense_ratings)
+ num_players = await post_pitcher_data(
+ pitching_stats,
+ all_pitching_cards,
+ all_pitching_ratings,
+ all_defense_ratings,
+ )
post_end = datetime.datetime.now()
- print(f'Post player data: {(post_end - post_start).total_seconds()}s')
+ print(f"Post player data: {(post_end - post_start).total_seconds()}s")
- post_msg = f'\nPosted {num_players} pitchers to the database'
+ post_msg = f"\nPosted {num_players} pitchers to the database"
logger.info(post_msg)
print(post_msg)
else:
- post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
+ post_msg = f"{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database"
logger.warning(post_msg)
print(post_msg)
-
+
return pitching_stats
async def main(args):
- if len(PROMO_INCLUSION_RETRO_IDS) > 0 and PLAYER_DESCRIPTION == 'Live':
- msg = f'Player description is set to *Live*, but there are {len(PROMO_INCLUSION_RETRO_IDS)} IDs in the promo inclusion list. Clear the promo list or change the player description.'
- log_exception(ValueError, msg=msg, level='error')
+ if len(PROMO_INCLUSION_RETRO_IDS) > 0 and PLAYER_DESCRIPTION == "Live":
+ msg = f"Player description is set to *Live*, but there are {len(PROMO_INCLUSION_RETRO_IDS)} IDs in the promo inclusion list. Clear the promo list or change the player description."
+ log_exception(ValueError, msg=msg, level="error")
# Temporarily commented out for Ryan Zimmerman full season run
# if weeks_between(START_DATE, END_DATE) > 5 and len(PROMO_INCLUSION_RETRO_IDS) > 0:
@@ -1699,21 +2593,33 @@ async def main(args):
# log_exception(ValueError, msg=msg, level='error')
batter_start = datetime.datetime.now()
- batting_stats = await run_batters(f'{DATA_INPUT_FILE_PATH}', start_date=START_DATE, end_date=END_DATE, post_data=POST_DATA, season_pct=SEASON_PCT)
- batting_stats.to_csv(f'batting_stats.csv')
+ batting_stats = await run_batters(
+ f"{DATA_INPUT_FILE_PATH}",
+ start_date=START_DATE,
+ end_date=END_DATE,
+ post_data=POST_DATA,
+ season_pct=SEASON_PCT,
+ )
+ batting_stats.to_csv(f"batting_stats.csv")
batter_end = datetime.datetime.now()
- print(f'\nBatter time: {(batter_end - batter_start).total_seconds():.2f}s\n')
+ print(f"\nBatter time: {(batter_end - batter_start).total_seconds():.2f}s\n")
pitcher_start = datetime.datetime.now()
- pitching_stats = await run_pitchers(f'{DATA_INPUT_FILE_PATH}', start_date=START_DATE, end_date=END_DATE, post_data=POST_DATA, season_pct=SEASON_PCT)
- pitching_stats.to_csv(f'pitching_stats.csv')
+ pitching_stats = await run_pitchers(
+ f"{DATA_INPUT_FILE_PATH}",
+ start_date=START_DATE,
+ end_date=END_DATE,
+ post_data=POST_DATA,
+ season_pct=SEASON_PCT,
+ )
+ pitching_stats.to_csv(f"pitching_stats.csv")
pitcher_end = datetime.datetime.now()
- print(f'\nPitcher time: {(pitcher_end - pitcher_start).total_seconds():.2f}s')
+ print(f"\nPitcher time: {(pitcher_end - pitcher_start).total_seconds():.2f}s")
- print(f'Total: {(pitcher_end - batter_start).total_seconds():.2f}s\n\nDone!')
+ print(f"Total: {(pitcher_end - batter_start).total_seconds():.2f}s\n\nDone!")
# await store_defense_to_csv(1998)
-if __name__ == '__main__':
+if __name__ == "__main__":
asyncio.run(main(sys.argv[1:]))
diff --git a/tests/test_batter_calcs.py b/tests/test_batter_calcs.py
index ce346c6..b40046c 100644
--- a/tests/test_batter_calcs.py
+++ b/tests/test_batter_calcs.py
@@ -1,7 +1,7 @@
from decimal import ROUND_HALF_EVEN, Decimal
import math
-from batters.calcs_batter import bp_singles, wh_singles
+from batters.models import bp_singles, wh_singles
diff --git a/tests/test_rate_stats_formulas.py b/tests/test_rate_stats_formulas.py
new file mode 100644
index 0000000..3582c26
--- /dev/null
+++ b/tests/test_rate_stats_formulas.py
@@ -0,0 +1,90 @@
+from batters.models import BattingCardRatingsModel
+from pitchers.models import PitchingCardRatingsModel
+from creation_helpers import mround
+
+
+def test_batting_model_slg_formula_matches_canonical_weights():
+ ratings = BattingCardRatingsModel(
+ battingcard_id=1,
+ bat_hand='R',
+ vs_hand='R',
+ hard_rate=0.3,
+ med_rate=0.3,
+ soft_rate=0.3,
+ pull_rate=0.3,
+ center_rate=0.3,
+ slap_rate=0.3,
+ homerun=1,
+ bp_homerun=2,
+ triple=3,
+ double_three=4,
+ double_two=5,
+ double_pull=6,
+ single_two=7,
+ single_one=8,
+ single_center=9,
+ bp_single=10,
+ )
+
+ ratings.calculate_rate_stats()
+
+ expected = mround(
+ (
+ ratings.homerun * 4
+ + ratings.bp_homerun * 2
+ + ratings.triple * 3
+ + ratings.double_three * 2
+ + ratings.double_two * 2
+ + ratings.double_pull * 2
+ + ratings.single_two
+ + ratings.single_one
+ + ratings.single_center
+ + ratings.bp_single / 2
+ ) / 108,
+ prec=5,
+ base=0.00001,
+ )
+
+ assert ratings.slg == expected
+
+
+def test_pitching_model_slg_formula_matches_canonical_weights():
+ ratings = PitchingCardRatingsModel(
+ pitchingcard_id=1,
+ pit_hand='R',
+ vs_hand='R',
+ hard_rate=0.3,
+ med_rate=0.3,
+ soft_rate=0.3,
+ homerun=1,
+ bp_homerun=2,
+ triple=3,
+ double_three=4,
+ double_two=5,
+ double_cf=6,
+ single_two=7,
+ single_one=8,
+ single_center=9,
+ bp_single=10,
+ )
+
+ ratings.calculate_rate_stats()
+
+ expected = mround(
+ (
+ ratings.homerun * 4
+ + ratings.bp_homerun * 2
+ + ratings.triple * 3
+ + ratings.double_three * 2
+ + ratings.double_two * 2
+ + ratings.double_cf * 2
+ + ratings.single_two
+ + ratings.single_one
+ + ratings.single_center
+ + ratings.bp_single / 2
+ ) / 108,
+ prec=5,
+ base=0.00001,
+ )
+
+ assert ratings.slg == expected