This commit adds support for the new Retrosheet CSV format and resolves multiple data processing issues in retrosheet_data.py.

New Features:
- Created retrosheet_transformer.py with smart caching system
- Transforms new Retrosheet CSV format to legacy format
- Checks file timestamps to avoid redundant transformations
- Caches normalized data for instant subsequent loads (~5s → <1s)
- Handles column mapping: gid→game_id, bathand→batter_hand, etc.
- Derives event_type from multiple boolean columns
- Converts handedness values R/L → r/l
- Explicitly sets string dtypes for hit_val, hit_location, batted_ball_type

Configuration Updates:
- Updated retrosheet_data.py for 2005 season data
- START_DATE: 19980301 → 20050403 (2005 Opening Day)
- END_DATE: 19980430 → 20051002 (2005 Regular Season End)
- SEASON_PCT: 28/162 → 162/162 (full season)
- MIN_PA_VL/VR: 20/40 → 50/75 (full season minimums)
- CARDSET_ID: Updated for 2005 cardsets
- EVENTS_FILENAME: Updated to use retrosheets_events_2005.csv

Bug Fixes:
1. Multi-team player duplicates
   - Players traded during season had duplicate rows (one per team + combined)
   - Added filtering to keep only combined totals (2TM, 3TM, etc.)
   - Prevents duplicate key_bbref values in ratings dataframes
2. Column name conflicts
   - Fixed Tm column conflict when merging periph_stats and defense_p
   - Drop duplicate Tm from defense data before merge
3. Pitcher rating calculations (pitchers/calcs_pitcher.py)
   - Fixed "truth value is ambiguous" error in min() comparisons
   - Explicitly convert pandas values to float before min() operations
4. Dictionary column corruption in ratings
   - Fixed ratings_vL and ratings_vR corruption during DataFrame merges
   - Only merge specific columns (key_bbref, player_id, card_id) instead of full DataFrame
   - Removed unnecessary .set_index() calls from post_batting_cards() and post_pitching_cards()

Documentation:
- Updated CLAUDE.md with comprehensive troubleshooting section
- Added Retrosheet transformation documentation
- Documented defense CSV requirements and column naming
- Added configuration checklist for retrosheet_data.py
- Documented common issues: multi-team players, dictionary corruption, string types

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
611 lines
22 KiB
Python
611 lines
22 KiB
Python
import math
|
|
|
|
import pydantic
|
|
|
|
from creation_helpers import mround, sanitize_chance_output
|
|
from typing import List, Literal
|
|
from exceptions import logger
|
|
|
|
|
|
class PitchingCardRatingsModel(pydantic.BaseModel):
    """Chance allocation for one pitching card against one batter handedness.

    Every ``calculate_*`` method mutates the instance in place, moving chances
    from the ``all_*`` / ``rem_*`` pools into the individual result fields.
    All rates and remainders in this class are computed against a fixed total
    of 108 chances.
    """

    pitchingcard_id: int
    pit_hand: Literal['R', 'L']
    vs_hand: Literal['R', 'L']

    # Pools that the calculate_* methods draw down.
    all_hits: float = 0.0
    all_other_ob: float = 0.0
    all_outs: float = 0.0
    rem_singles: float = 0.0
    rem_xbh: float = 0.0
    rem_hr: float = 0.0
    rem_doubles: float = 0.0

    # Contact-quality rates; compared against fixed thresholds below.
    hard_rate: float
    med_rate: float
    soft_rate: float
    # pull_rate: float
    # center_rate: float
    # slap_rate: float

    # Hit results.
    homerun: float = 0.0
    bp_homerun: float = 0.0
    triple: float = 0.0
    double_three: float = 0.0
    double_two: float = 0.0
    double_cf: float = 0.0
    single_two: float = 0.0
    single_one: float = 0.0
    single_center: float = 0.0
    bp_single: float = 0.0

    # Other on-base results.
    hbp: float = 0.0
    walk: float = 0.0

    # Out results.
    strikeout: float = 0.0
    rem_flyballs: float = 0.0
    flyout_lf_b: float = 0.0
    flyout_cf_b: float = 0.0
    flyout_rf_b: float = 0.0
    rem_groundballs: float = 0.0
    groundout_a: float = 0.0
    groundout_b: float = 0.0

    # Fixed per-position x-check chances; counted as outs by total_outs().
    xcheck_p: float = 1.0
    xcheck_c: float = 3.0
    xcheck_1b: float = 2.0
    xcheck_2b: float = 6.0
    xcheck_3b: float = 3.0
    xcheck_ss: float = 7.0
    xcheck_lf: float = 2.0
    xcheck_cf: float = 3.0
    xcheck_rf: float = 2.0

    # Rate stats filled in by calculate_rate_stats().
    avg: float = 0.0
    obp: float = 0.0
    slg: float = 0.0

    def total_chances(self):
        """Return the rounded sum of every assigned result field (hits, on-base, outs, x-checks)."""
        return mround(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
            self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b, self.xcheck_p,
            self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss, self.xcheck_lf,
            self.xcheck_cf, self.xcheck_rf
        ]))

    def total_hits(self):
        """Return the rounded sum of all hit result fields."""
        return mround(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single
        ]))

    def total_ob(self):
        """Return the rounded sum of all hit fields plus hit-by-pitch and walk."""
        return mround(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk
        ]))

    def total_outs(self):
        """Return the rounded sum of strikeouts, flyouts, groundouts and x-checks."""
        return mround(sum([
            self.strikeout, self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b,
            self.xcheck_p, self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss,
            self.xcheck_lf, self.xcheck_cf, self.xcheck_rf
        ]))

    def calculate_rate_stats(self):
        """Populate ``avg``, ``obp`` and ``slg`` from the current chance fields (per 108 chances)."""
        self.avg = mround(self.total_hits() / 108, prec=5, base=0.00001)
        self.obp = mround((self.total_hits() + self.hbp + self.walk) / 108, prec=5, base=0.00001)
        # Every single and double field is counted exactly once. The previous
        # formula added single_two and double_two twice and left out single_one
        # and double_cf, so SLG was wrong whenever those fields differed.
        # bp_homerun is weighted 2 and bp_single 0.5, as in the original formula.
        total_bases = (
            self.homerun * 4 + self.triple * 3
            + self.single_center + self.single_two + self.single_one
            + (self.double_two + self.double_three + self.double_cf + self.bp_homerun) * 2
            + self.bp_single / 2
        )
        self.slg = mround(total_bases / 108, prec=5, base=0.00001)

    def custom_to_dict(self):
        """Refresh the rate stats and return the result fields as a plain dict.

        Pool/remainder fields, the contact rates and ``pit_hand`` are not included.
        """
        self.calculate_rate_stats()
        return {
            'pitchingcard_id': self.pitchingcard_id,
            'vs_hand': self.vs_hand,
            'homerun': self.homerun,
            'bp_homerun': self.bp_homerun,
            'triple': self.triple,
            'double_three': self.double_three,
            'double_two': self.double_two,
            'double_cf': self.double_cf,
            'single_two': self.single_two,
            'single_one': self.single_one,
            'single_center': self.single_center,
            'bp_single': self.bp_single,
            'hbp': self.hbp,
            'walk': self.walk,
            'strikeout': self.strikeout,
            'flyout_lf_b': self.flyout_lf_b,
            'flyout_cf_b': self.flyout_cf_b,
            'flyout_rf_b': self.flyout_rf_b,
            'groundout_a': self.groundout_a,
            'groundout_b': self.groundout_b,
            'xcheck_p': self.xcheck_p,
            'xcheck_c': self.xcheck_c,
            'xcheck_1b': self.xcheck_1b,
            'xcheck_2b': self.xcheck_2b,
            'xcheck_3b': self.xcheck_3b,
            'xcheck_ss': self.xcheck_ss,
            'xcheck_lf': self.xcheck_lf,
            'xcheck_cf': self.xcheck_cf,
            'xcheck_rf': self.xcheck_rf,
            'avg': self.avg,
            'obp': self.obp,
            'slg': self.slg
        }

    def calculate_singles(self, szn_hits, szn_singles):
        """Split ``all_hits`` into the single fields and set ``rem_xbh`` to what is left.

        Does nothing when ``szn_hits`` is 0 (avoids dividing by zero).
        """
        if szn_hits == 0:
            return

        tot = sanitize_chance_output(self.all_hits * (szn_singles / szn_hits))
        logger.debug(f'total singles: {tot}')
        self.rem_singles = tot

        # A flat 5 chances go to bp_single once at least 5 singles are available.
        self.bp_single = 5.0 if self.rem_singles >= 5 else 0.0
        self.rem_singles -= self.bp_single

        # Half of the remainder goes to single_two unless hard contact is below 20%.
        self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else 0.0
        self.rem_singles -= self.single_two

        # With soft contact of at least 20% the rest goes to single_one ...
        self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else 0.0
        self.rem_singles -= self.single_one

        # ... otherwise it lands in single_center.
        self.single_center = sanitize_chance_output(self.rem_singles)
        self.rem_singles -= self.single_center

        self.rem_xbh = self.all_hits - self.single_center - self.single_one - self.single_two - self.bp_single
        logger.info(f'remaining singles: {self.rem_singles} / total xbh: {self.rem_xbh}')

    def calculate_xbh(self, szn_doubles, szn_triples, szn_homeruns, hr_per_fb_rate):
        """Distribute ``rem_xbh`` across doubles, triples and home runs.

        Does nothing when the season extra-base-hit total is 0. Any chances left
        after the split are pushed into the first hit field (checked in a fixed
        order) that already holds more than 1 chance, or into ``all_other_ob``
        when none does.
        """
        szn_xbh = szn_doubles + szn_triples + szn_homeruns
        if szn_xbh == 0:
            return

        hr_rate = mround(szn_homeruns / szn_xbh)
        tr_rate = mround(szn_triples / szn_xbh)
        do_rate = mround(szn_doubles / szn_xbh)
        logger.info(f'hr%: {hr_rate:.2f} / tr%: {tr_rate:.2f} / do%: {do_rate:.2f}')

        raw_do_chances = sanitize_chance_output(self.rem_xbh * do_rate)
        logger.info(f'raw do chances: {raw_do_chances}')
        # All doubles go to double_two for soft-contact pitchers, otherwise to double_cf.
        self.double_two = raw_do_chances if self.soft_rate > .2 else 0.0
        self.double_cf = mround(raw_do_chances - self.double_two)
        self.rem_xbh -= mround(self.double_two + self.double_cf + self.double_three)
        logger.info(f'Double**: {self.double_two} / Double(cf): {self.double_cf} / rem xbh: {self.rem_xbh}')

        self.triple = sanitize_chance_output(self.rem_xbh * tr_rate)
        self.rem_xbh = mround(self.rem_xbh - self.triple)
        logger.info(f'Triple: {self.triple} / rem xbh: {self.rem_xbh}')

        # Whatever is left after doubles and triples is the home-run pool.
        raw_hr_chances = self.rem_xbh
        logger.info(f'raw hr chances: {raw_hr_chances}')

        # HR/FB decides how much of the pool goes to bp_homerun versus homerun.
        if hr_per_fb_rate < .08:
            self.bp_homerun = sanitize_chance_output(raw_hr_chances, min_chances=1.0, rounding=1.0)
        elif hr_per_fb_rate > .28:
            self.homerun = raw_hr_chances
        elif hr_per_fb_rate > .18:
            self.bp_homerun = sanitize_chance_output(raw_hr_chances * 0.4, min_chances=1.0, rounding=1.0)
            self.homerun = self.rem_xbh - self.bp_homerun
        else:
            self.bp_homerun = sanitize_chance_output(raw_hr_chances * .75, min_chances=1.0, rounding=1.0)
            self.homerun = mround(self.rem_xbh - self.bp_homerun)
        logger.info(f'BP HR: {self.bp_homerun} / ND HR: {self.homerun}')

        self.rem_xbh -= (self.bp_homerun + self.homerun)
        logger.info(f'excess xbh: {self.rem_xbh}')

        if self.rem_xbh > 0:
            if self.triple > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to triple')
                self.triple += self.rem_xbh
                self.rem_xbh = 0.0
            elif self.double_cf > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to double(cf)')
                self.double_cf += self.rem_xbh
                self.rem_xbh = 0.0
            elif self.double_two > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to double**')
                self.double_two += self.rem_xbh
                self.rem_xbh = 0.0
            elif self.single_two > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to single**')
                self.single_two += self.rem_xbh
                self.rem_xbh = 0.0
            elif self.single_center > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to single(cf)')
                self.single_center += self.rem_xbh
                self.rem_xbh = 0.0
            elif self.single_one > 1:
                logger.info(f'Passing {self.rem_xbh} xbh to single*')
                self.single_one += self.rem_xbh
                self.rem_xbh = 0.0
            else:
                # rem_xbh is intentionally left as-is here, matching prior behaviour.
                logger.info(f'Passing {self.rem_xbh} xbh to other_ob')
                self.all_other_ob += self.rem_xbh

    def calculate_other_ob(self, szn_walks, szn_hbp):
        """Split ``all_other_ob`` between ``hbp`` and ``walk`` by their season proportions.

        Does nothing when the season has neither walks nor hit-by-pitches.
        """
        if szn_walks + szn_hbp == 0:
            return

        this_hbp = sanitize_chance_output(self.all_other_ob * szn_hbp / (szn_walks + szn_hbp), rounding=1.0)
        logger.info(f'hbp value candidate: {this_hbp} / all_other_ob: {self.all_other_ob}')
        # Clamp so hbp can never exceed the pool or go negative.
        self.hbp = max(min(this_hbp, self.all_other_ob), 0)
        self.walk = mround(self.all_other_ob - self.hbp)
        logger.info(f'self.hbp: {self.hbp} / self.walk: {self.walk}')

    def calculate_strikouts(self, szn_strikeouts, szn_ab, szn_hits):
        """Set ``strikeout`` from the season strikeout share of non-hit at-bats.

        The method name keeps its historical spelling because callers use it.
        """
        # Guard against a zero/negative denominator when hits >= at-bats.
        denom = max(szn_ab - szn_hits, 1)
        raw_so = sanitize_chance_output(self.all_outs * (szn_strikeouts * 1.2) / denom)
        sum_bb_so = self.walk + raw_so
        # Fractional part of walk + strikeout; trimmed off the strikeouts below.
        excess = sum_bb_so - mround(math.floor(sum_bb_so))
        logger.info(f'raw_so: {raw_so} / sum_bb_so: {sum_bb_so} / excess: {excess}')

        # The clamp guarantees a non-negative value, so the former
        # "less than zero" error check could never fire and has been dropped.
        self.strikeout = max(raw_so - excess - .05, 0.0)

    def calculate_other_outs(self, fb_pct, gb_pct, oppo_pct):
        """Fill the flyout and groundout fields with the chances still unassigned out of 108.

        ``gb_pct`` is accepted for interface compatibility but is not used:
        groundouts receive whatever remains after the flyouts.

        Raises:
            ValueError: if no result field holds more than 1 chance to absorb
                the final rounding remainder.
        """
        rem_outs = 108 - self.total_chances()

        all_fo = sanitize_chance_output(rem_outs * fb_pct)
        # The oppo share goes to LF for left-handed pitchers and RF otherwise.
        if self.pit_hand == 'L':
            self.flyout_lf_b = sanitize_chance_output(all_fo * oppo_pct)
        else:
            self.flyout_rf_b = sanitize_chance_output(all_fo * oppo_pct)
        self.flyout_cf_b = all_fo - self.flyout_lf_b - self.flyout_rf_b
        rem_outs -= (self.flyout_lf_b + self.flyout_cf_b + self.flyout_rf_b)

        all_gb = rem_outs
        self.groundout_a = sanitize_chance_output(all_gb * self.soft_rate)
        self.groundout_b = sanitize_chance_output(all_gb - self.groundout_a)

        # Rounding can leave the card slightly off 108; push the difference
        # into the first field (in this fixed order) that has more than 1 chance.
        rem_chances = 108 - self.total_chances()
        logger.info(f'Remaining outs: {rem_chances}')

        if self.strikeout > 1:
            logger.info(f'Passing {rem_chances} outs to strikeouts')
            self.strikeout += rem_chances
        elif self.flyout_cf_b > 1:
            logger.info(f'Passing {rem_chances} outs to fly(cf)')
            self.flyout_cf_b += rem_chances
        elif self.flyout_rf_b > 1:
            logger.info(f'Passing {rem_chances} outs to fly(rf)')
            self.flyout_rf_b += rem_chances
        elif self.flyout_lf_b > 1:
            logger.info(f'Passing {rem_chances} outs to fly(lf)')
            self.flyout_lf_b += rem_chances
        elif self.groundout_a > 1:
            logger.info(f'Passing {rem_chances} outs to gbA')
            self.groundout_a += rem_chances
        elif self.single_one > 1:
            logger.info(f'Passing {rem_chances} outs to single*')
            self.single_one += rem_chances
        elif self.single_center > 1:
            logger.info(f'Passing {rem_chances} outs to single(cf)')
            self.single_center += rem_chances
        elif self.single_two > 1:
            logger.info(f'Passing {rem_chances} outs to single**')
            self.single_two += rem_chances
        elif self.double_two > 1:
            logger.info(f'Passing {rem_chances} outs to double**')
            self.double_two += rem_chances
        elif self.double_cf > 1:
            logger.info(f'Passing {rem_chances} outs to double(cf)')
            self.double_cf += rem_chances
        elif self.triple > 1:
            logger.info(f'Passing {rem_chances} outs to triple')
            self.triple += rem_chances
        elif self.homerun > 1:
            logger.info(f'Passing {rem_chances} outs to homerun')
            self.homerun += rem_chances
        else:
            raise ValueError('Could not complete card')
|
|
|
|
|
|
def get_pitcher_ratings(df_data) -> List[dict]:
    """Build the vs-left and vs-right rating dicts for one pitcher.

    ``df_data`` is indexed by stat column name (``'BB_vL'``, ``'AVG_vR'`` ...)
    and also exposes ``pitchingcard_id`` and ``pitch_hand`` as attributes.
    Returns ``[vs_left_dict, vs_right_dict]`` as produced by
    ``PitchingCardRatingsModel.custom_to_dict``.
    """
    def stat(name, hand):
        # Split columns are named '<stat>_vL' / '<stat>_vR'.
        return df_data[f'{name}_v{hand}']

    hands = ('L', 'R')

    # Calculate OB values up front as plain floats so min() compares scalars.
    # NOTE(review): min(ob, 0.8) limits other-OB to at most 0.8 before
    # sanitizing, although the original comment calls it a "min cap" - confirm
    # whether max() was intended.
    other_ob = {
        hand: float(108 * (stat('BB', hand) + stat('HBP', hand)) / stat('TBF', hand))
        for hand in hands
    }

    cards = [
        PitchingCardRatingsModel(
            pitchingcard_id=df_data.pitchingcard_id,
            pit_hand=df_data.pitch_hand,
            vs_hand=hand,
            # Subtracting chances from BP results
            all_hits=sanitize_chance_output((stat('AVG', hand) - 0.05) * 108),
            all_other_ob=sanitize_chance_output(min(other_ob[hand], 0.8)),
            hard_rate=stat('Hard%', hand),
            med_rate=stat('Med%', hand),
            soft_rate=stat('Soft%', hand),
        )
        for hand in hands
    ]
    # Each step below runs for vL first and then vR, so the call and log
    # order is the same as writing the two sides out by hand.
    sides = list(zip(hands, cards))

    for _, card in sides:
        card.all_outs = mround(108 - card.all_hits - card.all_other_ob, base=0.5)
    for hand, card in sides:
        logger.info(
            f'v{hand} - All Hits: {card.all_hits} / Other OB: {card.all_other_ob} / All Outs: {card.all_outs} '
            f'/ Total: {card.total_chances()}'
        )

    for hand, card in sides:
        card.calculate_singles(
            stat('H', hand),
            stat('H', hand) - stat('2B', hand) - stat('3B', hand) - stat('HR', hand)
        )
    for hand, card in sides:
        logger.info(
            f'v{hand}: All Hits: {card.all_hits} / BP Singles: {card.bp_single} / Single 2: {card.single_two} / '
            f'Single 1: {card.single_one} / Single CF: {card.single_center}'
        )

    for hand, card in sides:
        card.calculate_xbh(stat('2B', hand), stat('3B', hand), stat('HR', hand), stat('HR/FB', hand))
    for hand, card in sides:
        logger.debug(
            f'v{hand}: All XBH: '
            f'{card.all_hits - card.single_one - card.single_two - card.single_center - card.bp_single} / '
            f'Double**: {card.double_two} / Double(cf): {card.double_cf} / Triple: {card.triple} / '
            f'BP HR: {card.bp_homerun} / ND HR: {card.homerun}'
        )

    for hand, card in sides:
        card.calculate_other_ob(stat('BB', hand), stat('HBP', hand))
    for hand, card in sides:
        logger.info(
            f'v{hand}: All other OB: {card.all_other_ob} / HBP: {card.hbp} / BB: {card.walk} / '
            f'Total Chances: {card.total_chances()}'
        )

    for hand, card in sides:
        card.calculate_strikouts(
            stat('SO', hand),
            stat('TBF', hand) - stat('BB', hand) - stat('IBB', hand) - stat('HBP', hand),
            stat('H', hand)
        )
    for hand, card in sides:
        logger.info(
            f'v{hand}: All Outs: {card.all_outs} / Ks: {card.strikeout} / Current Outs: {card.total_outs()}'
        )

    for hand, card in sides:
        card.calculate_other_outs(stat('FB%', hand), stat('GB%', hand), stat('Oppo%', hand))
    for hand, card in sides:
        logger.info(f'v{hand}: Total chances: {card.total_chances()}')

    return [card.custom_to_dict() for _, card in sides]
|
|
|
|
|
|
def total_chances(chance_data):
    """Add up every value in ``chance_data`` whose key is not an identifier/metadata key."""
    non_chance_keys = ('id', 'player_id', 'cardset_id', 'vs_hand', 'is_prep')
    return sum(chance_data[key] for key in chance_data if key not in non_chance_keys)
|
|
|
|
|
|
def soft_rate(pct):
    """Bucket a soft-contact rate: above .2 is 'high', below .1 is 'low', otherwise 'avg'."""
    return 'high' if pct > .2 else ('low' if pct < .1 else 'avg')
|
|
|
|
|
|
def med_rate(pct):
    """Bucket a medium-contact rate: above .65 is 'high', below .4 is 'low', otherwise 'avg'."""
    return 'high' if pct > .65 else ('low' if pct < .4 else 'avg')
|
|
|
|
|
|
def hard_rate(pct):
    """Bucket a hard-contact rate: above .4 is 'high', below .2 is 'low', otherwise 'avg'."""
    return 'high' if pct > .4 else ('low' if pct < .2 else 'avg')
|
|
|
|
|
|
def hr_per_fb_rate(pct):
    """Bucket an HR/FB rate: above .18 is 'high', below .08 is 'low', otherwise 'avg'."""
    return 'high' if pct > .18 else ('low' if pct < .08 else 'avg')
|
|
|
|
|
|
def all_singles(row, hits_vl, hits_vr):
    """Return (vs-left, vs-right) single chances, rounded with ``mround``.

    For each side the hit chances are scaled by the share of season hits that
    were not doubles, triples or home runs. The counts are read positionally
    from ``row``: indexes 7/8/9/12 for vs-left and 40/41/42/45 for vs-right.
    A side with zero season hits yields 0.
    """
    def _singles_for(hit_chances, hits_idx, doubles_idx, triples_idx, homers_idx):
        season_hits = int(row[hits_idx])
        if season_hits == 0:
            return 0
        season_singles = season_hits - int(row[doubles_idx]) - int(row[triples_idx]) - int(row[homers_idx])
        return hit_chances * (season_singles / season_hits)

    singles_vl = _singles_for(hits_vl, 7, 8, 9, 12)
    singles_vr = _singles_for(hits_vr, 40, 41, 42, 45)
    return mround(singles_vl), mround(singles_vr)
|
|
|
|
|
|
def bp_singles(singles_vl, singles_vr):
    """Return (vs-left, vs-right) bp-single chances: 5 when the side has at least 5 singles, else 0."""
    if singles_vl >= 5:
        from_vl = 5
    else:
        from_vl = 0

    if singles_vr >= 5:
        from_vr = 5
    else:
        from_vr = 0

    return mround(from_vl), mround(from_vr)
|
|
|
|
|
|
def wh_singles(rem_si_vl, rem_si_vr, hard_rate_vl, hard_rate_vr):
    """Return (vs-left, vs-right) chances: half the remaining singles, or 0 when the hard rate is 'low'."""
    half_vl = 0 if hard_rate_vl == 'low' else rem_si_vl / 2
    half_vr = 0 if hard_rate_vr == 'low' else rem_si_vr / 2
    return mround(half_vl), mround(half_vr)
|
|
|
|
|
|
def one_singles(rem_si_vl, rem_si_vr, soft_rate_vl, soft_rate_vr):
    """Return (vs-left, vs-right) chances: all remaining singles when the soft rate is 'high', else 0."""
    taken_vl = rem_si_vl if soft_rate_vl == 'high' else 0
    taken_vr = rem_si_vr if soft_rate_vr == 'high' else 0
    return mround(taken_vl), mround(taken_vr)
|
|
|
|
|
|
def bp_homerun(hr_vl, hr_vr, hr_rate_vl, hr_rate_vr):
    """Return (vs-left, vs-right) bp-homerun chances.

    A 'low' HR rate keeps every home-run chance, 'avg' keeps 75% and anything
    else keeps 40%.
    """
    def _bp_share(hr_chances, rate_label):
        if rate_label == 'low':
            return hr_chances
        return hr_chances * (.75 if rate_label == 'avg' else .4)

    share_vl = _bp_share(hr_vl, hr_rate_vl)
    share_vr = _bp_share(hr_vr, hr_rate_vr)
    return mround(share_vl), mround(share_vr)
|
|
|
|
|
|
def triples(all_xbh_vl, all_xbh_vr, triple_rate_vl, triple_rate_vr):
    """Return (vs-left, vs-right) triple chances: xbh * triple rate when xbh is positive, else 0."""
    if all_xbh_vl > 0:
        triples_vl = all_xbh_vl * triple_rate_vl
    else:
        triples_vl = 0

    if all_xbh_vr > 0:
        triples_vr = all_xbh_vr * triple_rate_vr
    else:
        triples_vr = 0

    return mround(triples_vl), mround(triples_vr)
|
|
|
|
|
|
def two_doubles(all_doubles_vl, all_doubles_vr, soft_rate_vl, soft_rate_vr):
    """Return (vs-left, vs-right) chances: all doubles when the soft rate is 'high', else 0."""
    if soft_rate_vl == 'high':
        kept_vl = all_doubles_vl
    else:
        kept_vl = 0

    if soft_rate_vr == 'high':
        kept_vr = all_doubles_vr
    else:
        kept_vr = 0

    return mround(kept_vl), mround(kept_vr)
|
|
|
|
|
|
def hbp_rate(hbp, bb):
    """Return the hit-by-pitch to walk ratio.

    Yields 0 when there are no hit-by-pitches, and 1 when there are
    hit-by-pitches but no walks (avoids dividing by zero).
    """
    if hbp == 0:
        return 0
    if bb == 0:
        return 1
    return hbp / bb
|
|
|
|
|
|
def hbps(all_ob, this_hbp_rate):
    """Return ``all_ob * this_hbp_rate`` rounded with ``mround``, or 0 when either input is 0."""
    nothing_to_assign = all_ob == 0 or this_hbp_rate == 0
    return 0 if nothing_to_assign else mround(all_ob * this_hbp_rate)
|
|
|
|
|
|
def xchecks(pos, all_chances=True):
    """Return the x-check chance count for a position code (case-insensitive).

    With ``all_chances`` falsy the result is one lower than the full count.
    Any code not in the table is treated like a 2-chance position.
    """
    full_counts = {
        'p': 1,
        'c': 3,
        '1b': 2,
        '2b': 6,
        '3b': 3,
        'ss': 7,
        'lf': 2,
        'cf': 3,
    }
    full = full_counts.get(pos.lower(), 2)
    return full if all_chances else full - 1
|
|
|
|
|
|
def oppo_fly(all_fly, oppo_rate):
    """Return ``all_fly * oppo_rate`` rounded with ``mround``, or 0 when either input is 0."""
    nothing_to_assign = all_fly == 0 or oppo_rate == 0
    return 0 if nothing_to_assign else mround(all_fly * oppo_rate)
|
|
|
|
|
|
def groundball_a(all_gb, dp_rate):
    """Return the groundball-A share of ``all_gb``.

    Yields 0 when either input is 0, every groundball (unrounded) when
    ``dp_rate`` exceeds .6, and otherwise ``all_gb * dp_rate * 1.5`` rounded
    with ``mround``.
    """
    if all_gb == 0 or dp_rate == 0:
        return 0
    if dp_rate > .6:
        return all_gb
    return mround(all_gb * (dp_rate * 1.5))
|
|
|
|
|
|
def balks(total_balks: int, innings: float, season_pct):
    """Return a balk rating scaled by innings pitched, capped at 20.

    Inputs that raise ``ValueError`` on conversion are logged and treated as 0.
    Zero innings yields 0.
    """
    try:
        balk_count = int(total_balks)
    except ValueError:
        logger.error(f'Could not read balks: {total_balks} / setting to 0')
        balk_count = 0

    try:
        innings_pitched = float(innings)
    except ValueError:
        logger.error(f'Could not read innings: {innings} / setting to 0')
        innings_pitched = 0

    # Nothing to scale against (also avoids dividing by zero below).
    if innings_pitched == 0:
        return 0

    scaled_balks = balk_count * 290 * season_pct
    logger.info(
        f'total_balks: {balk_count} / season_pct {season_pct} / innings: {innings_pitched} / '
        f'numerator: {scaled_balks}'
    )

    return min(round(scaled_balks / innings_pitched), 20)
|
|
|
|
|
|
def wild_pitches(total_wps: int, innings: float, season_pct):
    """Return a wild-pitch rating scaled by innings pitched, capped at 20.

    ``season_pct`` is accepted for interface compatibility but is not applied;
    the season-scaled variant of the formula was previously disabled.

    Raises:
        ValueError: if ``total_wps`` or ``innings`` cannot be converted to a number.
    """
    # Convert before the zero check so string inputs such as '0' or '0.0'
    # are caught by the guard instead of causing a ZeroDivisionError.
    innings_pitched = float(innings)
    if innings_pitched == 0:
        return 0

    return min(round((int(total_wps) * 200) / innings_pitched), 20)
|
|
|
|
|
|
def closer_rating(gf: int, saves: int, games: int):
    """Return a closer rating from 0 to 6 based on games finished per game, or None.

    None is returned when any input is 0 or when fewer than 10% of games
    were finished.
    """
    if gf == 0 or games == 0 or saves == 0:
        return None

    finish_share = gf / games
    # (minimum share of games finished, rating), checked from highest to lowest.
    ladder = (
        (.875, 6),
        (.8, 5),
        (.7, 4),
        (.55, 3),
        (.4, 2),
        (.25, 1),
        (.1, 0),
    )
    for floor, rating in ladder:
        if finish_share >= floor:
            return rating
    return None
|