All pitcher data is built, ready to post data

This commit is contained in:
Cal Corum 2024-10-27 23:41:44 -05:00
parent e396b50230
commit 93b8a230db
2 changed files with 210 additions and 108 deletions

View File

@ -4,7 +4,6 @@ import math
import pydantic
from creation_helpers import mround, sanitize_chance_output
from decimal import Decimal
from typing import List, Literal
@ -12,54 +11,54 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
pitchingcard_id: int
pit_hand: Literal['R', 'L']
vs_hand: Literal['R', 'L']
all_hits: Decimal = Decimal(0.0)
all_other_ob: Decimal = Decimal(0.0)
all_outs: Decimal = Decimal(0.0)
rem_singles: Decimal = Decimal(0.0)
rem_xbh: Decimal = Decimal(0.0)
rem_hr: Decimal = Decimal(0.0)
rem_doubles: Decimal = Decimal(0.0)
hard_rate: Decimal
med_rate: Decimal
soft_rate: Decimal
# pull_rate: Decimal
# center_rate: Decimal
# slap_rate: Decimal
homerun: Decimal = Decimal(0.0)
bp_homerun: Decimal = Decimal(0.0)
triple: Decimal = Decimal(0.0)
double_three: Decimal = Decimal(0.0)
double_two: Decimal = Decimal(0.0)
double_cf: Decimal = Decimal(0.0)
single_two: Decimal = Decimal(0.0)
single_one: Decimal = Decimal(0.0)
single_center: Decimal = Decimal(0.0)
bp_single: Decimal = Decimal(0.0)
hbp: Decimal = Decimal(0.0)
walk: Decimal = Decimal(0.0)
strikeout: Decimal = Decimal(0.0)
rem_flyballs: Decimal = Decimal(0.0)
flyout_lf_b: Decimal = Decimal(0.0)
flyout_cf_b: Decimal = Decimal(0.0)
flyout_rf_b: Decimal = Decimal(0.0)
rem_groundballs: Decimal = Decimal(0.0)
groundout_a: Decimal = Decimal(0.0)
groundout_b: Decimal = Decimal(0.0)
xcheck_p: Decimal = Decimal(1.0)
xcheck_c: Decimal = Decimal(3.0)
xcheck_1b: Decimal = Decimal(2.0)
xcheck_2b: Decimal = Decimal(6.0)
xcheck_3b: Decimal = Decimal(3.0)
xcheck_ss: Decimal = Decimal(7.0)
xcheck_lf: Decimal = Decimal(2.0)
xcheck_cf: Decimal = Decimal(3.0)
xcheck_rf: Decimal = Decimal(2.0)
avg: Decimal = 0.0
obp: Decimal = 0.0
slg: Decimal = 0.0
all_hits: float = 0.0
all_other_ob: float = 0.0
all_outs: float = 0.0
rem_singles: float = 0.0
rem_xbh: float = 0.0
rem_hr: float = 0.0
rem_doubles: float = 0.0
hard_rate: float
med_rate: float
soft_rate: float
# pull_rate: float
# center_rate: float
# slap_rate: float
homerun: float = 0.0
bp_homerun: float = 0.0
triple: float = 0.0
double_three: float = 0.0
double_two: float = 0.0
double_cf: float = 0.0
single_two: float = 0.0
single_one: float = 0.0
single_center: float = 0.0
bp_single: float = 0.0
hbp: float = 0.0
walk: float = 0.0
strikeout: float = 0.0
rem_flyballs: float = 0.0
flyout_lf_b: float = 0.0
flyout_cf_b: float = 0.0
flyout_rf_b: float = 0.0
rem_groundballs: float = 0.0
groundout_a: float = 0.0
groundout_b: float = 0.0
xcheck_p: float = float(1.0)
xcheck_c: float = float(3.0)
xcheck_1b: float = float(2.0)
xcheck_2b: float = float(6.0)
xcheck_3b: float = float(3.0)
xcheck_ss: float = float(7.0)
xcheck_lf: float = float(2.0)
xcheck_cf: float = float(3.0)
xcheck_rf: float = float(2.0)
avg: float = 0.0
obp: float = 0.0
slg: float = 0.0
def total_chances(self):
return Decimal(sum([
return mround(sum([
self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b, self.xcheck_p,
@ -68,19 +67,19 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
]))
def total_hits(self):
    """Return the combined chances of every hit outcome on this card.

    Adds the home run, ballpark home run, triple, the three double fields,
    the three single fields and the ballpark single, then passes the sum
    through the project's ``mround`` helper (called with its default base,
    which is not visible from this block).
    """
    hit_chances = [
        self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
        self.single_two, self.single_one, self.single_center, self.bp_single
    ]
    return mround(sum(hit_chances))
def total_ob(self):
    """Return the combined chances of every on-base outcome on this card.

    This is the same set of hit fields summed by ``total_hits`` plus ``hbp``
    and ``walk``; the sum is passed through the project's ``mround`` helper
    (default base, not visible from this block).
    """
    on_base_chances = [
        self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
        self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk
    ]
    return mround(sum(on_base_chances))
def total_outs(self):
return Decimal(sum([
return mround(sum([
self.strikeout, self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b,
self.xcheck_p, self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss,
self.xcheck_lf, self.xcheck_cf, self.xcheck_rf
@ -98,50 +97,53 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
return {
'pitchingcard_id': self.pitchingcard_id,
'vs_hand': self.vs_hand,
'homerun': float(self.homerun),
'bp_homerun': float(self.bp_homerun),
'triple': float(self.triple),
'double_three': float(self.double_three),
'double_two': float(self.double_two),
'double_cf': float(self.double_cf),
'single_two': float(self.single_two),
'single_one': float(self.single_one),
'single_center': float(self.single_center),
'bp_single': float(self.bp_single),
'hbp': float(self.hbp),
'walk': float(self.walk),
'strikeout': float(self.strikeout),
'flyout_lf_b': float(self.flyout_lf_b),
'flyout_cf_b': float(self.flyout_cf_b),
'flyout_rf_b': float(self.flyout_rf_b),
'groundout_a': float(self.groundout_a),
'groundout_b': float(self.groundout_b),
'xcheck_p': float(self.xcheck_p),
'xcheck_c': float(self.xcheck_c),
'xcheck_1b': float(self.xcheck_1b),
'xcheck_2b': float(self.xcheck_2b),
'xcheck_3b': float(self.xcheck_3b),
'xcheck_ss': float(self.xcheck_ss),
'xcheck_lf': float(self.xcheck_lf),
'xcheck_cf': float(self.xcheck_cf),
'xcheck_rf': float(self.xcheck_rf)
'homerun': self.homerun,
'bp_homerun': self.bp_homerun,
'triple': self.triple,
'double_three': self.double_three,
'double_two': self.double_two,
'double_cf': self.double_cf,
'single_two': self.single_two,
'single_one': self.single_one,
'single_center': self.single_center,
'bp_single': self.bp_single,
'hbp': self.hbp,
'walk': self.walk,
'strikeout': self.strikeout,
'flyout_lf_b': self.flyout_lf_b,
'flyout_cf_b': self.flyout_cf_b,
'flyout_rf_b': self.flyout_rf_b,
'groundout_a': self.groundout_a,
'groundout_b': self.groundout_b,
'xcheck_p': self.xcheck_p,
'xcheck_c': self.xcheck_c,
'xcheck_1b': self.xcheck_1b,
'xcheck_2b': self.xcheck_2b,
'xcheck_3b': self.xcheck_3b,
'xcheck_ss': self.xcheck_ss,
'xcheck_lf': self.xcheck_lf,
'xcheck_cf': self.xcheck_cf,
'xcheck_rf': self.xcheck_rf,
'avg': self.avg,
'obp': self.obp,
'slg': self.slg
}
def calculate_singles(self, szn_hits, szn_singles):
if szn_hits == 0:
return
tot = sanitize_chance_output(self.all_hits * Decimal(szn_singles / szn_hits))
tot = sanitize_chance_output(self.all_hits * (szn_singles / szn_hits))
logging.debug(f'total singles: {tot}')
self.rem_singles = tot
self.bp_single = Decimal(5) if self.rem_singles >= 5 else Decimal(0)
self.bp_single = 5.0 if self.rem_singles >= 5 else 0.0
self.rem_singles -= self.bp_single
self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else Decimal(0)
self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else 0.0
self.rem_singles -= self.single_two
self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else Decimal(0)
self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else 0.0
self.rem_singles -= self.single_one
self.single_center = sanitize_chance_output(self.rem_singles)
@ -155,19 +157,19 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
if szn_xbh == 0:
return
hr_rate = Decimal(szn_homeruns / szn_xbh)
tr_rate = Decimal(szn_triples / szn_xbh)
do_rate = Decimal(szn_doubles / szn_xbh)
hr_rate = mround(szn_homeruns / szn_xbh)
tr_rate = mround(szn_triples / szn_xbh)
do_rate = mround(szn_doubles / szn_xbh)
logging.info(f'hr%: {hr_rate:.2f} / tr%: {tr_rate:.2f} / do%: {do_rate:.2f}')
raw_do_chances = sanitize_chance_output(Decimal(self.rem_xbh * do_rate))
raw_do_chances = sanitize_chance_output(self.rem_xbh * do_rate)
logging.info(f'raw do chances: {raw_do_chances}')
self.double_two = raw_do_chances if self.soft_rate > .2 else Decimal(0)
self.double_two = raw_do_chances if self.soft_rate > .2 else 0.0
self.double_cf = raw_do_chances - self.double_two
self.rem_xbh -= (self.double_two + self.double_cf + self.double_three)
logging.info(f'Double**: {self.double_two} / Double(cf): {self.double_cf} / rem xbh: {self.rem_xbh}')
self.triple = sanitize_chance_output(Decimal(self.rem_xbh * tr_rate))
self.triple = sanitize_chance_output(self.rem_xbh * tr_rate)
self.rem_xbh -= self.triple
logging.info(f'Triple: {self.triple} / rem xbh: {self.rem_xbh}')
@ -179,10 +181,10 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
elif hr_per_fb_rate > .28:
self.homerun = raw_hr_chances
elif hr_per_fb_rate > .18:
self.bp_homerun = sanitize_chance_output(raw_hr_chances * Decimal(0.4), rounding=1.0)
self.bp_homerun = sanitize_chance_output(raw_hr_chances * 0.4, rounding=1.0)
self.homerun = self.rem_xbh - self.bp_homerun
else:
self.bp_homerun = sanitize_chance_output(raw_hr_chances * Decimal(.75), rounding=1.0)
self.bp_homerun = sanitize_chance_output(raw_hr_chances * .75, rounding=1.0)
self.homerun = self.rem_xbh - self.bp_homerun
logging.info(f'BP HR: {self.bp_homerun} / ND HR: {self.homerun}')
@ -193,27 +195,27 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
if self.triple > 1:
logging.info(f'Passing {self.rem_xbh} xbh to triple')
self.triple += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
elif self.double_cf > 1:
logging.info(f'Passing {self.rem_xbh} xbh to double(cf)')
self.double_cf += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
elif self.double_two > 1:
logging.info(f'Passing {self.rem_xbh} xbh to double**')
self.double_two += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
elif self.single_two > 1:
logging.info(f'Passing {self.rem_xbh} xbh to single**')
self.single_two += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
elif self.single_center > 1:
logging.info(f'Passing {self.rem_xbh} xbh to single(cf)')
self.single_center += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
elif self.single_one > 1:
logging.info(f'Passing {self.rem_xbh} xbh to single*')
self.single_one += self.rem_xbh
self.rem_xbh = Decimal(0)
self.rem_xbh = 0.0
else:
logging.info(f'Passing {self.rem_xbh} xbh to other_ob')
self.all_other_ob += self.rem_xbh
@ -222,23 +224,23 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
if szn_walks + szn_hbp == 0:
return
self.hbp = sanitize_chance_output(self.all_other_ob * Decimal(szn_hbp / (szn_walks + szn_hbp)), rounding=1.0)
self.hbp = sanitize_chance_output(self.all_other_ob * szn_hbp / (szn_walks + szn_hbp), rounding=1.0)
self.walk = self.all_other_ob - self.hbp
def calculate_strikouts(self, szn_strikeouts, szn_ab, szn_hits):
    """Set ``self.strikeout`` from the season strikeout rate.

    The raw figure is ``all_outs`` scaled by ``1.2 * strikeouts / (AB - H)``.
    It is then trimmed by the fractional part of ``walk + raw`` and a further
    0.05, so that walks plus strikeouts land just under a whole number
    (assuming ``mround`` leaves an integer unchanged -- confirm against
    ``creation_helpers``).

    Args:
        szn_strikeouts: Season strikeouts for this split.
        szn_ab: Season at-bats for this split.
        szn_hits: Season hits for this split.

    Returns:
        None. ``self.strikeout`` is left untouched when ``szn_ab - szn_hits``
        is zero, mirroring the zero-denominator guards used by the other
        ``calculate_*`` methods in this class.
    """
    szn_outs = szn_ab - szn_hits
    if szn_outs == 0:
        # No recorded outs: the rate is undefined, so keep the default.
        return

    raw_so = sanitize_chance_output(self.all_outs * (szn_strikeouts * 1.2) / szn_outs)
    sum_bb_so = self.walk + raw_so
    excess = sum_bb_so - mround(math.floor(sum_bb_so))
    self.strikeout = raw_so - excess - .05
def calculate_other_outs(self, fb_pct, gb_pct, oppo_pct):
rem_outs = Decimal(108) - self.total_chances()
rem_outs = 108 - self.total_chances()
all_fo = sanitize_chance_output(rem_outs * Decimal(fb_pct))
all_fo = sanitize_chance_output(rem_outs * fb_pct)
if self.pit_hand == 'L':
self.flyout_lf_b = sanitize_chance_output(all_fo * Decimal(oppo_pct))
self.flyout_lf_b = sanitize_chance_output(all_fo * oppo_pct)
else:
self.flyout_rf_b = sanitize_chance_output(all_fo * Decimal(oppo_pct))
self.flyout_rf_b = sanitize_chance_output(all_fo * oppo_pct)
self.flyout_cf_b = all_fo - self.flyout_lf_b - self.flyout_rf_b
rem_outs -= (self.flyout_lf_b + self.flyout_cf_b + self.flyout_rf_b)
@ -246,7 +248,7 @@ class PitchingCardRatingsModel(pydantic.BaseModel):
self.groundout_a = sanitize_chance_output(all_gb * self.soft_rate)
self.groundout_b = sanitize_chance_output(all_gb - self.groundout_a)
rem_chances = Decimal(108) - self.total_chances()
rem_chances = 108 - self.total_chances()
logging.info(f'Remaining outs: {rem_chances}')
if self.strikeout > 1:
@ -302,7 +304,7 @@ def get_pitcher_ratings(df_data) -> List[dict]:
)
vr = PitchingCardRatingsModel(
pitchingcard_id=df_data.pitchingcard_id,
pit_hand=df_data.hand,
pit_hand=df_data.pitch_hand,
vs_hand='R',
all_hits=sanitize_chance_output((df_data['AVG_vR'] - 0.05) * 108), # Subtracting chances from BP results
all_other_ob=sanitize_chance_output(108 * (df_data['BB_vR'] + df_data['HBP_vR']) / df_data['TBF_vR']),
@ -310,8 +312,8 @@ def get_pitcher_ratings(df_data) -> List[dict]:
med_rate=df_data['Med%_vR'],
soft_rate=df_data['Soft%_vR']
)
vl.all_outs = Decimal(108 - vl.all_hits - vl.all_other_ob).quantize(Decimal("0.05"))
vr.all_outs = Decimal(108 - vr.all_hits - vr.all_other_ob).quantize(Decimal("0.05"))
vl.all_outs = mround(108 - vl.all_hits - vl.all_other_ob, base=0.5)
vr.all_outs = mround(108 - vr.all_hits - vr.all_other_ob, base=0.5)
logging.info(
f'vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} '

View File

@ -449,12 +449,20 @@ def get_pitching_stats_by_date(retro_file_path, start_date: int, end_date: int)
def get_gb_vr(row):
return all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batted_ball_type == 'G') & (all_plays.batter_hand == 'r')].count()['event_type'].astype(int)
def get_ld_vl(row):
    """Count line drives hit off this row's pitcher by left-handed batters.

    Args:
        row: A pitching-stats row; only ``row['key_retro']`` is read.

    Returns:
        The number of matching plays in ``all_plays`` as an integer.
    """
    # Kept in step with the sibling get_gb_vr helper: the "vL"/"vR" split is
    # by batter hand (lower-case 'l'/'r') and the batted-ball code is
    # upper-case (that helper matches 'G').
    # NOTE(review): 'L' as the line-drive code is assumed from that convention -- confirm against the data.
    line_drives = all_plays[
        (all_plays.pitcher_id == row['key_retro'])
        & (all_plays.batted_ball_type == 'L')
        & (all_plays.batter_hand == 'l')
    ]
    return line_drives.count()['event_type'].astype(int)
def get_ld_vr(row):
    """Count line drives hit off this row's pitcher by right-handed batters.

    Args:
        row: A pitching-stats row; only ``row['key_retro']`` is read.

    Returns:
        The number of matching plays in ``all_plays`` as an integer.
    """
    # Kept in step with the sibling get_gb_vr helper: the "vL"/"vR" split is
    # by batter hand (lower-case 'l'/'r') and the batted-ball code is
    # upper-case (that helper matches 'G').
    # NOTE(review): 'L' as the line-drive code is assumed from that convention -- confirm against the data.
    line_drives = all_plays[
        (all_plays.pitcher_id == row['key_retro'])
        & (all_plays.batted_ball_type == 'L')
        & (all_plays.batter_hand == 'r')
    ]
    return line_drives.count()['event_type'].astype(int)
pitching_stats['FB_vL'] = pitching_stats.apply(get_fb_vl, axis=1)
pitching_stats['FB_vR'] = pitching_stats.apply(get_fb_vr, axis=1)
pitching_stats['GB_vL'] = pitching_stats.apply(get_gb_vl, axis=1)
pitching_stats['GB_vR'] = pitching_stats.apply(get_gb_vr, axis=1)
pitching_stats['LD_vL'] = pitching_stats.apply(get_ld_vl, axis=1)
pitching_stats['LD_vR'] = pitching_stats.apply(get_ld_vr, axis=1)
pitching_stats['H_vL'] = pitching_stats['1B_vL'] + pitching_stats['2B_vL'] + pitching_stats['3B_vL'] + pitching_stats['HR_vL']
pitching_stats['H_vR'] = pitching_stats['1B_vR'] + pitching_stats['2B_vR'] + pitching_stats['3B_vR'] + pitching_stats['HR_vR']
@ -486,6 +494,12 @@ def get_pitching_stats_by_date(retro_file_path, start_date: int, end_date: int)
pitching_stats['Soft%_vL'] = round(1 - pitching_stats['Hard%_vL'] - pitching_stats['Med%_vL'], 5)
pitching_stats['Soft%_vR'] = round(1 - pitching_stats['Hard%_vR'] - pitching_stats['Med%_vR'], 5)
pitching_stats['FB%_vL'] = round(pitching_stats['FB_vL'] / (pitching_stats['FB_vL'] + pitching_stats['GB_vL'] + pitching_stats['LD_vL']), 5)
pitching_stats['FB%_vR'] = round(pitching_stats['FB_vR'] / (pitching_stats['FB_vR'] + pitching_stats['GB_vR'] + pitching_stats['LD_vR']), 5)
pitching_stats['GB%_vL'] = round(pitching_stats['GB_vL'] / (pitching_stats['FB_vL'] + pitching_stats['GB_vL'] + pitching_stats['LD_vL']), 5)
pitching_stats['GB%_vR'] = round(pitching_stats['GB_vR'] / (pitching_stats['FB_vR'] + pitching_stats['GB_vR'] + pitching_stats['LD_vR']), 5)
def get_oppo_vl(row):
count = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'l') & (all_plays.hit_location.str.contains('5|7'))].count()['event_type'].astype(int)
denom = all_plays[(all_plays.pitcher_id == row['key_retro']) & (all_plays.batter_hand == 'l') & (all_plays.batter_event == 't')].count()['event_type'].astype(int)
@ -611,6 +625,81 @@ def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
return all_ratings
def calc_pitcher_ratings(ps: pd.DataFrame) -> pd.DataFrame:
    """Build card ratings, rarity and cost for every pitcher row in ``ps``.

    For each row the vs-left and vs-right rating dicts come from
    ``cpi.get_pitcher_ratings``. Their ``obp + slg`` values are blended into
    ``total_ops`` (the lower of the two splits is counted twice), which then
    selects a rarity tier and a cost. Rows with ``starter_rating`` above 3
    use the starter tier table; all other rows use the reliever table.

    Returns:
        A DataFrame indexed by ``key_bbref`` with columns ``ratings_vL``,
        ``ratings_vR``, ``ops_vL``, ``ops_vR``, ``total_ops``, ``rarity_id``
        and ``cost``.
    """
    # Each entry: (total_ops ceiling, rarity_id, base_cost, base_ops, max_delta).
    # The final entry has no ceiling and catches everything the others miss.
    starter_tiers = (
        (0.4, 99, 2400, 0.38, 810),
        (0.475, 1, 810, 0.44, 270),
        (0.53, 2, 270, 0.51, 90),
        (0.6, 3, 90, 0.575, 30),
        (0.675, 4, 30, 0.64, 10),
        (None, 5, 10, 0.7, 8),
    )
    reliever_tiers = (
        (0.325, 99, 2400, 0.38, 810),
        (0.4, 1, 810, 0.44, 270),
        (0.475, 2, 270, 0.51, 90),
        (0.55, 3, 90, 0.575, 30),
        (0.625, 4, 30, 0.64, 10),
        (None, 5, 10, 0.7, 8),
    )

    def calc_cost(total_ops, base_cost, base_ops, max_delta) -> int:
        # Two delta steps per 0.1 of OPS under the tier's base; any drop
        # beyond -0.9 steps is pinned to -0.95.
        delta = ((base_ops - total_ops) / 0.1) * 2
        if delta < -0.9:
            delta = -0.95
        return round(base_cost + (max_delta * delta))

    def create_pitching_rating(row):
        row['pitchingcard_id'] = row['key_fangraphs']
        row['pitch_hand'] = row['pitch_hand'].upper()

        ratings = cpi.get_pitcher_ratings(row)
        vs_left, vs_right = ratings[0], ratings[1]
        ops_vl = vs_left['obp'] + vs_left['slg']
        ops_vr = vs_right['obp'] + vs_right['slg']
        total_ops = (ops_vl + ops_vr + min(ops_vr, ops_vl)) / 3

        tiers = starter_tiers if row['starter_rating'] > 3 else reliever_tiers
        # First tier whose ceiling covers total_ops, else the catch-all tier.
        _, rarity_id, base_cost, base_ops, max_delta = next(
            (tier for tier in tiers[:-1] if total_ops <= tier[0]),
            tiers[-1]
        )
        cost = calc_cost(total_ops, base_cost, base_ops, max_delta)

        single_row = pd.DataFrame({
            'key_bbref': [row['key_bbref']],
            'ratings_vL': [vs_left],
            'ratings_vR': [vs_right],
            'ops_vL': ops_vl,
            'ops_vR': ops_vr,
            'total_ops': total_ops,
            'rarity_id': rarity_id,
            'cost': cost
        })
        return single_row.loc[0]

    return ps.apply(create_pitching_rating, axis=1).set_index('key_bbref')
def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
df_c = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_c.csv').set_index('key_bbref')
df_1b = pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_1b.csv').set_index('key_bbref')
@ -1121,10 +1210,21 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
# Calculate pitching cards
start_time = datetime.datetime.now()
all_pitching_cards = calc_pitching_cards(pitching_stats, season_pct)
pitching_stats = pd.merge(
left=pitching_stats,
right=all_pitching_cards,
how='left',
left_on='key_bbref',
right_on='key_bbref'
)
end_time = datetime.datetime.now()
print(f'Pit cards stats: {(end_time - start_time).total_seconds():.2f}s')
print(f'Pit cards: {(end_time - start_time).total_seconds():.2f}s')
# Calculate pitching card ratings
start_time = datetime.datetime.now()
all_pitching_ratings = calc_pitcher_ratings(pitching_stats)
end_time = datetime.datetime.now()
print(f'Pit ratings: {(end_time - start_time).total_seconds():.2f}s')
# Post all data
if post_data: