Beginning work on pitching rating calcs

This commit is contained in:
Cal Corum 2023-09-29 16:24:43 -05:00
parent f8f3dd9ec1
commit b3c98e3c6e
5 changed files with 299 additions and 67 deletions

View File

@ -1,11 +1,9 @@
import decimal
import logging
import math
import random
import pydantic
from creation_helpers import mround
from creation_helpers import mround, sanitize_chance_output
from typing import List, Literal
from decimal import Decimal
@ -212,14 +210,6 @@ class BattingCardRatingsModel(pydantic.BaseModel):
# return mround(sum_chances)
def sanitize_chance_output(total_chances, min_chances=1.0, rounding=0.05):
    """Snap a raw chance total onto the rounding grid used for card results.

    Args:
        total_chances: Raw chance count (float, int or Decimal).
        min_chances: Totals strictly below this value collapse to zero.
        rounding: Grid size the total is rounded to (e.g. 0.05 or 1.0).

    Returns:
        A Decimal with two decimal places: the nearest multiple of
        ``rounding``, or zero when the total is below ``min_chances``.
    """
    if total_chances >= min_chances:
        r_val = Decimal(total_chances)
    else:
        r_val = Decimal(0)
    logging.debug(f'r_val: {r_val}')

    step = Decimal(rounding)
    # round() on a Decimal yields the integer number of grid steps.
    step_count = round(r_val / step)
    # The float round-trip sheds the binary noise carried by Decimal(rounding)
    # before the final two-place quantize.
    snapped = float(step_count * step)
    return Decimal(snapped).quantize(Decimal("0.05"))
def total_singles(all_hits, szn_singles, szn_hits):
return sanitize_chance_output(all_hits * ((szn_singles * .8) / szn_hits))
@ -271,7 +261,7 @@ def bp_homeruns(all_hr, hr_rate):
elif hr_rate > .2:
return sanitize_chance_output(all_hr * Decimal(.4), rounding=1.0)
else:
return sanitize_chance_output(all_hr * Decimal(.75), rounding=1.0)
return sanitize_chance_output(all_hr * Decimal(.8), rounding=1.0)
def triples(all_xbh, tr_count, do_count):
@ -499,13 +489,14 @@ def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int,
def get_batter_ratings(df_data) -> List[dict]:
# Consider a sliding offense_mod based on OPS; floor of 1x and ceiling of 1.5x ?
offense_mod = 1.2
offense_mod = 1.25
vl = BattingCardRatingsModel(
battingcard_id=df_data.battingcard_id,
bat_hand=df_data['bat_hand'],
vs_hand='L',
all_hits=mround(108 * offense_mod * df_data['AVG_vL']),
all_other_ob=mround(108 * offense_mod * ((df_data['BB_vL'] + df_data['HBP_vL']) / df_data['PA_vL'])),
all_hits=sanitize_chance_output(108 * offense_mod * df_data['AVG_vL']),
all_other_ob=sanitize_chance_output(108 * offense_mod *
((df_data['BB_vL'] + df_data['HBP_vL']) / df_data['PA_vL'])),
hard_rate=df_data['Hard%_vL'],
med_rate=df_data['Med%_vL'],
soft_rate=df_data['Soft%_vL'],
@ -517,8 +508,9 @@ def get_batter_ratings(df_data) -> List[dict]:
battingcard_id=df_data.battingcard_id,
bat_hand=df_data['bat_hand'],
vs_hand='R',
all_hits=mround(108 * offense_mod * df_data['AVG_vR']),
all_other_ob=mround(108 * offense_mod * ((df_data['BB_vR'] + df_data['HBP_vR']) / df_data['PA_vR'])),
all_hits=sanitize_chance_output(108 * offense_mod * df_data['AVG_vR']),
all_other_ob=sanitize_chance_output(108 * offense_mod *
((df_data['BB_vR'] + df_data['HBP_vR']) / df_data['PA_vR'])),
hard_rate=df_data['Hard%_vR'],
med_rate=df_data['Med%_vR'],
soft_rate=df_data['Soft%_vR'],

View File

@ -9,11 +9,11 @@ from typing import Literal
def range_pitcher(rs_value: int, season_pct: float):
    """Convert a pitcher's runs-saved value into a 1-5 range rating.

    Each threshold is scaled by ``season_pct`` before comparison, so a
    partial season needs proportionally fewer runs saved per tier.

    Args:
        rs_value: Runs-saved total for the pitcher (the caller passes the
            ``bis_runs_total`` fielding column).
        season_pct: Multiplier applied to every threshold.

    Returns:
        An int from 1 (highest threshold met) to 5 (no threshold met).
    """
    # (threshold multiplier, rating), checked from best to worst.
    tiers = ((3, 1), (1, 2), (0, 3), (-2, 4))
    for multiplier, rating in tiers:
        if rs_value >= (multiplier * season_pct):
            return rating
    return 5
@ -336,7 +336,8 @@ def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> str:
hold_num = 9
final_hold = min(pick_cap, hold_num)
return f'{"+" if final_hold >= 0 else ""}{final_hold}'
return final_hold
# return f'{"+" if final_hold >= 0 else ""}{final_hold}'
def pow_ratings(innings: float, gs: int, games: int) -> (int, int):
@ -403,5 +404,8 @@ def get_bbref_fielding_df(
if len(headers) == 0:
headers.extend(col_names)
pos_frame = pd.DataFrame(data, index=indeces, columns=headers).query('key_bbref == key_bbref')
if position == 'p':
return pos_frame.drop_duplicates(subset=['key_bbref'], keep='first')
tmp = pos_frame[~pos_frame['chances'].isin(['0', '1', '2'])]
return tmp.drop_duplicates(subset=['key_bbref'], keep='first')

View File

@ -1,4 +1,189 @@
from creation_helpers import mround
import logging
import pydantic
from creation_helpers import mround, sanitize_chance_output
from decimal import Decimal
from typing import List, Literal
def get_pitcher_ratings(df_data) -> List[dict]:
    """Build the vs-left and vs-right card rating payloads for one pitcher.

    Args:
        df_data: Row-like record exposing ``pitchingcard_id`` and ``hand``
            as attributes and the split columns by key (``AVG_vL``,
            ``BB_vR``, ``TBF_vL``, ``Hard%_vR``, ...).

    Returns:
        Two dicts produced by ``PitchingCardRatingsModel.custom_to_dict``:
        the vs-left split first, then the vs-right split.
    """
    def _build_split(side):
        # One model per opposing batter hand; `side` is 'L' or 'R' and picks
        # both the vs_hand label and the matching `_v{side}` columns, so the
        # two can no longer drift apart.
        return PitchingCardRatingsModel(
            pitchingcard_id=df_data.pitchingcard_id,
            pit_hand=df_data.hand,
            vs_hand=side,
            # Subtracting chances from BP results
            all_hits=sanitize_chance_output((df_data[f'AVG_v{side}'] - 0.05) * 108),
            all_other_ob=sanitize_chance_output(
                108 * (df_data[f'BB_v{side}'] + df_data[f'HBP_v{side}']) / df_data[f'TBF_v{side}']
            ),
            hard_rate=df_data[f'Hard%_v{side}'],
            med_rate=df_data[f'Med%_v{side}'],
            soft_rate=df_data[f'Soft%_v{side}']
        )

    splits = [_build_split('L'), _build_split('R')]

    # Whatever is left of the 108 chances after hits and other on-base
    # results is assigned to outs.
    for split in splits:
        split.all_outs = Decimal(108 - split.all_hits - split.all_other_ob).quantize(Decimal("0.05"))

    for split in splits:
        logging.info(
            f'v{split.vs_hand} - All Hits: {split.all_hits} / Other OB: {split.all_other_ob} / '
            f'All Outs: {split.all_outs} / Total: {split.total_chances()}'
        )

    for split in splits:
        side = split.vs_hand
        szn_hits = df_data[f'H_v{side}']
        # Season singles = hits minus every extra-base hit.
        szn_singles = szn_hits - df_data[f'2B_v{side}'] - df_data[f'3B_v{side}'] - df_data[f'HR_v{side}']
        split.calculate_singles(szn_hits, szn_singles)

    for split in splits:
        logging.info(
            f'v{split.vs_hand}: All Hits: {split.all_hits} / BP Singles: {split.bp_single} / '
            f'Single 2: {split.single_two} / Single 1: {split.single_one} / '
            f'Single CF: {split.single_center}'
        )

    return [split.custom_to_dict() for split in splits]
class PitchingCardRatingsModel(pydantic.BaseModel):
    """Chance distribution for one pitching card against one batter hand.

    The working totals (``all_*`` / ``rem_*``) are scratch values used while
    the individual result columns are being filled in; ``custom_to_dict``
    exports only the identifiers and the individual result columns.
    """

    pitchingcard_id: int
    pit_hand: Literal['R', 'L']
    vs_hand: Literal['R', 'L']

    # Working totals; all_hits / all_other_ob / all_outs are supplied by the
    # caller, the rem_* values are running remainders.
    all_hits: Decimal = Decimal(0.0)
    all_other_ob: Decimal = Decimal(0.0)
    all_outs: Decimal = Decimal(0.0)
    rem_singles: Decimal = Decimal(0.0)
    rem_xbh: Decimal = Decimal(0.0)
    rem_hr: Decimal = Decimal(0.0)
    rem_doubles: Decimal = Decimal(0.0)

    # Contact-quality rates; calculate_singles compares hard/soft against 0.2.
    hard_rate: Decimal
    med_rate: Decimal
    soft_rate: Decimal
    # pull_rate: Decimal
    # center_rate: Decimal
    # slap_rate: Decimal

    # Hit results.
    homerun: Decimal = Decimal(0.0)
    bp_homerun: Decimal = Decimal(0.0)
    triple: Decimal = Decimal(0.0)
    double_three: Decimal = Decimal(0.0)
    double_two: Decimal = Decimal(0.0)
    double_cf: Decimal = Decimal(0.0)
    single_two: Decimal = Decimal(0.0)
    single_one: Decimal = Decimal(0.0)
    single_center: Decimal = Decimal(0.0)
    bp_single: Decimal = Decimal(0.0)

    # Non-hit on-base results.
    hbp: Decimal = Decimal(0.0)
    walk: Decimal = Decimal(0.0)

    # Out results.
    strikeout: Decimal = Decimal(0.0)
    rem_flyballs: Decimal = Decimal(0.0)
    flyout_lf_b: Decimal = Decimal(0.0)
    flyout_cf_b: Decimal = Decimal(0.0)
    flyout_rf_b: Decimal = Decimal(0.0)
    rem_groundballs: Decimal = Decimal(0.0)
    groundout_a: Decimal = Decimal(0.0)
    groundout_b: Decimal = Decimal(0.0)

    # Fielding x-checks; these fixed defaults add up to 29 chances and are
    # counted as outs by total_outs().
    xcheck_p: Decimal = Decimal(1.0)
    xcheck_c: Decimal = Decimal(3.0)
    xcheck_1b: Decimal = Decimal(2.0)
    xcheck_2b: Decimal = Decimal(6.0)
    xcheck_3b: Decimal = Decimal(3.0)
    xcheck_ss: Decimal = Decimal(7.0)
    xcheck_lf: Decimal = Decimal(2.0)
    xcheck_cf: Decimal = Decimal(3.0)
    xcheck_rf: Decimal = Decimal(2.0)

    # Slash-line fields. Defaults are Decimal (not float) so they can be
    # combined with the other Decimal fields without a TypeError.
    avg: Decimal = Decimal(0.0)
    obp: Decimal = Decimal(0.0)
    slg: Decimal = Decimal(0.0)

    def total_chances(self):
        """Return the sum of every individual result column."""
        return Decimal(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk, self.strikeout,
            self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b, self.xcheck_p,
            self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss, self.xcheck_lf,
            self.xcheck_cf, self.xcheck_rf
        ]))

    def total_hits(self):
        """Return the sum of all hit result columns."""
        return Decimal(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single
        ]))

    def total_ob(self):
        """Return the sum of all hit columns plus hit-by-pitch and walks."""
        return Decimal(sum([
            self.homerun, self.bp_homerun, self.triple, self.double_three, self.double_two, self.double_cf,
            self.single_two, self.single_one, self.single_center, self.bp_single, self.hbp, self.walk
        ]))

    def total_outs(self):
        """Return the sum of strikeouts, flyouts, groundouts and x-checks."""
        return Decimal(sum([
            self.strikeout, self.flyout_lf_b, self.flyout_cf_b, self.flyout_rf_b, self.groundout_a, self.groundout_b,
            self.xcheck_p, self.xcheck_c, self.xcheck_1b, self.xcheck_2b, self.xcheck_3b, self.xcheck_ss,
            self.xcheck_lf, self.xcheck_cf, self.xcheck_rf
        ]))

    def custom_to_dict(self):
        """Return the export payload: both identifiers plus every result column as a float."""
        # Order matters only for readability of the payload; it mirrors the
        # field declaration order above.
        result_columns = (
            'homerun', 'bp_homerun', 'triple', 'double_three', 'double_two', 'double_cf',
            'single_two', 'single_one', 'single_center', 'bp_single', 'hbp', 'walk', 'strikeout',
            'flyout_lf_b', 'flyout_cf_b', 'flyout_rf_b', 'groundout_a', 'groundout_b',
            'xcheck_p', 'xcheck_c', 'xcheck_1b', 'xcheck_2b', 'xcheck_3b', 'xcheck_ss',
            'xcheck_lf', 'xcheck_cf', 'xcheck_rf'
        )
        payload = {
            'pitchingcard_id': self.pitchingcard_id,
            'vs_hand': self.vs_hand
        }
        for column in result_columns:
            payload[column] = float(getattr(self, column))
        return payload

    def calculate_singles(self, szn_hits, szn_singles):
        """Split ``all_hits`` into the four single columns and set ``rem_xbh``.

        Args:
            szn_hits: Season hit count for this split; when it is zero the
                method returns without changing anything.
            szn_singles: Season singles count for this split.
        """
        if szn_hits == 0:
            return

        # Singles get the same share of card hits as they had of season hits.
        tot = sanitize_chance_output(self.all_hits * Decimal(szn_singles / szn_hits))
        logging.debug(f'total singles: {tot}')
        self.rem_singles = tot

        # A flat 5 chances go to bp_single whenever at least 5 are available.
        self.bp_single = Decimal(5) if self.rem_singles >= 5 else Decimal(0)
        self.rem_singles -= self.bp_single

        # Half of the remainder goes to single_two when hard contact is >= 20%.
        self.single_two = sanitize_chance_output(self.rem_singles / 2) if self.hard_rate >= 0.2 else Decimal(0)
        self.rem_singles -= self.single_two

        # All of the remainder goes to single_one when soft contact is >= 20%.
        self.single_one = sanitize_chance_output(self.rem_singles) if self.soft_rate >= .2 else Decimal(0)
        self.rem_singles -= self.single_one

        # Anything still unassigned lands on single_center.
        self.single_center = sanitize_chance_output(self.rem_singles)
        self.rem_singles -= self.single_center

        # Hits not consumed by singles are left over for extra-base hits.
        self.rem_xbh = self.all_hits - self.single_center - self.single_one - self.single_two - self.bp_single
        logging.info(f'remaining singles: {self.rem_singles}')
def total_chances(chance_data):
@ -193,23 +378,23 @@ def wild_pitches(total_wps: int, innings: float, season_pct):
return min(round((total_wps * 200 * season_pct) / innings), 20)
def closer_rating(gf: int, saves: int, games: int):
    """Rate a pitcher's closer role from the share of games he finished.

    Args:
        gf: Games finished.
        saves: Saves; a pitcher with none gets no rating.
        games: Games pitched.

    Returns:
        An int from 0 to 6 (higher means a larger share of games finished),
        or None when any input is zero or fewer than 10% of games were
        finished.
    """
    if gf == 0 or games == 0 or saves == 0:
        return None

    finish_rate = gf / games
    # (minimum finish rate, rating), checked from highest to lowest.
    tiers = ((.875, 6), (.8, 5), (.7, 4), (.55, 3), (.4, 2), (.25, 1), (.1, 0))
    for floor, rating in tiers:
        if finish_rate >= floor:
            return rating
    return None

View File

@ -1,6 +1,8 @@
import csv
import datetime
import logging
from decimal import Decimal
import pandas as pd
import random
import requests
@ -856,3 +858,10 @@ def defense_rg(all_pos: list) -> list:
return rg_data
def sanitize_chance_output(total_chances, min_chances=1.0, rounding=0.05):
    """Round a raw chance total to the nearest multiple of ``rounding``.

    Args:
        total_chances: Raw chance count (float, int or Decimal).
        min_chances: Totals strictly below this value are treated as zero.
        rounding: Size of the rounding grid (e.g. 0.05 or 1.0).

    Returns:
        A Decimal carrying two decimal places.
    """
    if total_chances >= min_chances:
        r_val = Decimal(total_chances)
    else:
        r_val = Decimal(0)
    logging.debug(f'r_val: {r_val}')

    grid = Decimal(rounding)
    # round() on a Decimal returns the whole number of grid steps.
    whole_steps = round(r_val / grid)
    # Passing through float drops the binary residue of Decimal(rounding)
    # before the result is fixed to two decimal places.
    on_grid = float(whole_steps * grid)
    return Decimal(on_grid).quantize(Decimal("0.05"))

View File

@ -16,7 +16,7 @@ import pydantic
import sys
from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \
get_pitching_stats, get_pitching_peripherals
get_pitching_stats, get_pitching_peripherals, pd_pitchingcards_df
from db_calls import db_get, db_put, db_post, db_patch
from typing import Literal
from bs4 import BeautifulSoup
@ -168,7 +168,7 @@ async def main(args):
'mlbclub': 'None',
'franchise': 'None',
'cardset_id': cardset['id'],
'set_num': df_data['key_fangraphs'],
'set_num': int(float(df_data['key_fangraphs'])),
'rarity_id': 99,
'pos_1': 'DH',
'description': f'{player_desc_prefix} {f_name} {l_name}',
@ -197,9 +197,9 @@ async def main(args):
del final_batting, run_data
print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...')
print(f'Pulling pitcher defense...')
df_p = cde.get_bbref_fielding_df('p', season)
if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true':
print(f'Pulling pitcher defense...')
df_p = cde.get_bbref_fielding_df('p', season)
print(f'Pulling catcher defense...')
df_c = cde.get_bbref_fielding_df('c', season)
print(f'Pulling first base defense...')
@ -234,7 +234,7 @@ async def main(args):
batting_cards.append({
"player_id": df_data['player_id'],
"key_bbref": df_data.name,
"key_fangraphs": df_data['key_fangraphs'],
"key_fangraphs": int(float(df_data['key_fangraphs'])),
"key_mlbam": df_data['key_mlbam'],
"key_retro": df_data['key_retro'],
"name_first": df_data["name_first"].title(),
@ -360,7 +360,7 @@ async def main(args):
batting_ratings = []
def create_batting_card_ratings(df_data):
logging.info(f'Calculating card ratings for {df_data.name}')
logging.debug(f'Calculating card ratings for {df_data.name}')
batting_ratings.extend(cba.get_batter_ratings(df_data))
print(f'Calculating card ratings...')
@ -539,7 +539,7 @@ async def main(args):
'mlbclub': 'None',
'franchise': 'None',
'cardset_id': cardset['id'],
'set_num': df_data['key_fangraphs'],
'set_num': int(float(df_data['key_fangraphs'])),
'rarity_id': 99,
'pos_1': 'P',
'description': f'{player_desc_prefix} {f_name} {l_name}',
@ -554,10 +554,11 @@ async def main(args):
player_data.at[x['bbref_id'], 'player_id'] = this_player['player_id']
player_data.at[x['bbref_id'], 'p_name'] = this_player['p_name']
final_pitching = pd.merge(
step_pitching = pd.merge(
player_data, all_pitching, left_on='key_fangraphs', right_on='playerId', sort=False
).set_index('key_bbref', drop=False)
del ids_and_names, all_pitching, p_data
final_pitching = step_pitching.join(df_p, rsuffix='_r')
del ids_and_names, all_pitching, p_data, step_pitching
print(f'Player IDs linked to batting stats.\n{len(final_pitching.values)} players remain\n')
print(f'Reading pitching peripheral stats...')
@ -565,43 +566,29 @@ async def main(args):
.drop_duplicates(subset=['Name-additional'], keep='first')
.set_index('Name-additional'))
pit_data['pitch_hand'] = pit_data.apply(get_hand, axis=1)
pitching_stats = final_pitching.join(pit_data)
pitching_stats = final_pitching.join(pit_data, lsuffix='_l')
del final_pitching, pit_data
print(f'Stats are tallied\n{len(pitching_stats.values)} players remain\n\nPosting new players...')
print(f'Stats are tallied\n{len(pitching_stats.values)} players remain\n')
pitching_cards = []
def create_pitching_card(df_data):
s_data = cba.stealing(
chances=df_data['SBO'],
sb2s=df_data['SB2'],
cs2s=df_data['CS2'],
sb3s=df_data['SB3'],
cs3s=df_data['CS3'],
season_pct=season_pct
)
pow_data = cde.pow_ratings(float(df_data['Inn_def']), int(df_data['GS']), int(df_data['G']))
pitching_cards.append({
"player_id": df_data['player_id'],
"player_id": int(float(df_data['player_id'])),
"key_bbref": df_data.name,
"key_fangraphs": df_data['key_fangraphs'],
"key_mlbam": df_data['key_mlbam'],
"key_fangraphs": int(float(df_data['key_fangraphs'])),
"key_mlbam": int(float(df_data['key_mlbam'])),
"key_retro": df_data['key_retro'],
"name_first": df_data["name_first"].title(),
"name_last": df_data["name_last"].title(),
"balks": cpi.balks(df_data['BK'], df_data['IP'], season_pct),
"balk": cpi.balks(df_data['BK'], df_data['IP'], season_pct),
"wild_pitch": cpi.wild_pitches(df_data['WP'], df_data['IP'], season_pct),
# "hold": cde.hold_pitcher(),
"steal_low": s_data[0],
"steal_high": s_data[1],
"steal_auto": s_data[2],
"steal_jump": s_data[3],
"hit_and_run": cba.hit_and_run(
df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'],
df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR']
),
"running": cba.running(df_data['XBT%']),
"hand": df_data['bat_hand']
"hold": cde.hold_pitcher(df_data['caught_stealing_perc'], int(df_data['pickoffs']), season_pct),
"starter_rating": pow_data[0],
"relief_rating": pow_data[1],
"closer_rating": cpi.closer_rating(int(df_data['GF']), int(df_data['SV']), int(df_data['G'])),
"hand": df_data['pitch_hand']
})
print(f'Calculating pitching cards...')
@ -610,6 +597,61 @@ async def main(args):
if 'post_updates' not in arg_data or arg_data['post_updates'].lower() == 'true':
resp = await db_put('pitchingcards', payload={'cards': pitching_cards}, timeout=30)
print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...')
# final_pitching_stats = pd.merge(
# pitching_stats, await pd_pitchingcards_df(cardset['id']), on='player_id')
# final_pitching_stats.set_index('key_bbref', drop=False, inplace=True)
# final_pitching_stats = final_pitching_stats.astype({'player_id': int})
pc_df = await pd_pitchingcards_df(cardset['id'])
pitching_stats = pitching_stats.merge(pc_df, how='left', on='player_id').set_index('key_bbref', drop=False)
pit_positions = []
def create_pit_position(df_data):
    """Append one pitcher fielding line for this row to ``pit_positions``.

    Rows whose index label is missing from the pitcher fielding frame get a
    fixed fallback line; all others are rated from their fielding stats.
    """
    bbref_key = df_data.name

    if bbref_key not in df_p.index:
        # No fielding record for this pitcher: use the fixed fallback line.
        pit_positions.append({
            "player_id": int(player_data.at[bbref_key, 'player_id']),
            "position": 'P',
            "innings": 1,
            "range": 5,
            "error": 51
        })
        return

    logging.debug(f'Running P stats for {df_data["p_name"]}')
    fielding_line = {
        "player_id": int(player_data.at[bbref_key, 'player_id']),
        "position": 'P',
        "innings": float(df_p.at[bbref_key, 'Inn_def'])
    }
    fielding_line["range"] = cde.range_pitcher(
        rs_value=int(df_p.at[bbref_key, 'bis_runs_total']),
        season_pct=season_pct
    )
    fielding_line["error"] = cde.get_any_error(
        pos_code='p',
        errors=int(df_p.at[bbref_key, 'E_def']),
        chances=int(df_p.at[bbref_key, 'chances']),
        season_pct=season_pct
    )
    pit_positions.append(fielding_line)
print(f'Calculating pitcher fielding lines now...')
pitching_stats.apply(create_pit_position, axis=1)
print(f'Fielding is complete.\n\nPosting positions now...')
if 'post_updates' not in arg_data or arg_data['post_updates'].lower() == 'true':
resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30)
print(f'Response: {resp}\n')
pitching_ratings = []
def create_pitching_card_ratings(df_data):
    """Compute the rating dicts for this row and add them to ``pitching_ratings``."""
    logging.info(f'Calculating pitching card ratings for {df_data.name}')
    split_ratings = cpi.get_pitcher_ratings(df_data)
    pitching_ratings.extend(split_ratings)
print(f'Calculating card ratings...')
pitching_stats.apply(create_pitching_card_ratings, axis=1) # LOOK AT SINGLES
print(f'Ratings are complete\n\nPosting ratings now...')
if 'post_updates' not in arg_data or arg_data['post_updates'].lower() == 'true':
resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30)
print(f'Response: {resp}\n\nPulling all positions to set player positions...')
if __name__ == '__main__':