Full 2023 run complete

This commit is contained in:
Cal Corum 2023-10-21 00:40:15 -05:00
parent 1bfd6ad799
commit 3a247f7487
5 changed files with 174 additions and 48 deletions

View File

@ -85,7 +85,7 @@ class BattingCardRatingsModel(pydantic.BaseModel):
return self.all_other_ob - self.hbp - self.walk
def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal):
tot = sanitize_chance_output(self.all_hits * Decimal((szn_singles * .8) / szn_hits))
tot = sanitize_chance_output(self.all_hits * Decimal((szn_singles * .8) / max(szn_hits, 1)))
logging.debug(f'tot: {tot}')
self.rem_singles = tot
@ -135,7 +135,7 @@ class BattingCardRatingsModel(pydantic.BaseModel):
self.all_outs += Decimal(rem)
def calculate_strikeouts(self, szn_so, szn_ab, szn_hits):
self.strikeout = strikeouts(self.all_outs, (szn_so / (szn_ab - szn_hits)))
self.strikeout = strikeouts(self.all_outs, (szn_so / max(szn_ab - szn_hits, 1)))
def calculate_other_outs(self, fb_rate, ld_rate, gb_rate, szn_gidp, szn_ab):
self.rem_flyballs = sanitize_chance_output(self.rem_outs() * Decimal(fb_rate))
@ -157,7 +157,7 @@ class BattingCardRatingsModel(pydantic.BaseModel):
if self.rem_flyballs > 0:
logging.debug(f'Adding {self.rem_flyballs} chances to lineouts')
tot_oneouts = sanitize_chance_output(self.rem_outs() * Decimal(ld_rate / (ld_rate + gb_rate)))
tot_oneouts = sanitize_chance_output(self.rem_outs() * Decimal(ld_rate / max(ld_rate + gb_rate, .01)))
self.lineout = sanitize_chance_output(Decimal(random.random()) * tot_oneouts)
self.popout = sanitize_chance_output(tot_oneouts - self.lineout)
@ -214,7 +214,7 @@ class BattingCardRatingsModel(pydantic.BaseModel):
def total_singles(all_hits, szn_singles, szn_hits):
return sanitize_chance_output(all_hits * ((szn_singles * .8) / szn_hits))
return sanitize_chance_output(all_hits * ((szn_singles * .8) / max(szn_hits, 1)))
def bp_singles(all_singles):
@ -246,7 +246,7 @@ def all_homeruns(rem_hits, all_hits, hrs, hits, singles):
if rem_hits == 0 or all_hits == 0 or hrs == 0 or hits - singles == 0:
return 0
else:
return mround(min(rem_hits, all_hits * ((hrs * 1.15) / hits)))
return mround(min(rem_hits, all_hits * ((hrs * 1.15) / max(hits, 1))))
def nd_homeruns(all_hr, hr_rate):
@ -271,7 +271,7 @@ def triples(all_xbh, tr_count, do_count):
if all_xbh == Decimal(0) or tr_count == Decimal(0):
return Decimal(0)
else:
return sanitize_chance_output(all_xbh * Decimal(tr_count / (tr_count + do_count)), min_chances=1)
return sanitize_chance_output(all_xbh * Decimal(tr_count / max(tr_count + do_count, 1)), min_chances=1)
def two_doubles(all_doubles, soft_rate):
@ -284,10 +284,10 @@ def two_doubles(all_doubles, soft_rate):
def hit_by_pitch(other_ob, hbps, walks):
if hbps == 0 or other_ob * Decimal(hbps / (hbps + walks)) < 1:
if hbps == 0 or other_ob * Decimal(hbps / max(hbps + walks, 1)) < 1:
return 0
else:
return sanitize_chance_output(other_ob * Decimal(hbps / (hbps + walks)), rounding=1.0)
return sanitize_chance_output(other_ob * Decimal(hbps / max(hbps + walks, 1)), rounding=1.0)
def strikeouts(all_outs, k_rate):
@ -329,7 +329,7 @@ def groundball_a(all_groundouts, gidps, abs):
if all_groundouts == 0 or gidps == 0:
return Decimal(0)
else:
return sanitize_chance_output(Decimal(min(gidps ** 2.5, abs) / abs) * all_groundouts)
return sanitize_chance_output(Decimal(min(gidps ** 2.5, abs) / max(abs, 1)) * all_groundouts)
def groundball_c(rem_groundouts, med_rate):
@ -479,7 +479,7 @@ def running(extra_base_pct: str):
def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int, hr_vr: int, so_vl: int, so_vr: int):
babip = (hits_vr + hits_vl - hr_vl - hr_vr) / (ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vl)
babip = (hits_vr + hits_vl - hr_vl - hr_vr) / max(ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vl, 1)
if babip >= .35:
return 'A'
elif babip >= .3:
@ -492,7 +492,7 @@ def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int,
def get_batter_ratings(df_data) -> List[dict]:
# Consider a sliding offense_mod based on OPS; floor of 1x and ceiling of 1.5x ?
offense_mod = 1.25
offense_mod = 1.2
vl = BattingCardRatingsModel(
battingcard_id=df_data.battingcard_id,
bat_hand=df_data['bat_hand'],

View File

@ -11,7 +11,7 @@ from typing import List, Literal
def get_pitcher_ratings(df_data) -> List[dict]:
vl = PitchingCardRatingsModel(
pitchingcard_id=df_data.pitchingcard_id,
pit_hand=df_data.hand,
pit_hand=df_data.pitch_hand,
vs_hand='L',
all_hits=sanitize_chance_output((df_data['AVG_vL'] - 0.05) * 108), # Subtracting chances from BP results
all_other_ob=sanitize_chance_output(108 * (df_data['BB_vL'] + df_data['HBP_vL']) / df_data['TBF_vL']),

View File

@ -43,6 +43,7 @@ async def main(args):
successes = []
cxn_error = False
count = -1
start_time = datetime.datetime.now()
for x in all_players:
if 'pitching' in x['image'] and 'skip_arms' in arg_data and arg_data['skip_arms'].lower() == 'true':
@ -119,7 +120,9 @@ async def main(args):
for x in successes:
logging.info(f'ID {x["player_id"]} {x["p_name"]}')
p_run_time = datetime.datetime.now() - start_time
print(f'\nAll done!\nErrors: {len(errors)}\nSuccesses: {len(successes)}')
print(f'Total runtime: {p_run_time.total_seconds()} seconds')
if __name__ == '__main__':

View File

@ -5,6 +5,7 @@ import math
from decimal import Decimal
import pandas as pd
import pybaseball as pb
import random
import requests
import time
@ -460,9 +461,13 @@ async def pd_pitchingcards_df(cardset_id: int):
async def pd_battingcardratings_df(cardset_id: int):
vl_query = await db_get(
'battingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True)])
'battingcardratings', params=[
('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True), ('team_id', 31),
('ts', 's37136685556r6135248705')])
vr_query = await db_get(
'battingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True)])
'battingcardratings', params=[
('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True), ('team_id', 31),
('ts', 's37136685556r6135248705')])
if 0 in [vl_query['count'], vr_query['count']]:
raise ValueError(f'No batting card ratings returned from Paper Dynasty API')
vl = pd.DataFrame(vl_query['ratings'])
@ -932,3 +937,72 @@ def sanitize_chance_output(total_chances, min_chances=1.0, rounding=0.05):
for x in exact_chances:
if rounded_val <= x:
return x
def mlbteam_and_franchise(mlbam_playerid):
    """Look up a player's current MLB club through the MLB Stats API.

    Requests ``/api/v1/people/<mlbam_playerid>?hydrate=currentTeam`` and, when
    the reported team name is one of the names in ``club_list``, returns that
    name as both the club and the franchise. The lookup is best-effort: every
    failure path is logged and yields the default result instead of raising.

    Args:
        mlbam_playerid: MLB AM player ID; interpolated into the request URL.

    Returns:
        dict: ``{'mlbclub': <name or None>, 'franchise': <name or None>}``.
        Both values stay ``None`` when the request times out, the response
        status is not 200, the payload carries no player or no current team,
        or the team name is not in ``club_list``.
    """
    api_url = f'https://statsapi.mlb.com/api/v1/people/{mlbam_playerid}?hydrate=currentTeam'
    logging.info(f'Calling {api_url}')

    p_data = {'mlbclub': None, 'franchise': None}
    # NOTE(review): names are compared verbatim against the API's team name.
    # Verify the API's spelling of the St. Louis club (possibly with a period);
    # a mismatch would mean that club is never matched.
    club_list = [
        'Arizona Diamondbacks',
        'Atlanta Braves',
        'Baltimore Orioles',
        'Boston Red Sox',
        'Chicago Cubs',
        'Chicago White Sox',
        'Cincinnati Reds',
        'Cleveland Guardians',
        'Colorado Rockies',
        'Detroit Tigers',
        'Houston Astros',
        'Kansas City Royals',
        'Los Angeles Angels',
        'Los Angeles Dodgers',
        'Miami Marlins',
        'Milwaukee Brewers',
        'Minnesota Twins',
        'New York Mets',
        'New York Yankees',
        'Oakland Athletics',
        'Philadelphia Phillies',
        'Pittsburgh Pirates',
        'San Diego Padres',
        'Seattle Mariners',
        'San Francisco Giants',
        'St Louis Cardinals',
        'Tampa Bay Rays',
        'Texas Rangers',
        'Toronto Blue Jays',
        'Washington Nationals'
    ]

    try:
        resp = requests.get(api_url, timeout=2)
    except requests.Timeout:
        # requests.Timeout covers both connect and read timeouts; the 2-second
        # limit applies to each, so either one must fall back to the defaults.
        logging.error(f'mlbteam_and_franchise - Timeout pulling MLB team for MLB AM player ID {mlbam_playerid}')
        return p_data

    if resp.status_code != 200:
        logging.error(f'mlbteam_and_franchise - Bad response from mlbstatsapi: {resp.status_code}')
        return p_data

    people = resp.json().get('people') or []
    if not people:
        logging.error(f'mlbteam_and_franchise - No player returned for MLB AM player ID {mlbam_playerid}')
        return p_data

    data = people[0]
    logging.debug(f'data: {data}')

    # A payload without a current team must not raise; treat it like an
    # unrecognised team and leave the defaults in place.
    team_name = (data.get('currentTeam') or {}).get('name')
    if team_name in club_list:
        p_data['mlbclub'] = team_name
        p_data['franchise'] = team_name
    else:
        logging.error(f'Could not set team for {mlbam_playerid}; received {team_name}')

    return p_data
def get_all_pybaseball_ids(player_id: list, key_type: str):
    """Reverse-look-up a player's ID record through pybaseball.

    Args:
        player_id: list of IDs handed to ``pb.playerid_reverse_lookup``.
        key_type: which ID system ``player_id`` belongs to (callers in this
            project pass ``'fangraphs'`` or ``'bbref'``).

    Returns:
        The row labelled ``0`` of the lookup result, or ``None`` (after
        logging an error) when the lookup returns no rows.
    """
    matches = pb.playerid_reverse_lookup(player_id, key_type=key_type)

    if len(matches.values) == 0:
        logging.error(f'get_all_pybaseball_ids - Could not find id {player_id} / {key_type} in pybaseball')
        return None

    return matches.loc[0]

View File

@ -17,8 +17,8 @@ import pydantic
import sys
from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \
get_pitching_stats, get_pitching_peripherals, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \
get_args
get_pitching_stats, get_all_pybaseball_ids, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \
get_args, mlbteam_and_franchise
from db_calls import db_get, db_put, db_post, db_patch
from typing import Literal
from bs4 import BeautifulSoup
@ -79,6 +79,18 @@ def sanitize_name(start_name: str) -> str:
async def main(args):
"""
params:
cardset_name: str - to be searched in pd database
games_played: int - from 1 - 162
pull_fielding: bool - whether or not to pull fielding stats from bbref
post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates
post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitching updates
post_fielders: bool - whether or not to post card positions
post_players: bool - whether or not to post player updates
p_desc_prefix: str - shows as cardset on card image and prefixes player name in discord
is_liveseries: str - whether or not to look up players' current MLB club from MLB statsapi
"""
arg_data = get_args(args)
# cardset_name = input(f'What is the name of this Cardset? ')
@ -120,10 +132,7 @@ async def main(args):
print(f'Processed {len(all_batting.values)} batters\n')
def get_pids(df_data):
q = pb.playerid_reverse_lookup([df_data["playerId"]], key_type="fangraphs")
return_val = q.loc[0] if len(q.values) > 0 else None
# print(f'lookup id: {df_data["playerId"]}\n{return_val}')
return return_val
return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')
def get_hand(df_data):
if df_data['Name'][-1] == '*':
@ -163,7 +172,8 @@ async def main(args):
'pos_1': 'DH',
'description': f'{player_description}',
'bbref_id': df_data.name,
'fangr_id': int(float(df_data['key_fangraphs']))
'fangr_id': int(float(df_data['key_fangraphs'])),
'strat_code': int(float(df_data['key_mlbam']))
})
player_data[player_data['player_id'].isnull()].apply(create_batters, axis=1)
@ -411,6 +421,15 @@ async def main(args):
).set_index('player_id', drop=False)
del total_ratings, offense_stats
def get_pids(df_data):
return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')
ids_and_names = player_data.apply(get_pids, axis=1)
player_data = (ids_and_names
.merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
.query('key_mlbam == key_mlbam')
.set_index('key_bbref', drop=False))
player_updates = {} # { <player_id> : [ (param pairs) ] }
rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
average_ops = rarity_group['total_OPS'].mean().to_dict()
@ -427,9 +446,17 @@ async def main(args):
}
params = [('description', f'{player_description}')]
if release_directory not in df_data['image']:
params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
f'{urllib.parse.quote("?d=")}{release_directory}')])
if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))
if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
params.extend([('mlbclub', team_data['mlbclub'])])
if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
params.extend([('franchise', team_data['franchise'])])
# if release_directory not in df_data['image']:
params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
f'{urllib.parse.quote("?d=")}{release_directory}')])
if df_data['cost'] == 99999:
params.extend([
@ -516,17 +543,17 @@ async def main(args):
params.extend([('cost', new_cost), ('rarity_id', new_rarity)])
if len(params) > 0:
if df_data.name not in player_updates.keys():
player_updates[df_data.name] = params
if df_data.player_id not in player_updates.keys():
player_updates[df_data.player_id] = params
else:
player_updates[df_data.name].extend(params)
player_updates[df_data.player_id].extend(params)
player_data.apply(get_player_updates, axis=1)
# print(f'Sending {len(player_updates)} player updates to PD database...')
# if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
# for x in player_updates:
# await db_patch('players', object_id=x, params=player_updates[x])
print(f'Sending {len(player_updates)} player updates to PD database...')
if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
for x in player_updates:
await db_patch('players', object_id=x, params=player_updates[x])
print(f'Batter updates are complete')
start_time_two = datetime.datetime.now()
@ -538,6 +565,9 @@ async def main(args):
all_pitching = get_pitching_stats(file_path=input_path)
print(f'Processed {len(all_pitching.values)} pitchers\n')
def get_pids(df_data):
return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')
print(f'Now pulling mlbam player IDs...')
ids_and_names = all_pitching.apply(get_pids, axis=1)
player_data = (ids_and_names
@ -564,7 +594,8 @@ async def main(args):
'pos_1': 'P',
'description': f'{player_description}',
'bbref_id': df_data.name,
'fangr_id': int(float(df_data['key_fangraphs']))
'fangr_id': int(float(df_data['key_fangraphs'])),
'strat_code': int(float(df_data['key_mlbam']))
})
player_data[player_data['player_id'].isnull()].apply(create_pitchers, axis=1)
@ -675,6 +706,13 @@ async def main(args):
resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30)
print(f'Response: {resp}\n\nPulling all positions to set player positions...')
print(f'Pitcher updates are complete')
start_time_three = datetime.datetime.now()
p_run_time = datetime.datetime.now() - start_time_two
print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n'
f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')
print(f'Checking for player updates...')
"""
Pull fresh pd_players and set_index to player_id
Pull fresh battingcards and set_index to player
@ -731,7 +769,16 @@ async def main(args):
).set_index('player_id', drop=False)
del total_ratings, pitching_stats
# player_updates = {} # { <player_id> : [ (param pairs) ] }
def get_pids(df_data):
return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')
ids_and_names = player_data.apply(get_pids, axis=1)
player_data = (ids_and_names
.merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
.query('key_mlbam == key_mlbam')
.set_index('key_bbref', drop=False))
player_updates = {} # { <player_id> : [ (param pairs) ] }
rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
average_ops = rarity_group['total_OPS'].mean().to_dict()
# cost_groups = rarity_group['cost'].mean()
@ -747,9 +794,17 @@ async def main(args):
}
params = [('description', f'{player_description}')]
if release_directory not in df_data['image']:
params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard'
f'{urllib.parse.quote("?d=")}{release_directory}')])
if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))
if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
params.extend([('mlbclub', team_data['mlbclub'])])
if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
params.extend([('franchise', team_data['franchise'])])
# if release_directory not in df_data['image']:
params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard'
f'{urllib.parse.quote("?d=")}{release_directory}')])
if df_data['cost'] == 99999:
params.extend([
@ -836,10 +891,10 @@ async def main(args):
params.extend([('cost', new_cost), ('rarity_id', new_rarity)])
if len(params) > 0:
if df_data.name not in player_updates.keys():
player_updates[df_data.name] = params
if df_data.player_id not in player_updates.keys():
player_updates[df_data.player_id] = params
else:
player_updates[df_data.name].extend(params)
player_updates[df_data.player_id].extend(params)
player_data.apply(get_player_updates, axis=1)
@ -848,13 +903,7 @@ async def main(args):
# for x in player_updates:
# await db_patch('players', object_id=x, params=player_updates[x])
print(f'Pitcher updates are complete')
start_time_three = datetime.datetime.now()
p_run_time = datetime.datetime.now() - start_time_two
print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n'
f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')
print(f'Running player position updates')
print(f'Running player position updates..')
all_pos = await pd_positions_df(cardset['id'])
def set_all_positions(df_data):
@ -863,7 +912,7 @@ async def main(args):
count = 1
for this_pos in pos_series:
if this_pos == 'P':
this_pitcher = player_data.loc[df_data['player_id']]
this_pitcher = player_data.loc[df_data['bbref_id']]
if this_pitcher['starter_rating'] > 3:
pos_updates.append((f'pos_{count}', 'SP'))
count += 1
@ -879,7 +928,7 @@ async def main(args):
count += 1
else:
pos_updates.append((f'pos_{count}', this_pos))
count += 1
count += 1
if count == 1:
pos_updates.append(('pos_1', 'DH'))