diff --git a/calcs_batter.py b/calcs_batter.py index c05f080..d1edf3b 100644 --- a/calcs_batter.py +++ b/calcs_batter.py @@ -85,7 +85,7 @@ class BattingCardRatingsModel(pydantic.BaseModel): return self.all_other_ob - self.hbp - self.walk def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal): - tot = sanitize_chance_output(self.all_hits * Decimal((szn_singles * .8) / szn_hits)) + tot = sanitize_chance_output(self.all_hits * Decimal((szn_singles * .8) / max(szn_hits, 1))) logging.debug(f'tot: {tot}') self.rem_singles = tot @@ -135,7 +135,7 @@ class BattingCardRatingsModel(pydantic.BaseModel): self.all_outs += Decimal(rem) def calculate_strikeouts(self, szn_so, szn_ab, szn_hits): - self.strikeout = strikeouts(self.all_outs, (szn_so / (szn_ab - szn_hits))) + self.strikeout = strikeouts(self.all_outs, (szn_so / max(szn_ab - szn_hits, 1))) def calculate_other_outs(self, fb_rate, ld_rate, gb_rate, szn_gidp, szn_ab): self.rem_flyballs = sanitize_chance_output(self.rem_outs() * Decimal(fb_rate)) @@ -157,7 +157,7 @@ class BattingCardRatingsModel(pydantic.BaseModel): if self.rem_flyballs > 0: logging.debug(f'Adding {self.rem_flyballs} chances to lineouts') - tot_oneouts = sanitize_chance_output(self.rem_outs() * Decimal(ld_rate / (ld_rate + gb_rate))) + tot_oneouts = sanitize_chance_output(self.rem_outs() * Decimal(ld_rate / max(ld_rate + gb_rate, .01))) self.lineout = sanitize_chance_output(Decimal(random.random()) * tot_oneouts) self.popout = sanitize_chance_output(tot_oneouts - self.lineout) @@ -214,7 +214,7 @@ class BattingCardRatingsModel(pydantic.BaseModel): def total_singles(all_hits, szn_singles, szn_hits): - return sanitize_chance_output(all_hits * ((szn_singles * .8) / szn_hits)) + return sanitize_chance_output(all_hits * ((szn_singles * .8) / max(szn_hits, 1))) def bp_singles(all_singles): @@ -246,7 +246,7 @@ def all_homeruns(rem_hits, all_hits, hrs, hits, singles): if rem_hits == 0 or all_hits == 0 or hrs == 0 or hits - singles == 0: return 0 else: - return mround(min(rem_hits, all_hits * ((hrs * 1.15) / hits))) + return mround(min(rem_hits, all_hits * ((hrs * 1.15) / max(hits, 1)))) def nd_homeruns(all_hr, hr_rate): @@ -271,7 +271,7 @@ def triples(all_xbh, tr_count, do_count): if all_xbh == Decimal(0) or tr_count == Decimal(0): return Decimal(0) else: - return sanitize_chance_output(all_xbh * Decimal(tr_count / (tr_count + do_count)), min_chances=1) + return sanitize_chance_output(all_xbh * Decimal(tr_count / max(tr_count + do_count, 1)), min_chances=1) def two_doubles(all_doubles, soft_rate): @@ -284,10 +284,10 @@ def two_doubles(all_doubles, soft_rate): def hit_by_pitch(other_ob, hbps, walks): - if hbps == 0 or other_ob * Decimal(hbps / (hbps + walks)) < 1: + if hbps == 0 or other_ob * Decimal(hbps / max(hbps + walks, 1)) < 1: return 0 else: - return sanitize_chance_output(other_ob * Decimal(hbps / (hbps + walks)), rounding=1.0) + return sanitize_chance_output(other_ob * Decimal(hbps / max(hbps + walks, 1)), rounding=1.0) def strikeouts(all_outs, k_rate): @@ -329,7 +329,7 @@ def groundball_a(all_groundouts, gidps, abs): if all_groundouts == 0 or gidps == 0: return Decimal(0) else: - return sanitize_chance_output(Decimal(min(gidps ** 2.5, abs) / abs) * all_groundouts) + return sanitize_chance_output(Decimal(min(gidps ** 2.5, abs) / max(abs, 1)) * all_groundouts) def groundball_c(rem_groundouts, med_rate): @@ -479,7 +479,7 @@ def running(extra_base_pct: str): def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int, hr_vr: int, so_vl: int, so_vr: int): - babip = (hits_vr + hits_vl - hr_vl - hr_vr) / (ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vl) + babip = (hits_vr + hits_vl - hr_vl - hr_vr) / max(ab_vl + ab_vr - so_vl - so_vr - hr_vl - hr_vl, 1) if babip >= .35: return 'A' elif babip >= .3: @@ -492,7 +492,7 @@ def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int, def get_batter_ratings(df_data) -> List[dict]: # Consider a sliding offense_mod based on OPS; floor of 1x and ceiling of 1.5x ? - offense_mod = 1.25 + offense_mod = 1.2 vl = BattingCardRatingsModel( battingcard_id=df_data.battingcard_id, bat_hand=df_data['bat_hand'], diff --git a/calcs_pitcher.py b/calcs_pitcher.py index 1dd7b06..d9d6711 100644 --- a/calcs_pitcher.py +++ b/calcs_pitcher.py @@ -11,7 +11,7 @@ from typing import List, Literal def get_pitcher_ratings(df_data) -> List[dict]: vl = PitchingCardRatingsModel( pitchingcard_id=df_data.pitchingcard_id, - pit_hand=df_data.hand, + pit_hand=df_data.pitch_hand, vs_hand='L', all_hits=sanitize_chance_output((df_data['AVG_vL'] - 0.05) * 108), # Subtracting chances from BP results all_other_ob=sanitize_chance_output(108 * (df_data['BB_vL'] + df_data['HBP_vL']) / df_data['TBF_vL']), diff --git a/check_cards.py b/check_cards.py index c6ae7cd..5cbaa57 100644 --- a/check_cards.py +++ b/check_cards.py @@ -43,6 +43,7 @@ async def main(args): successes = [] cxn_error = False count = -1 + start_time = datetime.datetime.now() for x in all_players: if 'pitching' in x['image'] and 'skip_arms' in arg_data and arg_data['skip_arms'].lower() == 'true': @@ -119,7 +120,9 @@ async def main(args): for x in successes: logging.info(f'ID {x["player_id"]} {x["p_name"]}') + p_run_time = datetime.datetime.now() - start_time print(f'\nAll done!\nErrors: {len(errors)}\nSuccesses: {len(successes)}') + print(f'Total runtime: {p_run_time.total_seconds()} seconds') if __name__ == '__main__': diff --git a/creation_helpers.py b/creation_helpers.py index 3afc993..81a9f5e 100644 --- a/creation_helpers.py +++ b/creation_helpers.py @@ -5,6 +5,7 @@ import math from decimal import Decimal import pandas as pd +import pybaseball as pb import random import requests import time @@ -460,9 +461,13 @@ async def pd_pitchingcards_df(cardset_id: int): async def pd_battingcardratings_df(cardset_id: int): vl_query = await db_get( - 'battingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True)]) + 'battingcardratings', params=[ + ('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True), ('team_id', 31), + ('ts', 's37136685556r6135248705')]) vr_query = await db_get( - 'battingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True)]) + 'battingcardratings', params=[ + ('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True), ('team_id', 31), + ('ts', 's37136685556r6135248705')]) if 0 in [vl_query['count'], vr_query['count']]: raise ValueError(f'No batting card ratings returned from Paper Dynasty API') vl = pd.DataFrame(vl_query['ratings']) @@ -932,3 +937,72 @@ def sanitize_chance_output(total_chances, min_chances=1.0, rounding=0.05): for x in exact_chances: if rounded_val <= x: return x + + +def mlbteam_and_franchise(mlbam_playerid): + api_url = f'https://statsapi.mlb.com/api/v1/people/{mlbam_playerid}?hydrate=currentTeam' + logging.info(f'Calling {api_url}') + p_data = {'mlbclub': None, 'franchise': None} + club_list = [ + 'Arizona Diamondbacks', + 'Atlanta Braves', + 'Baltimore Orioles', + 'Boston Red Sox', + 'Chicago Cubs', + 'Chicago White Sox', + 'Cincinnati Reds', + 'Cleveland Guardians', + 'Colorado Rockies', + 'Detroit Tigers', + 'Houston Astros', + 'Kansas City Royals', + 'Los Angeles Angels', + 'Los Angeles Dodgers', + 'Miami Marlins', + 'Milwaukee Brewers', + 'Minnesota Twins', + 'New York Mets', + 'New York Yankees', + 'Oakland Athletics', + 'Philadelphia Phillies', + 'Pittsburgh Pirates', + 'San Diego Padres', + 'Seattle Mariners', + 'San Francisco Giants', + 'St Louis Cardinals', + 'Tampa Bay Rays', + 'Texas Rangers', + 'Toronto Blue Jays', + 'Washington Nationals' + ] + + try: + resp = requests.get(api_url, timeout=2) + except requests.ReadTimeout as e: + logging.error(f'mlbteam_and_franchise - ReadTimeout pull MLB team for MLB AM player ID {mlbam_playerid}') + return p_data + + if resp.status_code == 200: + data = resp.json() + data = data['people'][0] + logging.debug(f'data: {data}') + if data['currentTeam']['name'] in club_list: + p_data['mlbclub'] = data['currentTeam']['name'] + p_data['franchise'] = data['currentTeam']['name'] + else: + logging.error(f'Could not set team for {mlbam_playerid}; received {data["currentTeam"]["name"]}') + else: + logging.error(f'mlbteam_and_franchise - Bad response from mlbstatsapi: {resp.status_code}') + + return p_data + + +def get_all_pybaseball_ids(player_id: list, key_type: str): + q = pb.playerid_reverse_lookup(player_id, key_type=key_type) + if len(q.values) > 0: + return_val = q.loc[0] + else: + logging.error(f'get_all_pybaseball_ids - Could not find id {player_id} / {key_type} in pybaseball') + return_val = None + + return return_val diff --git a/live_series_update.py b/live_series_update.py index e5a15af..5090a47 100644 --- a/live_series_update.py +++ b/live_series_update.py @@ -17,8 +17,8 @@ import pydantic import sys from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \ - get_pitching_stats, get_pitching_peripherals, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \ - get_args + get_pitching_stats, get_all_pybaseball_ids, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \ + get_args, mlbteam_and_franchise from db_calls import db_get, db_put, db_post, db_patch from typing import Literal from bs4 import BeautifulSoup @@ -79,6 +79,18 @@ def sanitize_name(start_name: str) -> str: async def main(args): + """ + params: + cardset_name: str - to be searched in pd database + games_played: int - from 1 - 162 + pull_fielding: bool - whether or not to pull fielding stats from bbref + post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates + post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitching updates + post_fielders: bool - whether or not to post card positions + post_players: bool - whether or not to post player updates + p_desc_prefix: str - shows as cardset on card image and prefixes player name in discord + is_liveseries: str - whether or not to look up players' current MLB club from MLB statsapi + """ arg_data = get_args(args) # cardset_name = input(f'What is the name of this Cardset? ') @@ -120,10 +132,7 @@ async def main(args): print(f'Processed {len(all_batting.values)} batters\n') def get_pids(df_data): - q = pb.playerid_reverse_lookup([df_data["playerId"]], key_type="fangraphs") - return_val = q.loc[0] if len(q.values) > 0 else None - # print(f'lookup id: {df_data["playerId"]}\n{return_val}') - return return_val + return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs') def get_hand(df_data): if df_data['Name'][-1] == '*': @@ -163,7 +172,8 @@ async def main(args): 'pos_1': 'DH', 'description': f'{player_description}', 'bbref_id': df_data.name, - 'fangr_id': int(float(df_data['key_fangraphs'])) + 'fangr_id': int(float(df_data['key_fangraphs'])), + 'strat_code': int(float(df_data['key_mlbam'])) }) player_data[player_data['player_id'].isnull()].apply(create_batters, axis=1) @@ -411,6 +421,15 @@ async def main(args): ).set_index('player_id', drop=False) del total_ratings, offense_stats + def get_pids(df_data): + return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref') + + ids_and_names = player_data.apply(get_pids, axis=1) + player_data = (ids_and_names + .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id') + .query('key_mlbam == key_mlbam') + .set_index('key_bbref', drop=False)) + player_updates = {} # { : [ (param pairs) ] } rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity') average_ops = rarity_group['total_OPS'].mean().to_dict() @@ -427,9 +446,17 @@ async def main(args): } params = [('description', f'{player_description}')] - if release_directory not in df_data['image']: - params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard' - f'{urllib.parse.quote("?d=")}{release_directory}')]) + if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true': + team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam']))) + + if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None: + params.extend([('mlbclub', team_data['mlbclub'])]) + if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None: + params.extend([('franchise', team_data['franchise'])]) + + # if release_directory not in df_data['image']: + params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard' + f'{urllib.parse.quote("?d=")}{release_directory}')]) if df_data['cost'] == 99999: params.extend([ @@ -516,17 +543,17 @@ async def main(args): params.extend([('cost', new_cost), ('rarity_id', new_rarity)]) if len(params) > 0: - if df_data.name not in player_updates.keys(): - player_updates[df_data.name] = params + if df_data.player_id not in player_updates.keys(): + player_updates[df_data.player_id] = params else: - player_updates[df_data.name].extend(params) + player_updates[df_data.player_id].extend(params) player_data.apply(get_player_updates, axis=1) - # print(f'Sending {len(player_updates)} player updates to PD database...') - # if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': - # for x in player_updates: - # await db_patch('players', object_id=x, params=player_updates[x]) + print(f'Sending {len(player_updates)} player updates to PD database...') + if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': + for x in player_updates: + await db_patch('players', object_id=x, params=player_updates[x]) print(f'Batter updates are complete') start_time_two = datetime.datetime.now() @@ -538,6 +565,9 @@ async def main(args): all_pitching = get_pitching_stats(file_path=input_path) print(f'Processed {len(all_pitching.values)} pitchers\n') + def get_pids(df_data): + return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs') + print(f'Now pulling mlbam player IDs...') ids_and_names = all_pitching.apply(get_pids, axis=1) player_data = (ids_and_names @@ -564,7 +594,8 @@ async def main(args): 'pos_1': 'P', 'description': f'{player_description}', 'bbref_id': df_data.name, - 'fangr_id': int(float(df_data['key_fangraphs'])) + 'fangr_id': int(float(df_data['key_fangraphs'])), + 'strat_code': int(float(df_data['key_mlbam'])) }) player_data[player_data['player_id'].isnull()].apply(create_pitchers, axis=1) @@ -675,6 +706,13 @@ async def main(args): resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30) print(f'Response: {resp}\n\nPulling all positions to set player positions...') + print(f'Pitcher updates are complete') + start_time_three = datetime.datetime.now() + p_run_time = datetime.datetime.now() - start_time_two + print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n' + f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n') + print(f'Checking for player updates...') + """ Pull fresh pd_players and set_index to player_id Pull fresh battingcards and set_index to player @@ -731,7 +769,16 @@ async def main(args): ).set_index('player_id', drop=False) del total_ratings, pitching_stats - # player_updates = {} # { : [ (param pairs) ] } + def get_pids(df_data): + return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref') + + ids_and_names = player_data.apply(get_pids, axis=1) + player_data = (ids_and_names + .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id') + .query('key_mlbam == key_mlbam') + .set_index('key_bbref', drop=False)) + + player_updates = {} # { : [ (param pairs) ] } rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity') average_ops = rarity_group['total_OPS'].mean().to_dict() # cost_groups = rarity_group['cost'].mean() @@ -747,9 +794,17 @@ async def main(args): } params = [('description', f'{player_description}')] - if release_directory not in df_data['image']: - params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard' - f'{urllib.parse.quote("?d=")}{release_directory}')]) + if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true': + team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam']))) + + if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None: + params.extend([('mlbclub', team_data['mlbclub'])]) + if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None: + params.extend([('franchise', team_data['franchise'])]) + + # if release_directory not in df_data['image']: + params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard' + f'{urllib.parse.quote("?d=")}{release_directory}')]) if df_data['cost'] == 99999: params.extend([ @@ -836,10 +891,10 @@ async def main(args): params.extend([('cost', new_cost), ('rarity_id', new_rarity)]) if len(params) > 0: - if df_data.name not in player_updates.keys(): - player_updates[df_data.name] = params + if df_data.player_id not in player_updates.keys(): + player_updates[df_data.player_id] = params else: - player_updates[df_data.name].extend(params) + player_updates[df_data.player_id].extend(params) player_data.apply(get_player_updates, axis=1) @@ -848,13 +903,7 @@ async def main(args): # for x in player_updates: # await db_patch('players', object_id=x, params=player_updates[x]) - print(f'Pitcher updates are complete') - start_time_three = datetime.datetime.now() - p_run_time = datetime.datetime.now() - start_time_two - print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n' - f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n') - - print(f'Running player position updates') + print(f'Running player position updates..') all_pos = await pd_positions_df(cardset['id']) def set_all_positions(df_data): @@ -863,7 +912,7 @@ async def main(args): count = 1 for this_pos in pos_series: if this_pos == 'P': - this_pitcher = player_data.loc[df_data['player_id']] + this_pitcher = player_data.loc[df_data['bbref_id']] if this_pitcher['starter_rating'] > 3: pos_updates.append((f'pos_{count}', 'SP')) count += 1 @@ -879,7 +928,7 @@ async def main(args): count += 1 else: pos_updates.append((f'pos_{count}', this_pos)) - count += 1 + count += 1 if count == 1: pos_updates.append(('pos_1', 'DH'))