"""Create Paper Dynasty pitcher players, pitching cards, fielding lines and ratings from stat CSVs."""
import datetime
import urllib.parse

import pandas as pd

from creation_helpers import (
    get_all_pybaseball_ids,
    sanitize_name,
    CLUB_LIST,
    FRANCHISE_LIST,
    pd_players_df,
    mlbteam_and_franchise,
)
from db_calls import db_post, db_get, db_put, db_patch
from defenders import calcs_defense as cde
from . import calcs_pitcher as cpi
from exceptions import logger


# Rarity ids used throughout this module: 99, 1, 2, 3, 4, 5.

# (max total_OPS, rarity id) pairs checked in order; anything above the last cutoff is rarity 5.
_SP_RARITY_CUTOFFS = ((0.4, 99), (0.475, 1), (0.53, 2), (0.6, 3), (0.675, 4))
_RP_RARITY_CUTOFFS = ((0.325, 99), (0.4, 1), (0.475, 2), (0.55, 3), (0.625, 4))

# Average total_OPS assumed for a rarity when no current card of that rarity exists in the cardset.
_SP_FALLBACK_OPS = {99: 0.388, 1: 0.445, 2: 0.504, 3: 0.568, 4: 0.634, 5: 0.737}
_RP_FALLBACK_OPS = {99: 0.282, 1: 0.375, 2: 0.442, 3: 0.516, 4: 0.591, 5: 0.702}

# Base cost of a brand-new card per rarity (scaled by the card's OPS relative to the rarity average).
_BASE_COSTS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2400}

# Cost "rungs" used when an existing card changes rarity: the adjustment is rung[new] - rung[old].
# Rarity 99 sits at 2410 here (not the 2400 base cost) because that reproduces the historical
# adjustments exactly (+1600 from rarity 1, +2140 from 2, +2320 from 3, +2380 from 4, +2400 from 5).
_RARITY_COST_RUNGS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2410}

# Minimum cost of a card after it drops into the given rarity.
_RARITY_COST_FLOORS = {1: 800, 2: 100, 3: 50, 4: 15, 5: 5}


def _rarity_for_ops(total_ops, cutoffs):
    """Return the rarity id of the first cutoff that *total_ops* does not exceed, else 5."""
    for max_ops, rarity_id in cutoffs:
        if total_ops <= max_ops:
            return rarity_id
    return 5


def _cost_after_rarity_change(old_rarity, new_rarity, old_cost):
    """Return the cost of an existing card after moving from *old_rarity* to *new_rarity*.

    Returns 0 (meaning "leave the cost alone") when the rarities are equal or when either one is not
    a known rarity id. Moving to a more valuable rarity adds the rung difference; moving to a less
    valuable one subtracts it but never goes below the floor of the new rarity.
    """
    if (old_rarity == new_rarity
            or old_rarity not in _RARITY_COST_RUNGS
            or new_rarity not in _RARITY_COST_RUNGS):
        return 0

    delta = _RARITY_COST_RUNGS[new_rarity] - _RARITY_COST_RUNGS[old_rarity]
    if delta > 0:
        return old_cost + delta
    # A drop can never land on rarity 99 (it is the top rung), so a floor always exists here.
    return max(old_cost + delta, _RARITY_COST_FLOORS[new_rarity])


def get_pitching_stats(
        file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None,
        ignore_limits: bool = False):
    """Read the four pitching split CSVs and merge them into one row per ``playerId``.

    Args:
        file_path: Prefix prepended verbatim to ``vlhh-basic.csv``, ``vrhh-basic.csv``,
            ``vlhh-rate.csv`` and ``vrhh-rate.csv`` (so it should end with a path separator).
        start_date: Currently unused; reserved for the date-based pull.
        end_date: Currently unused; reserved for the date-based pull.
        ignore_limits: When True the minimum ``TBF`` filter is 1 for both splits instead of
            20 (``vlhh`` files) and 40 (``vrhh`` files).

    Returns:
        DataFrame joined on ``playerId``. Columns of the two splits carry ``_vL`` / ``_vR``
        suffixes; rate columns that collide with basic columns additionally carry ``_rate``.

    Raises:
        LookupError: If ``file_path`` is None (date-based pulls are not implemented).
    """
    print('Reading pitching stats...')

    if file_path is None:
        # An abandoned draft of the date-based pull built four FanGraphs URLs of the form
        #   https://www.fangraphs.com/leaders/splits-leaderboards?splitArr=<5|6>&splitArrPitch=&position=P
        #   &autoPt=false&splitTeams=false&statType=player&statgroup=<1|3>
        #   &startDate=<Y-M-D>&endDate=<Y-M-D>&players=&filter=&groupBy=season&sort=4,1...
        # (vl* URLs used splitArr=5, vr* used splitArr=6; *b URLs used statgroup=1, *r used statgroup=3)
        # and then looked up the 'data-export' anchor with BeautifulSoup. It was never finished.
        raise LookupError('Date-based stat pulls not implemented, yet. Please provide pitching csv files.')

    min_vl = 20 if not ignore_limits else 1
    min_vr = 40 if not ignore_limits else 1

    def read_split(file_name, min_tbf):
        # Drop pitchers with too few batters faced in this split.
        return pd.read_csv(f'{file_path}{file_name}').query(f'TBF >= {min_tbf}')

    vl_basic = read_split('vlhh-basic.csv', min_vl)
    vr_basic = read_split('vrhh-basic.csv', min_vr)
    total_basic = pd.merge(vl_basic, vr_basic, on="playerId", suffixes=('_vL', '_vR'))

    vl_rate = read_split('vlhh-rate.csv', min_vl)
    vr_rate = read_split('vrhh-rate.csv', min_vr)
    total_rate = pd.merge(vl_rate, vr_rate, on="playerId", suffixes=('_vL', '_vR'))

    return pd.merge(total_basic, total_rate, on="playerId", suffixes=('', '_rate'))


async def pd_pitchingcards_df(cardset_id: int):
    """Fetch the cardset's pitching cards as a DataFrame.

    The ``id`` and ``player`` columns are renamed to ``pitchingcard_id`` and ``player_id``.

    Raises:
        ValueError: If the API reports zero cards.
    """
    bc_query = await db_get('pitchingcards', params=[('cardset_id', cardset_id), ('short_output', True)])
    if bc_query['count'] == 0:
        raise ValueError('No pitching cards returned from Paper Dynasty API')

    cards = pd.DataFrame(bc_query['cards'])
    return cards.rename(columns={'id': 'pitchingcard_id', 'player': 'player_id'})


async def pd_pitchingcardratings_df(cardset_id: int):
    """Fetch vs-L and vs-R pitching card ratings and combine them into one row per card.

    Returns:
        DataFrame keyed by ``pitchingcard_id`` with ``_vL`` / ``_vR`` suffixed rating columns and a
        ``total_OPS`` column: the mean of the two split OPS values with the worse (higher) one
        counted twice.

    Raises:
        ValueError: If either split query reports zero ratings.
    """
    vl_query = await db_get(
        'pitchingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True)])
    vr_query = await db_get(
        'pitchingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True)])
    if 0 in [vl_query['count'], vr_query['count']]:
        raise ValueError('No pitching card ratings returned from Paper Dynasty API')

    vl = pd.DataFrame(vl_query['ratings'])
    vr = pd.DataFrame(vr_query['ratings'])
    ratings = (pd.merge(vl, vr, on='pitchingcard', suffixes=('_vL', '_vR'))
               .rename(columns={'pitchingcard': 'pitchingcard_id'}))

    def get_total_ops(df_data):
        ops_vl = df_data['obp_vL'] + df_data['slg_vL']
        ops_vr = df_data['obp_vR'] + df_data['slg_vR']
        return (ops_vr + ops_vl + max(ops_vl, ops_vr)) / 3

    ratings['total_OPS'] = ratings.apply(get_total_ops, axis=1)
    return ratings


def match_player_lines(
        all_pitching: pd.DataFrame, all_players: pd.DataFrame, df_p: pd.DataFrame, is_custom: bool = False):
    """Attach player ids, existing player rows and fielding rows to the pitching stats.

    Args:
        all_pitching: Output of ``get_pitching_stats`` (needs ``playerId`` and ``Name_vL``).
        all_players: Existing players; joined on its ``bbref_id`` column.
        df_p: Pitcher fielding frame; joined on the ``key_bbref`` index with an ``_r`` suffix on
            colliding columns.
        is_custom: Passed through to ``get_all_pybaseball_ids``.

    Returns:
        DataFrame indexed by ``key_bbref`` (also kept as a column). Rows without a ``key_mlbam``
        are dropped.
    """
    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs', is_custom, df_data['Name_vL'])

    print('Now pulling mlbam player IDs...')
    ids_and_names = all_pitching.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(all_players, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')  # NaN != NaN, so this drops rows with no mlbam id
                   .set_index('key_bbref', drop=False))
    print('Matched mlbam to pd players.')

    step_pitching = pd.merge(
        player_data, all_pitching, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)

    return step_pitching.join(df_p, rsuffix='_r')


async def create_new_players(
        final_pitching: pd.DataFrame, cardset: dict, card_base_url: str, release_dir: str, player_desc: str):
    """Create a player (and, if needed, an mlbplayer) for every row that has no ``player_id``.

    New players are posted with placeholder cost 99999 and rarity 99. ``final_pitching`` is
    updated in place with the ``player_id`` and ``p_name`` returned by the API.

    Returns:
        The number of players created.
    """
    new_players = []
    new_mlbplayers = {}

    def create_pitchers(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        fangraphs_id = int(float(df_data['key_fangraphs']))
        mlbam_id = int(float(df_data['key_mlbam']))

        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{card_base_url}/{df_data["player_id"]}/'
                     f'pitchingcard{urllib.parse.quote("?d=")}{release_dir}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': FRANCHISE_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': fangraphs_id,
            'rarity_id': 99,
            'pos_1': 'P',
            'description': f'{player_desc}',
            'bbref_id': df_data.name,
            'fangr_id': fangraphs_id,
            'strat_code': mlbam_id
        })
        new_mlbplayers[df_data.name] = {
            'first_name': f_name,
            'last_name': l_name,
            'key_mlbam': mlbam_id,
            'key_fangraphs': fangraphs_id,
            'key_bbref': df_data['key_bbref'],
            'key_retro': df_data['key_retro']
        }

    final_pitching[final_pitching['player_id'].isnull()].apply(create_pitchers, axis=1)
    print(f'Creating {len(new_players)} new players...')

    for x in new_players:
        # Reuse an existing mlbplayer record when one matches the bbref key.
        mlb_query = await db_get('mlbplayers', params=[('key_bbref', x['bbref_id'])])
        if mlb_query['count'] > 0:
            x['mlbplayer_id'] = mlb_query['players'][0]['id']
        else:
            new_mlb = await db_post('mlbplayers/one', payload=new_mlbplayers[x['bbref_id']])
            x['mlbplayer_id'] = new_mlb['id']

        this_player = await db_post('players', payload=x)
        final_pitching.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_pitching.at[x['bbref_id'], 'p_name'] = this_player['p_name']

    print(f'Player IDs linked to pitching stats.\n{len(final_pitching.values)} players remain\n')
    return len(new_players)


def get_stat_df(input_path: str, final_pitching: pd.DataFrame):
    """Join ``{input_path}pitching.csv`` onto ``final_pitching`` and derive each pitcher's hand.

    The CSV is de-duplicated on ``Name-additional`` (first row wins) and indexed by it.
    ``pitch_hand`` is 'L' when ``Name`` ends with '*', otherwise 'R'. Columns of
    ``final_pitching`` that collide with CSV columns get an ``_l`` suffix.
    """
    def get_hand(df_data):
        # endswith() also copes with an empty name, where indexing [-1] would raise.
        return 'L' if df_data['Name'].endswith('*') else 'R'

    print('Reading pitching peripheral stats...')
    pit_data = (pd.read_csv(f'{input_path}pitching.csv')
                .drop_duplicates(subset=['Name-additional'], keep='first')
                .set_index('Name-additional'))
    pit_data['pitch_hand'] = pit_data.apply(get_hand, axis=1)

    pitching_stats = final_pitching.join(pit_data, lsuffix='_l')
    print(f'Stats are tallied\n{len(pitching_stats.values)} players remain\n')
    return pitching_stats


async def calculate_pitching_cards(pitching_stats: pd.DataFrame, cardset: dict, season_pct: float, post_pitchers: bool):
    """Build one pitching card per row, optionally PUT them, then attach the database card ids.

    Args:
        pitching_stats: One row per pitcher, indexed by bbref key.
        cardset: Cardset record; only ``cardset['id']`` is read.
        season_pct: Passed through to the balk / wild pitch / hold calculations.
        post_pitchers: When True the cards are sent to the ``pitchingcards`` endpoint.

    Returns:
        ``pitching_stats`` left-merged with the cardset's pitching cards on ``player_id`` and
        re-indexed by ``key_bbref``.
    """
    pitching_cards = []

    def create_pitching_card(df_data):
        pow_data = cde.pow_ratings(float(df_data['Inn_def']), int(df_data['GS']), int(df_data['G']))
        pitching_cards.append({
            "player_id": int(float(df_data['player_id'])),
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": int(float(df_data['key_mlbam'])),
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "balk": cpi.balks(df_data['BK'], df_data['IP'], season_pct),
            "wild_pitch": cpi.wild_pitches(df_data['WP'], df_data['IP'], season_pct),
            "hold": cde.hold_pitcher(df_data['caught_stealing_perc'], int(df_data['pickoffs']), season_pct),
            "starter_rating": pow_data[0],
            "relief_rating": pow_data[1],
            "closer_rating": cpi.closer_rating(int(df_data['GF']), int(df_data['SV']), int(df_data['G'])),
            "hand": df_data['pitch_hand'],
            "batting": f'#1W{df_data["pitch_hand"]}-C'
        })

    print('Calculating pitching cards...')
    pitching_stats.apply(create_pitching_card, axis=1)
    print('Cards are complete.\n\nPosting cards now...')

    if post_pitchers:
        resp = await db_put('pitchingcards', payload={'cards': pitching_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching pitching card database IDs to player stats...')

    # NOTE(review): the card-id merge runs whether or not cards were posted, so later steps always
    # see the database card columns. Confirm this matches the intended dry-run behavior.
    pc_df = await pd_pitchingcards_df(cardset['id'])
    pitching_stats = pitching_stats.merge(pc_df, how='left', on='player_id').set_index('key_bbref', drop=False)

    return pitching_stats


async def create_position(season_pct: float, pitching_stats: pd.DataFrame, post_pitchers: bool, df_p: pd.DataFrame):
    """Build a 'P' fielding line for every pitcher and optionally PUT them to ``cardpositions``.

    Pitchers whose ``key_bbref`` is in ``df_p.index`` get range and error computed from their
    fielding row; everyone else gets a placeholder line (innings 1, range 5, error 51).
    """
    pit_positions = []

    def create_pit_position(df_data):
        bbref_key = df_data["key_bbref"]

        if bbref_key in df_p.index:
            logger.debug(f'Running P stats for {df_data["p_name"]}')
            pit_positions.append({
                "player_id": int(df_data['player_id']),
                "position": 'P',
                "innings": float(df_p.at[bbref_key, 'Inn_def']),
                "range": cde.range_pitcher(
                    rs_value=int(df_p.at[bbref_key, 'bis_runs_total']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='p',
                    errors=int(df_p.at[bbref_key, 'E_def']),
                    chances=int(df_p.at[bbref_key, 'chances']),
                    season_pct=season_pct
                )
            })
        else:
            # No fielding row: post a placeholder line. The id must come from player_id, as in the
            # branch above; key_bbref is the lookup key, not the player's database id.
            pit_positions.append({
                "player_id": int(df_data['player_id']),
                "position": 'P',
                "innings": 1,
                "range": 5,
                "error": 51
            })

    print('Calculating pitcher fielding lines now...')
    pitching_stats.apply(create_pit_position, axis=1)
    print('Fielding is complete.\n\nPosting positions now...')

    if post_pitchers:
        resp = await db_put('cardpositions', payload={'positions': pit_positions}, timeout=30)
        print(f'Response: {resp}\n')


async def calculate_pitcher_ratings(pitching_stats: pd.DataFrame, post_pitchers: bool):
    """Compute card ratings for every row via ``cpi.get_pitcher_ratings`` and optionally PUT them."""
    pitching_ratings = []

    def create_pitching_card_ratings(df_data):
        logger.info(f'Calculating pitching card ratings for {df_data.name}')
        pitching_ratings.extend(cpi.get_pitcher_ratings(df_data))

    print('Calculating card ratings...')
    pitching_stats.apply(create_pitching_card_ratings, axis=1)
    print('Ratings are complete\n\nPosting ratings now...')

    if post_pitchers:
        resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling all positions to set player positions...')


async def post_player_updates(
        cardset: dict, player_description: str, card_base_url: str, release_dir: str, is_liveseries: bool,
        post_players: bool):
    """Work out description / team / image / cost / rarity changes for the cardset's pitchers.

    Args:
        cardset: Cardset record; only ``cardset['id']`` is read.
        player_description: Description to set on players whose description differs and does not
            contain 'potm' (case-insensitive).
        card_base_url: Base of the card image URL.
        release_dir: Release directory appended to the image URL.
        is_liveseries: When True, ``mlbclub`` / ``franchise`` are refreshed from
            ``mlbteam_and_franchise``.
        post_players: When True the updates are PATCHed to the ``players`` endpoint.

    Returns:
        The number of players with at least one pending update.
    """
    def new_rarity_id(df_data):
        # Pitchers with starter_rating above 3 use the starter cutoffs, everyone else the relief ones.
        if df_data['starter_rating'] > 3:
            return _rarity_for_ops(df_data['total_OPS'], _SP_RARITY_CUTOFFS)
        return _rarity_for_ops(df_data['total_OPS'], _RP_RARITY_CUTOFFS)

    # The players frame is merged on its player_id column below, so it needs no re-indexing here
    # (the previous set_index() call discarded its result and had no effect).
    p_data = await pd_players_df(cardset['id'])

    total_ratings = pd.merge(
        await pd_pitchingcards_df(cardset['id']),
        await pd_pitchingcardratings_df(cardset['id']),
        on='pitchingcard_id'
    )
    total_ratings['new_rarity_id'] = total_ratings.apply(new_rarity_id, axis=1)

    player_data = pd.merge(
        p_data,
        total_ratings,
        on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')  # NaN != NaN, so this drops rows with no mlbam id
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { player_id: [ (param pairs) ] }

    # Average OPS per rarity, taken only from cards whose rarity is not changing.
    sp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating >= 4').groupby('rarity')
    sp_average_ops = sp_rarity_group['total_OPS'].mean().to_dict()
    rp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating < 4').groupby('rarity')
    rp_average_ops = rp_rarity_group['total_OPS'].mean().to_dict()

    # Fill in defaults for any rarity with no such cards.
    for rarity_id, default_ops in _SP_FALLBACK_OPS.items():
        sp_average_ops.setdefault(rarity_id, default_ops)
    for rarity_id, default_ops in _RP_FALLBACK_OPS.items():
        rp_average_ops.setdefault(rarity_id, default_ops)

    def get_player_updates(df_data):
        def avg_ops(rarity_id, starter_rating):
            if starter_rating >= 4:
                return sp_average_ops[rarity_id]
            return rp_average_ops[rarity_id]

        params = []

        if df_data['description'] != player_description and 'potm' not in df_data['description'].lower():
            params = [('description', f'{player_description}')]

        if is_liveseries:
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))

            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])

        # The image URL is always refreshed so it points at the current release directory.
        params.extend([('image', f'{card_base_url}/{df_data["player_id"]}/pitchingcard'
                                 f'{urllib.parse.quote("?d=")}{release_dir}')])

        new_rarity = df_data['new_rarity_id']
        if df_data['cost'] == 99999:
            # Placeholder cost from create_new_players: price the card from scratch.
            params.extend([
                ('cost',
                 round(_BASE_COSTS[new_rarity] * df_data['total_OPS'] /
                       avg_ops(new_rarity, df_data['starter_rating']))),
                ('rarity_id', new_rarity)
            ])
        elif df_data['rarity'] != new_rarity:
            new_cost = _cost_after_rarity_change(df_data['rarity'], new_rarity, df_data['cost'])
            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        if params:
            player_updates.setdefault(df_data['player_id'], []).extend(params)

    player_data.apply(get_player_updates, axis=1)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if post_players:
        for player_id, params in player_updates.items():
            await db_patch('players', object_id=player_id, params=params)

    return len(player_updates)


async def run_pitchers(
        cardset: dict, input_path: str, card_base_url: str, season: int, release_directory: str,
        player_description: str, season_pct: float, post_players: bool, post_pitchers: bool, is_liveseries: bool,
        ignore_limits: bool, pull_fielding: bool = True, is_custom: bool = False):
    """Run the full pitcher pipeline for one cardset.

    Steps: load split stats, match them to players (optionally with pitcher fielding for
    ``season``), create missing players, join peripheral stats, build cards, fielding lines and
    ratings, then compute player updates.

    Returns:
        Dict with ``tot_pitchers`` (row count), ``new_pitchers`` (players created, 0 when
        ``post_players`` is False) and ``pitching_stats`` (the final DataFrame).
    """
    print('Pulling PD player IDs...')
    pd_players = await pd_players_df(cardset['id'])

    all_stats = get_pitching_stats(file_path=input_path, ignore_limits=ignore_limits)
    print(f'Processed {len(all_stats.values)} pitchers\n')

    print('Pulling pitcher defense...')
    df_p = cde.get_bbref_fielding_df('p', season) if pull_fielding else pd.DataFrame()

    pit_step1 = match_player_lines(all_stats, pd_players, df_p, is_custom)

    if post_players:
        new_pitchers = await create_new_players(
            pit_step1, cardset, card_base_url, release_directory, player_description
        )
    else:
        new_pitchers = 0

    pitching_stats = get_stat_df(input_path, pit_step1)
    del all_stats, pit_step1

    pitching_stats = await calculate_pitching_cards(pitching_stats, cardset, season_pct, post_pitchers)
    await create_position(season_pct, pitching_stats, post_pitchers, df_p)
    await calculate_pitcher_ratings(pitching_stats, post_pitchers)
    await post_player_updates(
        cardset, player_description, card_base_url, release_directory, is_liveseries, post_players)

    return {
        'tot_pitchers': len(pitching_stats.index),
        'new_pitchers': new_pitchers,
        'pitching_stats': pitching_stats
    }