import logging
import datetime
import urllib.parse

import pandas as pd

from creation_helpers import (
    get_all_pybaseball_ids, sanitize_name, CLUB_LIST, FRANCHISE_LIST, pd_players_df,
    mlbteam_and_franchise, get_hand
)
from db_calls import db_post, db_get, db_put, db_patch
from . import calcs_batter as cba
from defenders import calcs_defense as cde


async def pd_battingcards_df(cardset_id: int):
    """Fetch the batting cards of one cardset as a DataFrame.

    Queries the 'battingcards' endpoint with ``short_output`` enabled and
    renames the ``id`` and ``player`` columns to ``battingcard_id`` and
    ``player_id``.

    Raises:
        ValueError: if the response reports a count of zero.
    """
    response = await db_get(
        'battingcards',
        params=[('cardset_id', cardset_id), ('short_output', True)]
    )
    if response['count'] == 0:
        raise ValueError('No batting cards returned from Paper Dynasty API')

    cards = pd.DataFrame(response['cards'])
    return cards.rename(columns={'id': 'battingcard_id', 'player': 'player_id'})


async def pd_battingcardratings_df(cardset_id: int):
    """Fetch vL/vR batting card ratings and derive ``total_OPS`` and ``new_rarity_id``.

    The two hands are requested separately (left first, then right), merged on
    ``battingcard`` with ``_vL`` / ``_vR`` suffixes, and the key column is
    renamed to ``battingcard_id``.

    Raises:
        ValueError: if either query reports a count of zero.
    """

    async def fetch_ratings(vs_hand):
        # NOTE(review): team_id and ts are hard-coded request parameters; their
        # meaning is not visible in this file - confirm before changing them.
        return await db_get(
            'battingcardratings',
            params=[
                ('cardset_id', cardset_id), ('vs_hand', vs_hand), ('short_output', True), ('team_id', 31),
                ('ts', 's37136685556r6135248705')
            ]
        )

    vl_query = await fetch_ratings('L')
    vr_query = await fetch_ratings('R')
    if vl_query['count'] == 0 or vr_query['count'] == 0:
        raise ValueError('No batting card ratings returned from Paper Dynasty API')

    merged = pd.merge(
        pd.DataFrame(vl_query['ratings']),
        pd.DataFrame(vr_query['ratings']),
        on='battingcard',
        suffixes=('_vL', '_vR')
    )
    ratings = merged.rename(columns={'battingcard': 'battingcard_id'})

    def total_ops_for(row):
        # The weaker split is counted twice: (vR + vL + min(vL, vR)) / 3.
        ops_vl = row['obp_vL'] + row['slg_vL']
        ops_vr = row['obp_vR'] + row['slg_vR']
        return (ops_vr + ops_vl + min(ops_vl, ops_vr)) / 3

    ratings['total_OPS'] = ratings.apply(total_ops_for, axis=1)

    # (minimum total_OPS, rarity id), checked from the highest threshold down.
    rarity_floors = ((1.2, 99), (1, 1), (.9, 2), (.8, 3), (.7, 4))

    def rarity_for(row):
        for floor, rarity_id in rarity_floors:
            if row['total_OPS'] >= floor:
                return rarity_id
        return 5

    ratings['new_rarity_id'] = ratings.apply(rarity_for, axis=1)

    return ratings


def get_batting_stats(
        file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None,
        ignore_limits: bool = False):
    """Load and combine the vs-LHP / vs-RHP batting CSV exports.

    Four files are read, each name appended directly to ``file_path`` (so the
    prefix must already end with a path separator): ``vlhp-basic.csv``,
    ``vrhp-basic.csv``, ``vlhp-rate.csv`` and ``vrhp-rate.csv``.

    Args:
        file_path: prefix prepended to each CSV file name.
        start_date: unused; date-based pulls are not implemented.
        end_date: unused; date-based pulls are not implemented.
        ignore_limits: when True the minimum PA filter drops to 1 for both
            splits; otherwise it is 20 PA vs left and 40 PA vs right.

    Returns:
        DataFrame with one row per ``playerId`` present in all four filtered
        files. Split columns carry ``_vL`` / ``_vR`` suffixes; columns of the
        rate files that clash with the basic files get an extra ``_rate``.

    Raises:
        LookupError: if ``file_path`` is None.
    """
    if file_path is None:
        raise LookupError('Date-based stat pulls not implemented, yet. Please provide batting csv files.')

    min_vl = 1 if ignore_limits else 20
    min_vr = 1 if ignore_limits else 40

    def read_splits(kind):
        # Inner merge: a batter must clear the PA minimum against both hands.
        vl = pd.read_csv(f'{file_path}vlhp-{kind}.csv').query(f'PA >= {min_vl}')
        vr = pd.read_csv(f'{file_path}vrhp-{kind}.csv').query(f'PA >= {min_vr}')
        return pd.merge(vl, vr, on="playerId", suffixes=('_vL', '_vR'))

    total_basic = read_splits('basic')
    total_rate = read_splits('rate')
    return pd.merge(total_basic, total_rate, on="playerId", suffixes=('', '_rate'))


def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame, is_custom: bool = False):
    """Attach cross-site player ids and existing PD player rows to the batting lines.

    Each batting row is passed to ``get_all_pybaseball_ids`` (keyed by its
    FanGraphs ``playerId``); the result is left-joined to ``all_players`` on
    ``key_bbref`` == ``bbref_id``, rows without a ``key_mlbam`` are dropped,
    and the batting stats are merged back in on the FanGraphs id.

    Returns:
        DataFrame indexed by ``key_bbref`` (the column is kept as well).
    """
    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs', is_custom, df_data['Name_vL'])

    print('Now pulling mlbam player IDs...')
    ids_and_names = all_batting.apply(get_pids, axis=1)
    player_data = (
        ids_and_names
        .merge(all_players, how='left', left_on='key_bbref', right_on='bbref_id')
        .query('key_mlbam == key_mlbam')  # NaN != NaN, so this drops rows with no mlbam id
        .set_index('key_bbref', drop=False)
    )
    print('Matched mlbam to pd players.')

    final_batting = pd.merge(
        player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)

    return final_batting


async def create_new_players(
        final_batting: pd.DataFrame, cardset: dict, card_base_url: str, release_dir: str, player_desc: str):
    """Create PD players (and MLB player records when missing) for unmatched batters.

    A batter is "new" when its ``player_id`` is null. Each new player is posted
    with the placeholder cost 99999 and rarity 99, and ``final_batting`` is
    updated in place with the returned ``player_id`` and ``p_name``.

    Returns:
        The number of players created.
    """
    new_players = []
    new_mlbplayers = {}

    def create_batters(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{card_base_url}/{df_data["player_id"]}/battingcard'
                     f'{urllib.parse.quote("?d=")}{release_dir}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': FRANCHISE_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': int(float(df_data['key_fangraphs'])),
            'rarity_id': 99,
            'pos_1': 'DH',
            'description': f'{player_desc}',
            'bbref_id': df_data.name,
            'fangr_id': int(float(df_data['key_fangraphs'])),
            'strat_code': int(float(df_data['key_mlbam']))
        })
        new_mlbplayers[df_data.name] = {
            'first_name': f_name,
            'last_name': l_name,
            'key_mlbam': int(float(df_data['key_mlbam'])),
            'key_fangraphs': int(float(df_data['key_fangraphs'])),
            'key_bbref': df_data['key_bbref'],
            'key_retro': df_data['key_retro']
        }

    final_batting[final_batting['player_id'].isnull()].apply(create_batters, axis=1)
    print(f'Creating {len(new_players)} new players...')

    for x in new_players:
        # Reuse an existing mlbplayers record for this bbref key when there is one.
        mlb_query = await db_get('mlbplayers', params=[('key_bbref', x['bbref_id'])])
        if mlb_query['count'] > 0:
            x['mlbplayer_id'] = mlb_query['players'][0]['id']
        else:
            new_mlb = await db_post('mlbplayers/one', payload=new_mlbplayers[x['bbref_id']])
            x['mlbplayer_id'] = new_mlb['id']

        this_player = await db_post('players', payload=x)
        final_batting.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_batting.at[x['bbref_id'], 'p_name'] = this_player['p_name']

    print(f'Player IDs linked to batting stats.\n{len(final_batting.index)} players remain\n')

    return len(new_players)


def get_run_stat_df(final_batting: pd.DataFrame, input_path: str):
    """Join the baserunning CSV (``running.csv``) onto the batting frame.

    The CSV is indexed by its ``Name-additional`` column, gains a ``bat_hand``
    column computed row-wise by ``get_hand``, and is joined to
    ``final_batting`` on the index.
    """
    print('Reading baserunning stats...')
    run_data = pd.read_csv(f'{input_path}running.csv').set_index('Name-additional')
    run_data['bat_hand'] = run_data.apply(get_hand, axis=1)
    offense_stats = final_batting.join(run_data)
    print(f'Stats are tallied\n{len(offense_stats.index)} players remain\n\nCollecting defensive data from bbref...')

    return offense_stats


async def calculate_batting_cards(offense_stats: pd.DataFrame, cardset: dict, season_pct: float, post_batters: bool):
    """Build one batting card per row and optionally PUT them to the API.

    Args:
        offense_stats: batting + baserunning stats, one row per player.
        cardset: cardset record; only ``cardset['id']`` is read.
        season_pct: forwarded to ``cba.stealing``.
        post_batters: when True the cards are sent to 'battingcards'.

    Returns:
        ``offense_stats`` merged with the cardset's batting cards on
        ``player_id`` and indexed by ``key_bbref``. The merge runs whether or
        not the cards were posted.
    """
    batting_cards = []

    def create_batting_card(df_data):
        logging.info(df_data['player_id'])
        try:
            s_data = cba.stealing(
                chances=int(df_data['SBO']),
                sb2s=int(df_data['SB2']),
                cs2s=int(df_data['CS2']),
                sb3s=int(df_data['SB3']),
                cs3s=int(df_data['CS3']),
                season_pct=season_pct
            )
        except ValueError as e:
            # Best effort: a failed steal calculation falls back to all-zero steal ratings.
            print(f'Stealing error for *{df_data.name}*: {e}')
            logging.error(e)
            s_data = [0, 0, 0, 0]

        batting_cards.append({
            "player_id": df_data['player_id'],
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": df_data['key_mlbam'],
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "steal_low": s_data[0],
            "steal_high": s_data[1],
            "steal_auto": s_data[2],
            "steal_jump": s_data[3],
            "hit_and_run": cba.hit_and_run(
                df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'],
                df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR']
            ),
            "running": cba.running(df_data['XBT%']),
            "hand": df_data['bat_hand']
        })

    print('Calculating batting cards...')
    offense_stats.apply(create_batting_card, axis=1)
    print('Cards are complete.\n\nPosting cards now...')

    if post_batters:
        resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...')

    offense_stats = pd.merge(
        offense_stats, await pd_battingcards_df(cardset['id']), on='player_id'
    ).set_index('key_bbref', drop=False)

    return offense_stats


async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool):
    """Compute card ratings for every row and optionally PUT them to the API.

    Returns:
        The number of rating records produced by ``cba.get_batter_ratings``
        across all rows.
    """
    batting_ratings = []

    def create_batting_card_ratings(df_data):
        logging.debug(f'Calculating card ratings for {df_data.name}')
        batting_ratings.extend(cba.get_batter_ratings(df_data))

    print('Calculating card ratings...')
    offense_stats.apply(create_batting_card_ratings, axis=1)
    print('Ratings are complete\n\nPosting ratings now...')

    if to_post:
        resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling fresh PD player data...')

    return len(batting_ratings)


# Base cost per rarity id, used to price players still carrying the 99999 placeholder cost.
_BASE_COSTS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2400}

# Fallback average total OPS per rarity when no player in the data supplies one.
_DEFAULT_AVERAGE_OPS = {1: 1.066, 2: 0.938, 3: 0.844, 4: 0.752, 5: 0.612}

# Reference points for re-pricing on a rarity change: the adjustment applied is
# anchor[new] - anchor[old]. Note 99 is 2410 here (not the 2400 base cost); that
# reproduces the historical +1600 / +2140 / +2320 / +2380 / +2400 steps exactly.
_RARITY_COST_ANCHORS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2410}

# Minimum cost a player may fall to when moved down INTO the given rarity.
_RARITY_COST_FLOORS = {1: 800, 2: 100, 3: 50, 4: 15, 5: 5}


def _rarity_change_cost(old_rarity, new_rarity, old_cost):
    """Return the re-priced cost for a rarity change, or 0 when no rule applies."""
    if old_rarity == new_rarity:
        return 0
    if old_rarity not in _RARITY_COST_ANCHORS or new_rarity not in _RARITY_COST_ANCHORS:
        return 0

    delta = _RARITY_COST_ANCHORS[new_rarity] - _RARITY_COST_ANCHORS[old_rarity]
    shifted = old_cost + delta
    if delta > 0:
        # Promotions add the full step with no cap.
        return shifted
    # Demotions never drop below the floor of the rarity being entered.
    return max(shifted, _RARITY_COST_FLOORS[new_rarity])


async def post_player_updates(
        cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool,
        is_custom: bool):
    """Work out and (optionally) PATCH description, team, image, cost and rarity updates.

    Steps:
      1. Pull fresh PD players, batting cards and batting card ratings for the
         cardset. Ratings arrive with ``total_OPS`` (computed as
         (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3) and ``new_rarity_id``.
      2. Join cards to ratings on ``battingcard_id`` and players to that result
         on ``player_id``.
      3. Re-resolve cross-site ids and drop rows without a ``key_mlbam``.
      4. Average ``total_OPS`` per rarity over players whose rarity is
         unchanged, filling rarities 1-5 with defaults when absent.
      5. Per player collect PATCH params:
         - description, when it differs from ``player_desc`` and does not
           contain 'potm';
         - mlbclub / franchise, for live series only, when they changed;
         - image, always;
         - for cost == 99999:
           cost = base cost * total OPS / average OPS of the new rarity,
           plus the new rarity_id;
         - otherwise, on a rarity change, a stepped cost adjustment plus the
           new rarity_id.

    Args:
        to_post: when False the updates are computed and counted but not sent.

    Returns:
        The number of players with at least one update.
    """
    # Kept as a plain column (no index): the merge below joins on the 'player_id' column.
    p_data = await pd_players_df(cardset['id'])
    total_ratings = pd.merge(
        await pd_battingcards_df(cardset['id']),
        await pd_battingcardratings_df(cardset['id']),
        on='battingcard_id'
    )
    player_data = pd.merge(
        p_data,
        total_ratings,
        on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings

    def get_pids(df_data):
        if is_custom:
            return get_all_pybaseball_ids([df_data["fangr_id"]], 'fangraphs', is_custom)
        else:
            return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (
        ids_and_names
        .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
        .query('key_mlbam == key_mlbam')  # NaN != NaN, so this drops rows with no mlbam id
        .set_index('key_bbref', drop=False)
    )

    player_updates = {}  # { player_id: [ (param name, value) pairs ] }
    rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
    average_ops = rarity_group['total_OPS'].mean().to_dict()
    for rarity_id, default_ops in _DEFAULT_AVERAGE_OPS.items():
        average_ops.setdefault(rarity_id, default_ops)
    # NOTE(review): there is no fallback average for rarity 99. Pricing a 99999-cost
    # player whose new rarity is 99 raises KeyError unless an existing rarity-99 player
    # contributed an average above. Left unchanged pending a confirmed default value.

    def get_player_updates(df_data):
        params = []

        if df_data['description'] != player_desc and 'potm' not in df_data['description'].lower():
            params = [('description', f'{player_desc}')]

        if is_liveseries:
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))

            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])

        # The image URL is refreshed unconditionally.
        params.extend([('image', f'{card_base_url}/{df_data["player_id"]}/battingcard'
                                 f'{urllib.parse.quote("?d=")}{release_dir}')])

        new_rarity = df_data['new_rarity_id']
        if df_data['cost'] == 99999:
            params.extend([
                ('cost',
                 round(_BASE_COSTS[new_rarity] * df_data['total_OPS'] / average_ops[new_rarity])),
                ('rarity_id', new_rarity)
            ])
        elif df_data['rarity'] != new_rarity:
            new_cost = _rarity_change_cost(df_data['rarity'], new_rarity, df_data['cost'])
            # 0 means "no pricing rule matched"; the rarity is then left untouched as well.
            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        if len(params) > 0:
            player_updates.setdefault(df_data['player_id'], []).extend(params)

    player_data.apply(get_player_updates, axis=1)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if to_post:
        for player_id, params in player_updates.items():
            await db_patch('players', object_id=player_id, params=params)

    return len(player_updates)


async def run_batter_fielding(season: int, offense_stats: pd.DataFrame, season_pct: float, post_batters: bool):
    """Pull per-position fielding data for ``season`` and hand it to ``cde.create_positions``.

    Positions are fetched in the order c, 1b, 2b, 3b, ss, lf, cf, rf, of and
    passed positionally in that same order.
    """
    positions = (
        ('c', 'catcher'),
        ('1b', 'first base'),
        ('2b', 'second base'),
        ('3b', 'third base'),
        ('ss', 'short stop'),
        ('lf', 'left field'),
        ('cf', 'center field'),
        ('rf', 'right field'),
        ('of', 'outfield'),
    )

    fielding_dfs = []
    for pos_code, pos_label in positions:
        print(f'Pulling {pos_label} defense...')
        fielding_dfs.append(cde.get_bbref_fielding_df(pos_code, season))
    print('Positions data is retrieved')

    await cde.create_positions(offense_stats, season_pct, post_batters, *fielding_dfs)


async def run_batters(
        cardset: dict, input_path: str, post_players: bool, card_base_url: str, release_directory: str,
        player_description: str, season_pct: float, post_batters: bool, pull_fielding: bool, season: int,
        is_liveseries: bool, ignore_limits: bool, is_custom: bool = False):
    """Run the full batter pipeline for one cardset.

    Reads batting CSVs, matches them to PD players, optionally creates missing
    players, builds batting cards and ratings, optionally pulls fielding data,
    and finally computes player updates. ``post_batters`` controls whether
    cards, ratings, fielding and player updates are written to the API.

    Returns:
        dict with ``tot_batters`` (rows in the final stats frame) and
        ``new_batters`` (players created; 0 when ``post_players`` is False).
    """
    print('Pulling PD player IDs...')
    pd_players = await pd_players_df(cardset['id'])

    print('Reading batting stats...')
    all_stats = get_batting_stats(file_path=input_path, ignore_limits=ignore_limits)
    print(f'Processed {len(all_stats.index)} batters\n')
    bat_step1 = match_player_lines(all_stats, pd_players, is_custom)

    if post_players:
        new_batters = await create_new_players(
            bat_step1, cardset, card_base_url, release_directory, player_description
        )
    else:
        new_batters = 0

    # Custom Cardsets: batting cards already exist, so skip card calculation and just attach them.
    if cardset['id'] in [16]:
        offense_stats = pd.merge(
            bat_step1, await pd_battingcards_df(cardset['id']), on='player_id'
        ).set_index('key_bbref', drop=False)
    else:
        bat_step2 = get_run_stat_df(bat_step1, input_path)
        offense_stats = await calculate_batting_cards(bat_step2, cardset, season_pct, post_batters)
        del bat_step2
    # Drop references to the intermediate frames before the remaining steps.
    del bat_step1, all_stats

    await calculate_batting_ratings(offense_stats, post_batters)
    if pull_fielding:
        await run_batter_fielding(season, offense_stats, season_pct, post_batters)
    await post_player_updates(
        cardset, card_base_url, release_directory, player_description, is_liveseries, post_batters, is_custom
    )

    return {
        'tot_batters': len(offense_stats.index),
        'new_batters': new_batters
    }