From 9844fa4742872bc227e584836e47e37b430f2e3c Mon Sep 17 00:00:00 2001
From: Cal Corum
Date: Sun, 10 Nov 2024 14:42:00 -0600
Subject: [PATCH] Add player update functionality

Save new players and deltas to csv
---
 retrosheet_data.py | 106 +++++++++++++++++++++++++++++++--------------
 1 file changed, 74 insertions(+), 32 deletions(-)

diff --git a/retrosheet_data.py b/retrosheet_data.py
index a81ca7b..0e8451d 100644
--- a/retrosheet_data.py
+++ b/retrosheet_data.py
@@ -1,6 +1,7 @@
 import asyncio
 import datetime
 import logging
+from logging.handlers import RotatingFileHandler
 import math
 import sys

@@ -14,19 +15,19 @@
 import urllib
 from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
 from batters.stat_prep import DataMismatchError
 from db_calls import DB_URL, db_get, db_patch, db_post, db_put
-from exceptions import log_exception
+from exceptions import log_exception, logger
 import batters.calcs_batter as cba
 import defenders.calcs_defense as cde
 import pitchers.calcs_pitcher as cpi

 cache.enable()

-date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
-log_level = logging.INFO
-logging.basicConfig(
-    filename=f'logs/{date}.log',
-    format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
-    level=log_level
-)
+# date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
+# log_level = logger.INFO
+# logger.basicConfig(
+#     filename=f'logs/{date}.log',
+#     format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
+#     level=log_level
+# )
 RETRO_FILE_PATH = 'data-input/retrosheet/'
@@ -46,14 +47,20 @@
 CARDSET_ID = 20 # 20: 1998 Live, 21: 1998 Promos
 PLAYER_DESCRIPTION = 'Live' # Live for Live Series
 # PLAYER_DESCRIPTION = 'April PotM' # PotM for promos
 PROMO_INCLUSION_RETRO_IDS = [
-    # 'johnj006',
-    # 'rodri001'
+    # 'justd001',
+    # 'rodri001',
+    # 'martp001',
+    # 'yan-e001',
+    # 'jonec004',
+    # 'belld001',
+    # 'schic002',
+    # 'johnj006'
 ]
 # Per-Update Parameters
-SEASON_PCT = 26 / 162
+SEASON_PCT = 32 / 162
 START_DATE = 19980331 # YYYYMMDD format
-END_DATE = 19980430 # YYYYMMDD format
+END_DATE = 19980507 # YYYYMMDD format
 POST_DATA = True
 LAST_WEEK_RATIO = 0.5 if PLAYER_DESCRIPTION == 'Live' else 0.0
 LAST_TWOWEEKS_RATIO = 0.0
@@ -173,7 +180,7 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
     if PLAYER_DESCRIPTION != 'Live':
         msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
         print(msg)
-        logging.info(msg)
+        logger.info(msg)
         # players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
         players = players[players[id_key].isin(PROMO_INCLUSION_RETRO_IDS)]

@@ -322,7 +329,11 @@ def get_base_pitching_df(file_path: str, start_date: int, end_date: int) -> list
     abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
     ps = pd.concat([ps, abr_series], axis=1)

-    core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
+    if PLAYER_DESCRIPTION == 'Live':
+        core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
+    else:
+        core_df = ps.dropna()
+
     if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
         return [date_plays, core_df]

@@ -722,6 +733,8 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:

 def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
     def create_batting_rating(row):
+        if row['key_bbref'] == 'galaran01':
+            pass
         ratings = cba.get_batter_ratings(row)
         ops_vl = ratings[0]['obp'] + ratings[0]['slg']
         ops_vr = ratings[1]['obp'] + ratings[1]['slg']
@@ -866,7 +879,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
         no_data = True
         for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
             if row['key_bbref'] in pos_df.index:
-                logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
+                logger.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
                 try:
                     if 'tz_runs_total' in row:
                         average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
@@ -898,7 +911,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
                     })
                     no_data = False
                 except Exception as e:
-                    logging.info(f'Infield position failed: {e}')
+                    logger.info(f'Infield position failed: {e}')

         of_arms = []
         of_payloads = []
@@ -930,7 +943,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
                     of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
                     no_data = False
                 except Exception as e:
-                    logging.info(f'Outfield position failed: {e}')
+                    logger.info(f'Outfield position failed: {e}')

         if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
             try:
@@ -947,7 +960,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
                 all_pos.append(f)
                 no_data = False
             except Exception as e:
-                logging.info(f'Outfield position failed: {e}')
+                logger.info(f'Outfield position failed: {e}')

         if row["key_bbref"] in df_c.index:
             try:
@@ -991,7 +1004,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
                 })
                 no_data = False
             except Exception as e:
-                logging.info(f'Catcher position failed: {e}')
+                logger.info(f'Catcher position failed: {e}')

         if no_data:
             all_pos.append({
@@ -1049,6 +1062,8 @@ def calc_pitcher_defense(ps: pd.DataFrame) -> pd.DataFrame:

 async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
     all_players = []
+    player_deltas = [['player_id', 'player_name', 'old-cost', 'new-cost', 'old-rarity', 'new-rarity']]
+    new_players = [['player_id', 'player_name', 'cost', 'rarity', 'pos1']]

     async def player_search(bbref_id: str):
         p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
@@ -1087,7 +1102,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
             'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
             'description': PLAYER_DESCRIPTION,
             'bbref_id': row['key_bbref'],
-            'fangr_id': row['key_fangraphs'],
+            'fangr_id': int(float(row['key_fangraphs'])),
             'mlbplayer_id': mlb_player['id']
         }

@@ -1100,10 +1115,10 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
                 all_pos[count] = pos_row.position
                 count += 1
         except KeyError:
-            logging.info(f'No positions found for {row['use_name']} {row['last_name']}')
+            logger.info(f'No positions found for {row['use_name']} {row['last_name']}')
            all_pos[0] = 'DH'
         except TypeError:
-            logging.info(f'Only one position found for {row['use_name']} {row['last_name']}')
+            logger.info(f'Only one position found for {row['use_name']} {row['last_name']}')
             all_pos[0] = def_rat_df.loc[row['key_bbref']].position

         return all_pos
@@ -1116,7 +1131,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data

             p_search = await player_search(row['key_bbref'])
             if p_search is not None:
-                all_players.append(p_search)
+                if 'id' in p_search:
+                    player_id = p_search['id']
+                else:
+                    player_id = p_search['player_id']
+
+                new_player = await db_patch('players', object_id=player_id, params=[
+                    ('cost', f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(bat_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
+                ])
+                all_players.append(new_player)
+                player_deltas.append([
+                    new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
+                ])
             else:
                 mlb_player = await mlb_search_or_post(row['key_retro'])

@@ -1124,7 +1150,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
                 all_pos = get_player_record_pos(def_rat_df, row)

                 for x in enumerate(all_pos):
-                    new_player[f'pos_{x[0] + 1}'] = x[1]
+                    player_payload[f'pos_{x[0] + 1}'] = x[1]

                 new_player = await db_post('players', payload=player_payload)

@@ -1140,11 +1166,12 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
                 # all_bbref_ids.append(row['key_bbref'])
                 # all_player_ids.append(player_id)
                 all_players.append(new_player)
+                new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
                 dev_count += 1

     elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
-        starter_index = def_rat_df.columns.get_loc('starter_rating')
-        closer_index = def_rat_df.columns.get_loc('closer_rating')
+        starter_index = pstat_df.columns.get_loc('starter_rating')
+        closer_index = pstat_df.columns.get_loc('closer_rating')

         for index, row in pstat_df.iterrows():
             if dev_count < 0:
@@ -1152,7 +1179,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data

             p_search = await player_search(row['key_bbref'])
             if p_search is not None:
-                all_players.append(p_search)
+                if 'id' in p_search:
+                    player_id = p_search['id']
+                else:
+                    player_id = p_search['player_id']
+
+                new_player = await db_patch('players', object_id=player_id, params=[
+                    ('cost', f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(pit_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
+                ])
+                all_players.append(new_player)
+                player_deltas.append([
+                    new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
+                ])
             else:
                 mlb_player = await mlb_search_or_post(row['key_retro'])

@@ -1181,11 +1219,15 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
                 del new_player['paperdex']

                 all_players.append(new_player)
+                new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
                 dev_count += 1

     else:
         raise KeyError(f'Could not get players - not enough stat DFs were supplied')

+    pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv')
+    pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv')
+
     players_df = pd.DataFrame(all_players).set_index('bbref_id')

     return players_df
@@ -1228,7 +1270,7 @@ async def post_pitching_cards(cards_df: pd.DataFrame):
     all_cards = []
     def get_closer_rating(raw_rating):
         try:
-            if raw_rating.isnull():
+            if pd.isnull(raw_rating):
                 return None
             else:
                 return raw_rating
@@ -1461,11 +1503,11 @@ async def run_batters(data_input_path: str, start_date: int, end_date: int, post
         print(f'Post player data: {(post_end - post_start).total_seconds()}s')

         post_msg = f'Posted {num_players} players to the database'
-        logging.info(post_msg)
+        logger.info(post_msg)
         print(post_msg)
     else:
         post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
-        logging.warning(post_msg)
+        logger.warning(post_msg)
         print(post_msg)

     return batting_stats
@@ -1534,11 +1576,11 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
         print(f'Post player data: {(post_end - post_start).total_seconds()}s')

         post_msg = f'\nPosted {num_players} pitchers to the database'
-        logging.info(post_msg)
+        logger.info(post_msg)
         print(post_msg)
     else:
         post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
-        logging.warning(post_msg)
+        logger.warning(post_msg)
         print(post_msg)

     return pitching_stats