Add player update functionality

Save newly created players and cost/rarity deltas for updated players to CSV
This commit is contained in:
Cal Corum 2024-11-10 14:42:00 -06:00
parent d7922a138c
commit 9844fa4742

View File

@ -1,6 +1,7 @@
import asyncio
import datetime
import logging
from logging.handlers import RotatingFileHandler
import math
import sys
@ -14,19 +15,19 @@ import urllib
from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
from batters.stat_prep import DataMismatchError
from db_calls import DB_URL, db_get, db_patch, db_post, db_put
from exceptions import log_exception
from exceptions import log_exception, logger
import batters.calcs_batter as cba
import defenders.calcs_defense as cde
import pitchers.calcs_pitcher as cpi
cache.enable()
date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
log_level = logging.INFO
logging.basicConfig(
filename=f'logs/{date}.log',
format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
level=log_level
)
# date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
# log_level = logger.INFO
# logger.basicConfig(
# filename=f'logs/{date}.log',
# format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
# level=log_level
# )
RETRO_FILE_PATH = 'data-input/retrosheet/'
@ -46,14 +47,20 @@ CARDSET_ID = 20 # 20: 1998 Live, 21: 1998 Promos
PLAYER_DESCRIPTION = 'Live' # Live for Live Series
# PLAYER_DESCRIPTION = 'April PotM' # <Month> PotM for promos
PROMO_INCLUSION_RETRO_IDS = [
# 'johnj006',
# 'rodri001'
# 'justd001',
# 'rodri001',
# 'martp001',
# 'yan-e001',
# 'jonec004',
# 'belld001',
# 'schic002',
# 'johnj006'
]
# Per-Update Parameters
SEASON_PCT = 26 / 162
SEASON_PCT = 32 / 162
START_DATE = 19980331 # YYYYMMDD format
END_DATE = 19980430 # YYYYMMDD format
END_DATE = 19980507 # YYYYMMDD format
POST_DATA = True
LAST_WEEK_RATIO = 0.5 if PLAYER_DESCRIPTION == 'Live' else 0.0
LAST_TWOWEEKS_RATIO = 0.0
@ -173,7 +180,7 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
if PLAYER_DESCRIPTION != 'Live':
msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
print(msg)
logging.info(msg)
logger.info(msg)
# players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
players = players[players[id_key].isin(PROMO_INCLUSION_RETRO_IDS)]
@ -322,7 +329,11 @@ def get_base_pitching_df(file_path: str, start_date: int, end_date: int) -> list
abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
ps = pd.concat([ps, abr_series], axis=1)
core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
if PLAYER_DESCRIPTION == 'Live':
core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
else:
core_df = ps.dropna()
if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
return [date_plays, core_df]
@ -722,6 +733,8 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
def create_batting_rating(row):
if row['key_bbref'] == 'galaran01':
pass
ratings = cba.get_batter_ratings(row)
ops_vl = ratings[0]['obp'] + ratings[0]['slg']
ops_vr = ratings[1]['obp'] + ratings[1]['slg']
@ -866,7 +879,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
no_data = True
for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
if row['key_bbref'] in pos_df.index:
logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
logger.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
try:
if 'tz_runs_total' in row:
average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
@ -898,7 +911,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
})
no_data = False
except Exception as e:
logging.info(f'Infield position failed: {e}')
logger.info(f'Infield position failed: {e}')
of_arms = []
of_payloads = []
@ -930,7 +943,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
no_data = False
except Exception as e:
logging.info(f'Outfield position failed: {e}')
logger.info(f'Outfield position failed: {e}')
if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
try:
@ -947,7 +960,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
all_pos.append(f)
no_data = False
except Exception as e:
logging.info(f'Outfield position failed: {e}')
logger.info(f'Outfield position failed: {e}')
if row["key_bbref"] in df_c.index:
try:
@ -991,7 +1004,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
})
no_data = False
except Exception as e:
logging.info(f'Catcher position failed: {e}')
logger.info(f'Catcher position failed: {e}')
if no_data:
all_pos.append({
@ -1049,6 +1062,8 @@ def calc_pitcher_defense(ps: pd.DataFrame) -> pd.DataFrame:
async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
all_players = []
player_deltas = [['player_id', 'player_name', 'old-cost', 'new-cost', 'old-rarity', 'new-rarity']]
new_players = [['player_id', 'player_name', 'cost', 'rarity', 'pos1']]
async def player_search(bbref_id: str):
p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
@ -1087,7 +1102,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
'description': PLAYER_DESCRIPTION,
'bbref_id': row['key_bbref'],
'fangr_id': row['key_fangraphs'],
'fangr_id': int(float(row['key_fangraphs'])),
'mlbplayer_id': mlb_player['id']
}
@ -1100,10 +1115,10 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
all_pos[count] = pos_row.position
count += 1
except KeyError:
logging.info(f'No positions found for {row['use_name']} {row['last_name']}')
logger.info(f'No positions found for {row['use_name']} {row['last_name']}')
all_pos[0] = 'DH'
except TypeError:
logging.info(f'Only one position found for {row['use_name']} {row['last_name']}')
logger.info(f'Only one position found for {row['use_name']} {row['last_name']}')
all_pos[0] = def_rat_df.loc[row['key_bbref']].position
return all_pos
@ -1116,7 +1131,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
p_search = await player_search(row['key_bbref'])
if p_search is not None:
all_players.append(p_search)
if 'id' in p_search:
player_id = p_search['id']
else:
player_id = p_search['player_id']
new_player = await db_patch('players', object_id=player_id, params=[
('cost', f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(bat_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
])
all_players.append(new_player)
player_deltas.append([
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
])
else:
mlb_player = await mlb_search_or_post(row['key_retro'])
@ -1124,7 +1150,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
all_pos = get_player_record_pos(def_rat_df, row)
for x in enumerate(all_pos):
new_player[f'pos_{x[0] + 1}'] = x[1]
player_payload[f'pos_{x[0] + 1}'] = x[1]
new_player = await db_post('players', payload=player_payload)
@ -1140,11 +1166,12 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
# all_bbref_ids.append(row['key_bbref'])
# all_player_ids.append(player_id)
all_players.append(new_player)
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
dev_count += 1
elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
starter_index = def_rat_df.columns.get_loc('starter_rating')
closer_index = def_rat_df.columns.get_loc('closer_rating')
starter_index = pstat_df.columns.get_loc('starter_rating')
closer_index = pstat_df.columns.get_loc('closer_rating')
for index, row in pstat_df.iterrows():
if dev_count < 0:
@ -1152,7 +1179,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
p_search = await player_search(row['key_bbref'])
if p_search is not None:
all_players.append(p_search)
if 'id' in p_search:
player_id = p_search['id']
else:
player_id = p_search['player_id']
new_player = await db_patch('players', object_id=player_id, params=[
('cost', f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(pit_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
])
all_players.append(new_player)
player_deltas.append([
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
])
else:
mlb_player = await mlb_search_or_post(row['key_retro'])
@ -1181,11 +1219,15 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
del new_player['paperdex']
all_players.append(new_player)
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
dev_count += 1
else:
raise KeyError(f'Could not get players - not enough stat DFs were supplied')
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv')
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv')
players_df = pd.DataFrame(all_players).set_index('bbref_id')
return players_df
@ -1228,7 +1270,7 @@ async def post_pitching_cards(cards_df: pd.DataFrame):
all_cards = []
def get_closer_rating(raw_rating):
try:
if raw_rating.isnull():
if pd.isnull(raw_rating):
return None
else:
return raw_rating
@ -1461,11 +1503,11 @@ async def run_batters(data_input_path: str, start_date: int, end_date: int, post
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
post_msg = f'Posted {num_players} players to the database'
logging.info(post_msg)
logger.info(post_msg)
print(post_msg)
else:
post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
logging.warning(post_msg)
logger.warning(post_msg)
print(post_msg)
return batting_stats
@ -1534,11 +1576,11 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
post_msg = f'\nPosted {num_players} pitchers to the database'
logging.info(post_msg)
logger.info(post_msg)
print(post_msg)
else:
post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
logging.warning(post_msg)
logger.warning(post_msg)
print(post_msg)
return pitching_stats