Add player update functionality
Save newly created players and player deltas (old/new cost and rarity) to CSV
This commit is contained in:
parent
d7922a138c
commit
9844fa4742
@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
import math
|
||||
import sys
|
||||
|
||||
@ -14,19 +15,19 @@ import urllib
|
||||
from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
|
||||
from batters.stat_prep import DataMismatchError
|
||||
from db_calls import DB_URL, db_get, db_patch, db_post, db_put
|
||||
from exceptions import log_exception
|
||||
from exceptions import log_exception, logger
|
||||
import batters.calcs_batter as cba
|
||||
import defenders.calcs_defense as cde
|
||||
import pitchers.calcs_pitcher as cpi
|
||||
|
||||
cache.enable()
|
||||
date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
|
||||
log_level = logging.INFO
|
||||
logging.basicConfig(
|
||||
filename=f'logs/{date}.log',
|
||||
format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
|
||||
level=log_level
|
||||
)
|
||||
# date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
|
||||
# log_level = logger.INFO
|
||||
# logger.basicConfig(
|
||||
# filename=f'logs/{date}.log',
|
||||
# format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
|
||||
# level=log_level
|
||||
# )
|
||||
|
||||
|
||||
RETRO_FILE_PATH = 'data-input/retrosheet/'
|
||||
@ -46,14 +47,20 @@ CARDSET_ID = 20 # 20: 1998 Live, 21: 1998 Promos
|
||||
PLAYER_DESCRIPTION = 'Live' # Live for Live Series
|
||||
# PLAYER_DESCRIPTION = 'April PotM' # <Month> PotM for promos
|
||||
PROMO_INCLUSION_RETRO_IDS = [
|
||||
# 'johnj006',
|
||||
# 'rodri001'
|
||||
# 'justd001',
|
||||
# 'rodri001',
|
||||
# 'martp001',
|
||||
# 'yan-e001',
|
||||
# 'jonec004',
|
||||
# 'belld001',
|
||||
# 'schic002',
|
||||
# 'johnj006'
|
||||
]
|
||||
|
||||
# Per-Update Parameters
|
||||
SEASON_PCT = 26 / 162
|
||||
SEASON_PCT = 32 / 162
|
||||
START_DATE = 19980331 # YYYYMMDD format
|
||||
END_DATE = 19980430 # YYYYMMDD format
|
||||
END_DATE = 19980507 # YYYYMMDD format
|
||||
POST_DATA = True
|
||||
LAST_WEEK_RATIO = 0.5 if PLAYER_DESCRIPTION == 'Live' else 0.0
|
||||
LAST_TWOWEEKS_RATIO = 0.0
|
||||
@ -173,7 +180,7 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
|
||||
if PLAYER_DESCRIPTION != 'Live':
|
||||
msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
|
||||
print(msg)
|
||||
logging.info(msg)
|
||||
logger.info(msg)
|
||||
# players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
|
||||
players = players[players[id_key].isin(PROMO_INCLUSION_RETRO_IDS)]
|
||||
|
||||
@ -322,7 +329,11 @@ def get_base_pitching_df(file_path: str, start_date: int, end_date: int) -> list
|
||||
abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
|
||||
ps = pd.concat([ps, abr_series], axis=1)
|
||||
|
||||
if PLAYER_DESCRIPTION == 'Live':
|
||||
core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
|
||||
else:
|
||||
core_df = ps.dropna()
|
||||
|
||||
if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
|
||||
return [date_plays, core_df]
|
||||
|
||||
@ -722,6 +733,8 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
|
||||
|
||||
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
def create_batting_rating(row):
|
||||
if row['key_bbref'] == 'galaran01':
|
||||
pass
|
||||
ratings = cba.get_batter_ratings(row)
|
||||
ops_vl = ratings[0]['obp'] + ratings[0]['slg']
|
||||
ops_vr = ratings[1]['obp'] + ratings[1]['slg']
|
||||
@ -866,7 +879,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
no_data = True
|
||||
for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
|
||||
if row['key_bbref'] in pos_df.index:
|
||||
logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
|
||||
logger.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
|
||||
try:
|
||||
if 'tz_runs_total' in row:
|
||||
average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
|
||||
@ -898,7 +911,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
})
|
||||
no_data = False
|
||||
except Exception as e:
|
||||
logging.info(f'Infield position failed: {e}')
|
||||
logger.info(f'Infield position failed: {e}')
|
||||
|
||||
of_arms = []
|
||||
of_payloads = []
|
||||
@ -930,7 +943,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
|
||||
no_data = False
|
||||
except Exception as e:
|
||||
logging.info(f'Outfield position failed: {e}')
|
||||
logger.info(f'Outfield position failed: {e}')
|
||||
|
||||
if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
|
||||
try:
|
||||
@ -947,7 +960,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
all_pos.append(f)
|
||||
no_data = False
|
||||
except Exception as e:
|
||||
logging.info(f'Outfield position failed: {e}')
|
||||
logger.info(f'Outfield position failed: {e}')
|
||||
|
||||
if row["key_bbref"] in df_c.index:
|
||||
try:
|
||||
@ -991,7 +1004,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
})
|
||||
no_data = False
|
||||
except Exception as e:
|
||||
logging.info(f'Catcher position failed: {e}')
|
||||
logger.info(f'Catcher position failed: {e}')
|
||||
|
||||
if no_data:
|
||||
all_pos.append({
|
||||
@ -1049,6 +1062,8 @@ def calc_pitcher_defense(ps: pd.DataFrame) -> pd.DataFrame:
|
||||
|
||||
async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
|
||||
all_players = []
|
||||
player_deltas = [['player_id', 'player_name', 'old-cost', 'new-cost', 'old-rarity', 'new-rarity']]
|
||||
new_players = [['player_id', 'player_name', 'cost', 'rarity', 'pos1']]
|
||||
|
||||
async def player_search(bbref_id: str):
|
||||
p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
|
||||
@ -1087,7 +1102,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
|
||||
'description': PLAYER_DESCRIPTION,
|
||||
'bbref_id': row['key_bbref'],
|
||||
'fangr_id': row['key_fangraphs'],
|
||||
'fangr_id': int(float(row['key_fangraphs'])),
|
||||
'mlbplayer_id': mlb_player['id']
|
||||
}
|
||||
|
||||
@ -1100,10 +1115,10 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
all_pos[count] = pos_row.position
|
||||
count += 1
|
||||
except KeyError:
|
||||
logging.info(f'No positions found for {row['use_name']} {row['last_name']}')
|
||||
logger.info(f'No positions found for {row['use_name']} {row['last_name']}')
|
||||
all_pos[0] = 'DH'
|
||||
except TypeError:
|
||||
logging.info(f'Only one position found for {row['use_name']} {row['last_name']}')
|
||||
logger.info(f'Only one position found for {row['use_name']} {row['last_name']}')
|
||||
all_pos[0] = def_rat_df.loc[row['key_bbref']].position
|
||||
|
||||
return all_pos
|
||||
@ -1116,7 +1131,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
|
||||
p_search = await player_search(row['key_bbref'])
|
||||
if p_search is not None:
|
||||
all_players.append(p_search)
|
||||
if 'id' in p_search:
|
||||
player_id = p_search['id']
|
||||
else:
|
||||
player_id = p_search['player_id']
|
||||
|
||||
new_player = await db_patch('players', object_id=player_id, params=[
|
||||
('cost', f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(bat_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
|
||||
])
|
||||
all_players.append(new_player)
|
||||
player_deltas.append([
|
||||
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
|
||||
])
|
||||
else:
|
||||
mlb_player = await mlb_search_or_post(row['key_retro'])
|
||||
|
||||
@ -1124,7 +1150,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
|
||||
all_pos = get_player_record_pos(def_rat_df, row)
|
||||
for x in enumerate(all_pos):
|
||||
new_player[f'pos_{x[0] + 1}'] = x[1]
|
||||
player_payload[f'pos_{x[0] + 1}'] = x[1]
|
||||
|
||||
new_player = await db_post('players', payload=player_payload)
|
||||
|
||||
@ -1140,11 +1166,12 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
# all_bbref_ids.append(row['key_bbref'])
|
||||
# all_player_ids.append(player_id)
|
||||
all_players.append(new_player)
|
||||
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
|
||||
|
||||
dev_count += 1
|
||||
elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
|
||||
starter_index = def_rat_df.columns.get_loc('starter_rating')
|
||||
closer_index = def_rat_df.columns.get_loc('closer_rating')
|
||||
starter_index = pstat_df.columns.get_loc('starter_rating')
|
||||
closer_index = pstat_df.columns.get_loc('closer_rating')
|
||||
|
||||
for index, row in pstat_df.iterrows():
|
||||
if dev_count < 0:
|
||||
@ -1152,7 +1179,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
|
||||
p_search = await player_search(row['key_bbref'])
|
||||
if p_search is not None:
|
||||
all_players.append(p_search)
|
||||
if 'id' in p_search:
|
||||
player_id = p_search['id']
|
||||
else:
|
||||
player_id = p_search['player_id']
|
||||
|
||||
new_player = await db_patch('players', object_id=player_id, params=[
|
||||
('cost', f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(pit_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
|
||||
])
|
||||
all_players.append(new_player)
|
||||
player_deltas.append([
|
||||
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
|
||||
])
|
||||
else:
|
||||
mlb_player = await mlb_search_or_post(row['key_retro'])
|
||||
|
||||
@ -1181,11 +1219,15 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
||||
del new_player['paperdex']
|
||||
|
||||
all_players.append(new_player)
|
||||
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
|
||||
|
||||
dev_count += 1
|
||||
else:
|
||||
raise KeyError(f'Could not get players - not enough stat DFs were supplied')
|
||||
|
||||
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv')
|
||||
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv')
|
||||
|
||||
players_df = pd.DataFrame(all_players).set_index('bbref_id')
|
||||
return players_df
|
||||
|
||||
@ -1228,7 +1270,7 @@ async def post_pitching_cards(cards_df: pd.DataFrame):
|
||||
all_cards = []
|
||||
def get_closer_rating(raw_rating):
|
||||
try:
|
||||
if raw_rating.isnull():
|
||||
if pd.isnull(raw_rating):
|
||||
return None
|
||||
else:
|
||||
return raw_rating
|
||||
@ -1461,11 +1503,11 @@ async def run_batters(data_input_path: str, start_date: int, end_date: int, post
|
||||
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
||||
|
||||
post_msg = f'Posted {num_players} players to the database'
|
||||
logging.info(post_msg)
|
||||
logger.info(post_msg)
|
||||
print(post_msg)
|
||||
else:
|
||||
post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
|
||||
logging.warning(post_msg)
|
||||
logger.warning(post_msg)
|
||||
print(post_msg)
|
||||
|
||||
return batting_stats
|
||||
@ -1534,11 +1576,11 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
|
||||
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
||||
|
||||
post_msg = f'\nPosted {num_players} pitchers to the database'
|
||||
logging.info(post_msg)
|
||||
logger.info(post_msg)
|
||||
print(post_msg)
|
||||
else:
|
||||
post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
|
||||
logging.warning(post_msg)
|
||||
logger.warning(post_msg)
|
||||
print(post_msg)
|
||||
|
||||
return pitching_stats
|
||||
|
||||
Loading…
Reference in New Issue
Block a user