Add player update functionality
Save new players and cost/rarity deltas to CSV files
This commit is contained in:
parent
d7922a138c
commit
9844fa4742
@ -1,6 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
|
from logging.handlers import RotatingFileHandler
|
||||||
import math
|
import math
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@ -14,19 +15,19 @@ import urllib
|
|||||||
from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
|
from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST, sanitize_name
|
||||||
from batters.stat_prep import DataMismatchError
|
from batters.stat_prep import DataMismatchError
|
||||||
from db_calls import DB_URL, db_get, db_patch, db_post, db_put
|
from db_calls import DB_URL, db_get, db_patch, db_post, db_put
|
||||||
from exceptions import log_exception
|
from exceptions import log_exception, logger
|
||||||
import batters.calcs_batter as cba
|
import batters.calcs_batter as cba
|
||||||
import defenders.calcs_defense as cde
|
import defenders.calcs_defense as cde
|
||||||
import pitchers.calcs_pitcher as cpi
|
import pitchers.calcs_pitcher as cpi
|
||||||
|
|
||||||
cache.enable()
|
cache.enable()
|
||||||
date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
|
# date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
|
||||||
log_level = logging.INFO
|
# log_level = logger.INFO
|
||||||
logging.basicConfig(
|
# logger.basicConfig(
|
||||||
filename=f'logs/{date}.log',
|
# filename=f'logs/{date}.log',
|
||||||
format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
|
# format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
|
||||||
level=log_level
|
# level=log_level
|
||||||
)
|
# )
|
||||||
|
|
||||||
|
|
||||||
RETRO_FILE_PATH = 'data-input/retrosheet/'
|
RETRO_FILE_PATH = 'data-input/retrosheet/'
|
||||||
@ -46,14 +47,20 @@ CARDSET_ID = 20 # 20: 1998 Live, 21: 1998 Promos
|
|||||||
PLAYER_DESCRIPTION = 'Live' # Live for Live Series
|
PLAYER_DESCRIPTION = 'Live' # Live for Live Series
|
||||||
# PLAYER_DESCRIPTION = 'April PotM' # <Month> PotM for promos
|
# PLAYER_DESCRIPTION = 'April PotM' # <Month> PotM for promos
|
||||||
PROMO_INCLUSION_RETRO_IDS = [
|
PROMO_INCLUSION_RETRO_IDS = [
|
||||||
# 'johnj006',
|
# 'justd001',
|
||||||
# 'rodri001'
|
# 'rodri001',
|
||||||
|
# 'martp001',
|
||||||
|
# 'yan-e001',
|
||||||
|
# 'jonec004',
|
||||||
|
# 'belld001',
|
||||||
|
# 'schic002',
|
||||||
|
# 'johnj006'
|
||||||
]
|
]
|
||||||
|
|
||||||
# Per-Update Parameters
|
# Per-Update Parameters
|
||||||
SEASON_PCT = 26 / 162
|
SEASON_PCT = 32 / 162
|
||||||
START_DATE = 19980331 # YYYYMMDD format
|
START_DATE = 19980331 # YYYYMMDD format
|
||||||
END_DATE = 19980430 # YYYYMMDD format
|
END_DATE = 19980507 # YYYYMMDD format
|
||||||
POST_DATA = True
|
POST_DATA = True
|
||||||
LAST_WEEK_RATIO = 0.5 if PLAYER_DESCRIPTION == 'Live' else 0.0
|
LAST_WEEK_RATIO = 0.5 if PLAYER_DESCRIPTION == 'Live' else 0.0
|
||||||
LAST_TWOWEEKS_RATIO = 0.0
|
LAST_TWOWEEKS_RATIO = 0.0
|
||||||
@ -173,7 +180,7 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
|
|||||||
if PLAYER_DESCRIPTION != 'Live':
|
if PLAYER_DESCRIPTION != 'Live':
|
||||||
msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
|
msg = f'Player description is *{PLAYER_DESCRIPTION}* so dropping players not in PROMO_INCLUSION_RETRO_IDS'
|
||||||
print(msg)
|
print(msg)
|
||||||
logging.info(msg)
|
logger.info(msg)
|
||||||
# players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
|
# players = players.drop(players[players.index not in PROMO_INCLUSION_RETRO_IDS].index)
|
||||||
players = players[players[id_key].isin(PROMO_INCLUSION_RETRO_IDS)]
|
players = players[players[id_key].isin(PROMO_INCLUSION_RETRO_IDS)]
|
||||||
|
|
||||||
@ -322,7 +329,11 @@ def get_base_pitching_df(file_path: str, start_date: int, end_date: int) -> list
|
|||||||
abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
|
abr_series = date_plays[(date_plays.ab == 't') & (date_plays.batter_hand == 'r')].groupby('pitcher_id').count()['event_type'].astype(int).rename('AB_vR')
|
||||||
ps = pd.concat([ps, abr_series], axis=1)
|
ps = pd.concat([ps, abr_series], axis=1)
|
||||||
|
|
||||||
core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
|
if PLAYER_DESCRIPTION == 'Live':
|
||||||
|
core_df = ps.dropna().query(f'TBF_vL >= {MIN_TBF_VL} & TBF_vR >= {MIN_TBF_VR}')
|
||||||
|
else:
|
||||||
|
core_df = ps.dropna()
|
||||||
|
|
||||||
if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
|
if LAST_WEEK_RATIO == 0.0 and LAST_TWOWEEKS_RATIO == 0.0 and LAST_MONTH_RATIO == 0.0:
|
||||||
return [date_plays, core_df]
|
return [date_plays, core_df]
|
||||||
|
|
||||||
@ -722,6 +733,8 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
|
|||||||
|
|
||||||
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
|
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
|
||||||
def create_batting_rating(row):
|
def create_batting_rating(row):
|
||||||
|
if row['key_bbref'] == 'galaran01':
|
||||||
|
pass
|
||||||
ratings = cba.get_batter_ratings(row)
|
ratings = cba.get_batter_ratings(row)
|
||||||
ops_vl = ratings[0]['obp'] + ratings[0]['slg']
|
ops_vl = ratings[0]['obp'] + ratings[0]['slg']
|
||||||
ops_vr = ratings[1]['obp'] + ratings[1]['slg']
|
ops_vr = ratings[1]['obp'] + ratings[1]['slg']
|
||||||
@ -866,7 +879,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
no_data = True
|
no_data = True
|
||||||
for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
|
for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
|
||||||
if row['key_bbref'] in pos_df.index:
|
if row['key_bbref'] in pos_df.index:
|
||||||
logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
|
logger.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
|
||||||
try:
|
try:
|
||||||
if 'tz_runs_total' in row:
|
if 'tz_runs_total' in row:
|
||||||
average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
|
average_range = (int(pos_df.at[row["key_bbref"], 'tz_runs_total']) +
|
||||||
@ -898,7 +911,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
})
|
})
|
||||||
no_data = False
|
no_data = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f'Infield position failed: {e}')
|
logger.info(f'Infield position failed: {e}')
|
||||||
|
|
||||||
of_arms = []
|
of_arms = []
|
||||||
of_payloads = []
|
of_payloads = []
|
||||||
@ -930,7 +943,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
|
of_arms.append(int(pos_df.at[row["key_bbref"], of_run_rating]))
|
||||||
no_data = False
|
no_data = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f'Outfield position failed: {e}')
|
logger.info(f'Outfield position failed: {e}')
|
||||||
|
|
||||||
if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
|
if row["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
|
||||||
try:
|
try:
|
||||||
@ -947,7 +960,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
all_pos.append(f)
|
all_pos.append(f)
|
||||||
no_data = False
|
no_data = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f'Outfield position failed: {e}')
|
logger.info(f'Outfield position failed: {e}')
|
||||||
|
|
||||||
if row["key_bbref"] in df_c.index:
|
if row["key_bbref"] in df_c.index:
|
||||||
try:
|
try:
|
||||||
@ -991,7 +1004,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
})
|
})
|
||||||
no_data = False
|
no_data = False
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f'Catcher position failed: {e}')
|
logger.info(f'Catcher position failed: {e}')
|
||||||
|
|
||||||
if no_data:
|
if no_data:
|
||||||
all_pos.append({
|
all_pos.append({
|
||||||
@ -1049,6 +1062,8 @@ def calc_pitcher_defense(ps: pd.DataFrame) -> pd.DataFrame:
|
|||||||
|
|
||||||
async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
|
async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.DataFrame = None, def_rat_df: pd.DataFrame = None, pstat_df: pd.DataFrame = None, pit_rat_df: pd.DataFrame = None) -> pd.DataFrame:
|
||||||
all_players = []
|
all_players = []
|
||||||
|
player_deltas = [['player_id', 'player_name', 'old-cost', 'new-cost', 'old-rarity', 'new-rarity']]
|
||||||
|
new_players = [['player_id', 'player_name', 'cost', 'rarity', 'pos1']]
|
||||||
|
|
||||||
async def player_search(bbref_id: str):
|
async def player_search(bbref_id: str):
|
||||||
p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
|
p_query = await db_get('players', params=[('bbref_id', bbref_id), ('cardset_id', CARDSET_ID)])
|
||||||
@ -1087,7 +1102,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
|
'rarity_id': int(ratings_df.loc[row['key_bbref']]['rarity_id']),
|
||||||
'description': PLAYER_DESCRIPTION,
|
'description': PLAYER_DESCRIPTION,
|
||||||
'bbref_id': row['key_bbref'],
|
'bbref_id': row['key_bbref'],
|
||||||
'fangr_id': row['key_fangraphs'],
|
'fangr_id': int(float(row['key_fangraphs'])),
|
||||||
'mlbplayer_id': mlb_player['id']
|
'mlbplayer_id': mlb_player['id']
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1100,10 +1115,10 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
all_pos[count] = pos_row.position
|
all_pos[count] = pos_row.position
|
||||||
count += 1
|
count += 1
|
||||||
except KeyError:
|
except KeyError:
|
||||||
logging.info(f'No positions found for {row['use_name']} {row['last_name']}')
|
logger.info(f'No positions found for {row['use_name']} {row['last_name']}')
|
||||||
all_pos[0] = 'DH'
|
all_pos[0] = 'DH'
|
||||||
except TypeError:
|
except TypeError:
|
||||||
logging.info(f'Only one position found for {row['use_name']} {row['last_name']}')
|
logger.info(f'Only one position found for {row['use_name']} {row['last_name']}')
|
||||||
all_pos[0] = def_rat_df.loc[row['key_bbref']].position
|
all_pos[0] = def_rat_df.loc[row['key_bbref']].position
|
||||||
|
|
||||||
return all_pos
|
return all_pos
|
||||||
@ -1116,7 +1131,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
|
|
||||||
p_search = await player_search(row['key_bbref'])
|
p_search = await player_search(row['key_bbref'])
|
||||||
if p_search is not None:
|
if p_search is not None:
|
||||||
all_players.append(p_search)
|
if 'id' in p_search:
|
||||||
|
player_id = p_search['id']
|
||||||
|
else:
|
||||||
|
player_id = p_search['player_id']
|
||||||
|
|
||||||
|
new_player = await db_patch('players', object_id=player_id, params=[
|
||||||
|
('cost', f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(bat_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
|
||||||
|
])
|
||||||
|
all_players.append(new_player)
|
||||||
|
player_deltas.append([
|
||||||
|
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
|
||||||
|
])
|
||||||
else:
|
else:
|
||||||
mlb_player = await mlb_search_or_post(row['key_retro'])
|
mlb_player = await mlb_search_or_post(row['key_retro'])
|
||||||
|
|
||||||
@ -1124,7 +1150,7 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
|
|
||||||
all_pos = get_player_record_pos(def_rat_df, row)
|
all_pos = get_player_record_pos(def_rat_df, row)
|
||||||
for x in enumerate(all_pos):
|
for x in enumerate(all_pos):
|
||||||
new_player[f'pos_{x[0] + 1}'] = x[1]
|
player_payload[f'pos_{x[0] + 1}'] = x[1]
|
||||||
|
|
||||||
new_player = await db_post('players', payload=player_payload)
|
new_player = await db_post('players', payload=player_payload)
|
||||||
|
|
||||||
@ -1140,11 +1166,12 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
# all_bbref_ids.append(row['key_bbref'])
|
# all_bbref_ids.append(row['key_bbref'])
|
||||||
# all_player_ids.append(player_id)
|
# all_player_ids.append(player_id)
|
||||||
all_players.append(new_player)
|
all_players.append(new_player)
|
||||||
|
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
|
||||||
|
|
||||||
dev_count += 1
|
dev_count += 1
|
||||||
elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
|
elif pstat_df is not None and pit_rat_df is not None and def_rat_df is not None:
|
||||||
starter_index = def_rat_df.columns.get_loc('starter_rating')
|
starter_index = pstat_df.columns.get_loc('starter_rating')
|
||||||
closer_index = def_rat_df.columns.get_loc('closer_rating')
|
closer_index = pstat_df.columns.get_loc('closer_rating')
|
||||||
|
|
||||||
for index, row in pstat_df.iterrows():
|
for index, row in pstat_df.iterrows():
|
||||||
if dev_count < 0:
|
if dev_count < 0:
|
||||||
@ -1152,7 +1179,18 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
|
|
||||||
p_search = await player_search(row['key_bbref'])
|
p_search = await player_search(row['key_bbref'])
|
||||||
if p_search is not None:
|
if p_search is not None:
|
||||||
all_players.append(p_search)
|
if 'id' in p_search:
|
||||||
|
player_id = p_search['id']
|
||||||
|
else:
|
||||||
|
player_id = p_search['player_id']
|
||||||
|
|
||||||
|
new_player = await db_patch('players', object_id=player_id, params=[
|
||||||
|
('cost', f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'), ('rarity_id', int(pit_rat_df.loc[row['key_bbref']]['rarity_id'])), ('image', f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')
|
||||||
|
])
|
||||||
|
all_players.append(new_player)
|
||||||
|
player_deltas.append([
|
||||||
|
new_player['player_id'], new_player['p_name'], p_search['cost'], new_player['cost'], p_search['rarity']['name'], new_player['rarity']['name']
|
||||||
|
])
|
||||||
else:
|
else:
|
||||||
mlb_player = await mlb_search_or_post(row['key_retro'])
|
mlb_player = await mlb_search_or_post(row['key_retro'])
|
||||||
|
|
||||||
@ -1181,11 +1219,15 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
|
|||||||
del new_player['paperdex']
|
del new_player['paperdex']
|
||||||
|
|
||||||
all_players.append(new_player)
|
all_players.append(new_player)
|
||||||
|
new_players.append([new_player['player_id'], new_player['p_name'], new_player['cost'], new_player['rarity']['name'], new_player['pos_1']])
|
||||||
|
|
||||||
dev_count += 1
|
dev_count += 1
|
||||||
else:
|
else:
|
||||||
raise KeyError(f'Could not get players - not enough stat DFs were supplied')
|
raise KeyError(f'Could not get players - not enough stat DFs were supplied')
|
||||||
|
|
||||||
|
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv')
|
||||||
|
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv')
|
||||||
|
|
||||||
players_df = pd.DataFrame(all_players).set_index('bbref_id')
|
players_df = pd.DataFrame(all_players).set_index('bbref_id')
|
||||||
return players_df
|
return players_df
|
||||||
|
|
||||||
@ -1228,7 +1270,7 @@ async def post_pitching_cards(cards_df: pd.DataFrame):
|
|||||||
all_cards = []
|
all_cards = []
|
||||||
def get_closer_rating(raw_rating):
|
def get_closer_rating(raw_rating):
|
||||||
try:
|
try:
|
||||||
if raw_rating.isnull():
|
if pd.isnull(raw_rating):
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
return raw_rating
|
return raw_rating
|
||||||
@ -1461,11 +1503,11 @@ async def run_batters(data_input_path: str, start_date: int, end_date: int, post
|
|||||||
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
||||||
|
|
||||||
post_msg = f'Posted {num_players} players to the database'
|
post_msg = f'Posted {num_players} players to the database'
|
||||||
logging.info(post_msg)
|
logger.info(post_msg)
|
||||||
print(post_msg)
|
print(post_msg)
|
||||||
else:
|
else:
|
||||||
post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
|
post_msg = f'{batting_stats.index.size} total batters\n\nPlayers are NOT being posted to the database'
|
||||||
logging.warning(post_msg)
|
logger.warning(post_msg)
|
||||||
print(post_msg)
|
print(post_msg)
|
||||||
|
|
||||||
return batting_stats
|
return batting_stats
|
||||||
@ -1534,11 +1576,11 @@ async def run_pitchers(data_input_path: str, start_date: int, end_date: int, pos
|
|||||||
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
||||||
|
|
||||||
post_msg = f'\nPosted {num_players} pitchers to the database'
|
post_msg = f'\nPosted {num_players} pitchers to the database'
|
||||||
logging.info(post_msg)
|
logger.info(post_msg)
|
||||||
print(post_msg)
|
print(post_msg)
|
||||||
else:
|
else:
|
||||||
post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
|
post_msg = f'{pitching_stats.index.size} total pitchers\n\nPlayers are NOT being posted to the database'
|
||||||
logging.warning(post_msg)
|
logger.warning(post_msg)
|
||||||
print(post_msg)
|
print(post_msg)
|
||||||
|
|
||||||
return pitching_stats
|
return pitching_stats
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user