Add support for custom batting cards

This commit is contained in:
Cal Corum 2024-03-03 17:57:30 -06:00
parent 1b0153682a
commit 9182556e44
3 changed files with 226 additions and 11 deletions

View File

@@ -79,9 +79,9 @@ def get_batting_stats(
raise LookupError(f'Date-based stat pulls not implemented, yet. Please provide batting csv files.') raise LookupError(f'Date-based stat pulls not implemented, yet. Please provide batting csv files.')
def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame): def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame, is_custom: bool = False):
def get_pids(df_data): def get_pids(df_data):
return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs') return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs', is_custom)
print(f'Now pulling mlbam player IDs...') print(f'Now pulling mlbam player IDs...')
ids_and_names = all_batting.apply(get_pids, axis=1) ids_and_names = all_batting.apply(get_pids, axis=1)
@@ -133,7 +133,7 @@ async def create_new_players(
return len(new_players) return len(new_players)
def get_stat_df(final_batting: pd.DataFrame, input_path: str): def get_run_stat_df(final_batting: pd.DataFrame, input_path: str):
print(f'Reading baserunning stats...') print(f'Reading baserunning stats...')
run_data = (pd.read_csv(f'{input_path}running.csv') run_data = (pd.read_csv(f'{input_path}running.csv')
@@ -208,7 +208,8 @@ async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool):
async def post_player_updates( async def post_player_updates(
cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool): cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool,
is_custom: bool):
""" """
Pull fresh pd_players and set_index to player_id Pull fresh pd_players and set_index to player_id
Pull fresh battingcards and set_index to player Pull fresh battingcards and set_index to player
@@ -237,7 +238,10 @@ async def post_player_updates(
del total_ratings del total_ratings
def get_pids(df_data): def get_pids(df_data):
return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref') if is_custom:
return get_all_pybaseball_ids([df_data["fangr_id"]], 'fangraphs', is_custom)
else:
return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')
ids_and_names = player_data.apply(get_pids, axis=1) ids_and_names = player_data.apply(get_pids, axis=1)
player_data = (ids_and_names player_data = (ids_and_names
@@ -388,14 +392,14 @@ async def post_player_updates(
async def run_batters( async def run_batters(
cardset: dict, input_path: str, post_players: bool, card_base_url: str, release_directory: str, cardset: dict, input_path: str, post_players: bool, card_base_url: str, release_directory: str,
player_description: str, season_pct: float, post_batters: bool, pull_fielding: bool, season: int, player_description: str, season_pct: float, post_batters: bool, pull_fielding: bool, season: int,
is_liveseries: bool, ignore_limits: bool): is_liveseries: bool, ignore_limits: bool, is_custom: bool = False):
print(f'Pulling PD player IDs...') print(f'Pulling PD player IDs...')
pd_players = await pd_players_df(cardset['id']) pd_players = await pd_players_df(cardset['id'])
print('Reading batting stats...') print('Reading batting stats...')
all_stats = get_batting_stats(file_path=input_path, ignore_limits=ignore_limits) all_stats = get_batting_stats(file_path=input_path, ignore_limits=ignore_limits)
print(f'Processed {len(all_stats.values)} batters\n') print(f'Processed {len(all_stats.values)} batters\n')
bat_step1 = match_player_lines(all_stats, pd_players) bat_step1 = match_player_lines(all_stats, pd_players, is_custom)
if post_players: if post_players:
new_batters = await create_new_players( new_batters = await create_new_players(
bat_step1, cardset, card_base_url, release_directory, player_description bat_step1, cardset, card_base_url, release_directory, player_description
@@ -403,10 +407,16 @@ async def run_batters(
else: else:
new_batters = 0 new_batters = 0
offense_stats = get_stat_df(bat_step1, input_path) # Custom Cardsets
if cardset['id'] in [16]:
offense_stats = pd.merge(
bat_step1, await pd_battingcards_df(cardset['id']), on='player_id').set_index('key_bbref', drop=False)
else:
bat_step2 = get_run_stat_df(bat_step1, input_path)
offense_stats = await calculate_batting_cards(bat_step2, cardset, season_pct, post_batters)
del bat_step2
del bat_step1, all_stats del bat_step1, all_stats
offense_stats = await calculate_batting_cards(offense_stats, cardset, season_pct, post_batters)
await calculate_batting_ratings(offense_stats, post_batters) await calculate_batting_ratings(offense_stats, post_batters)
if pull_fielding: if pull_fielding:
print(f'Pulling catcher defense...') print(f'Pulling catcher defense...')
@@ -434,7 +444,7 @@ async def run_batters(
) )
await post_player_updates( await post_player_updates(
cardset, card_base_url, release_directory, player_description, is_liveseries, post_batters cardset, card_base_url, release_directory, player_description, is_liveseries, post_batters, is_custom
) )
return { return {

View File

@@ -939,7 +939,55 @@ def mlbteam_and_franchise(mlbam_playerid):
return p_data return p_data
def get_all_pybaseball_ids(player_id: list, key_type: str): def get_all_pybaseball_ids(player_id: list, key_type: str, is_custom: bool = False):
if is_custom:
try:
long_player_id = int(player_id[0])
if long_player_id >= 999942001:
backyard_players = [
'akhan',
'amkhan',
'adelvecchio',
'afrazier',
'awebber',
'bblackwood',
'drobinson',
'dpetrovich',
'esteele',
'ghasselhoff',
'jsmith',
'jgarcia',
'kkawaguchi',
'kphillips',
'keckman',
'lcrocket',
'llui',
'mluna',
'mdubois',
'mthomas',
'psanchez',
'pwheeler',
'rworthington',
'rjohnson',
'rdobbs',
'sdobbs',
'swebber',
'smorgan',
'tdelvecchio',
'vkawaguchi'
]
return pd.Series(
{'key_bbref': backyard_players[long_player_id - 999942001],
'key_fangraphs': player_id[0],
'key_mlbam': player_id[0],
'bat_hand': 'L' if long_player_id in [
999942004, 999942007, 999942010, 999942018, 999942019, 999942020, 999942022
] else 'R'
},
)
except Exception as e:
logging.warning(e)
q = pb.playerid_reverse_lookup(player_id, key_type=key_type) q = pb.playerid_reverse_lookup(player_id, key_type=key_type)
if len(q.values) > 0: if len(q.values) > 0:
return_val = q.loc[0] return_val = q.loc[0]

157
custom_card_creation.py Normal file
View File

@@ -0,0 +1,157 @@
import asyncio
import datetime
import logging
import batters.creation
import pitchers.creation
import pandas as pd
import sys
from creation_helpers import pd_players_df, pd_positions_df, get_args
from db_calls import db_get, db_patch, DB_URL
# Take one timestamp so the year, month and day in the log file name cannot
# disagree if the script happens to start right at a midnight rollover.
_started_at = datetime.datetime.now()
date = f'{_started_at.year}-{_started_at.month}-{_started_at.day}'

# All log records from this run go to a per-day file under logs/.
log_level = logging.INFO
logging.basicConfig(
    filename=f'logs/{date}.log',
    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
    level=log_level
)

# Base URL handed to the batter and pitcher creation runs.
CARD_BASE_URL = f'{DB_URL}/v2/players'
def _pitcher_roles(pitcher):
    """
    Return the pitching roles ('SP', 'RP', 'CP') for one row of pitching ratings.

    params:
        pitcher: row-like object indexable by 'starter_rating', 'relief_rating' and 'closer_rating'

    returns:
        list of role strings in the order they should fill position slots
    """
    has_closer_rating = not pd.isna(pitcher['closer_rating'])
    roles = []
    if pitcher['starter_rating'] > 3:
        roles.append('SP')
        # A starter is also listed as a reliever when he has a usable relief or closer rating
        if pitcher['relief_rating'] > 1 or has_closer_rating:
            roles.append('RP')
    else:
        roles.append('RP')
    if has_closer_rating:
        roles.append('CP')
    return roles


async def main(args):
    """
    Build batting and pitching cards for a custom cardset, then rewrite every player's position slots.

    params (read from the parsed command-line arguments):
        cardset_name: str - to be searched in pd database
        season: optional - used as given; when omitted the first four characters of the cardset name are read
            as the season
        player_description: str - shows as cardset on card image and prefixes player name in discord;
            defaults to the season
        post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates
        post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitching updates
        post_players: bool - whether or not to post player updates

    fixed for custom cardsets:
        season_pct: always 1
        pull_fielding: bool - always False
        is_liveseries: bool - always False
        ignore_limits: bool - always True
        is_custom: bool - always True

    raises:
        ValueError - no season argument was given and the cardset name does not start with a four-digit year
    """
    arg_data = get_args(args)

    def flag_enabled(name):
        # A posting flag is on unless it was supplied with a value other than 'true'
        return name not in arg_data or arg_data[name].lower() == 'true'

    cardset_name = arg_data['cardset_name']
    print(f'Searching for cardset: {cardset_name}')
    c_query = await db_get('cardsets', params=[('name', cardset_name)])
    if c_query['count'] == 0:
        print(f'I do not see a cardset named {cardset_name}')
        return
    cardset = c_query['cardsets'][0]
    del c_query

    input_path = f'data-input/{cardset["name"]} Cardset/'

    if 'season' in arg_data:
        # NOTE(review): passed through exactly as get_args returns it (presumably a str) - confirm the
        # batter and pitcher runs accept that
        season = arg_data['season']
    else:
        try:
            season = int(cardset['name'][:4])
        except ValueError as e:
            # Custom cardsets are not necessarily named after a year, so explain what is missing
            raise ValueError(
                f'Cardset name "{cardset["name"]}" does not start with a four-digit season; '
                f'provide a season argument instead.'
            ) from e

    season_pct = 1
    print(f'Cardset ID: {cardset["id"]} / Season: {season}\nSeason %: {season_pct}\n')

    if 'player_description' in arg_data:
        player_description = arg_data['player_description']
    elif season_pct < 1:
        # Not reachable while season_pct is fixed at 1
        player_description = f'Live'
    else:
        player_description = f'{season}'

    post_batters = flag_enabled('post_batters')
    post_pitchers = flag_enabled('post_pitchers')
    post_players = flag_enabled('post_players')
    pull_fielding = False
    is_liveseries = False
    ignore_limits = True
    is_custom = True

    start_time = datetime.datetime.now()
    release_directory = f'{start_time.year}-{start_time.month}-{start_time.day}'

    data = await batters.creation.run_batters(
        cardset, input_path, post_players, CARD_BASE_URL, release_directory, player_description, season_pct,
        post_batters, pull_fielding, season, is_liveseries, ignore_limits, is_custom
    )
    print(f'Batter updates are complete')
    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Total batting cards: {data["tot_batters"]}\nNew cardset batters: {data["new_batters"]}\n'
          f'Batter runtime: {round(run_time.total_seconds())} seconds\n')

    data = await pitchers.creation.run_pitchers(
        cardset, input_path, CARD_BASE_URL, season, release_directory, player_description, season_pct, post_players,
        post_pitchers, is_liveseries, ignore_limits, pull_fielding
    )
    pitching_stats = data['pitching_stats']
    print(f'Pitcher updates are complete')
    start_time_three = datetime.datetime.now()
    p_run_time = start_time_three - start_time_two
    print(f'Total pitching cards: {data["tot_pitchers"]}\nNew cardset pitchers: {data["new_pitchers"]}\n'
          f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')

    print(f'Running player position updates..')
    all_pos = await pd_positions_df(cardset['id'])
    player_updates = {}

    def set_all_positions(df_data):
        # Collect (pos_N, value) pairs for one player row; slots are numbered in the order they are added
        pos_series = all_pos.query(f'player_id == {df_data["player_id"]}')['position']
        pos_updates = []

        for this_pos in pos_series:
            if this_pos == 'P':
                # A generic 'P' is expanded into SP / RP / CP from the pitcher's ratings
                labels = _pitcher_roles(pitching_stats.loc[df_data['bbref_id']])
            else:
                labels = [this_pos]
            for label in labels:
                pos_updates.append((f'pos_{len(pos_updates) + 1}', label))

        # Players with no position on record are listed as DH
        if not pos_updates:
            pos_updates.append(('pos_1', 'DH'))

        # Fill the remaining slots up to pos_9 with the string 'False'
        for slot in range(len(pos_updates) + 1, 10):
            pos_updates.append((f'pos_{slot}', 'False'))

        player_updates.setdefault(df_data['player_id'], []).extend(pos_updates)

    all_players = await pd_players_df(cardset['id'])
    all_players.apply(set_all_positions, axis=1)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if post_players:
        for this_player_id, these_updates in player_updates.items():
            await db_patch('players', object_id=this_player_id, params=these_updates)
    print(f'Player updates are complete\n')

    p_run_time = datetime.datetime.now() - start_time_three
    print(f'Player update runtime: {round(p_run_time.total_seconds())} seconds')
    t_run_time = datetime.datetime.now() - start_time
    print(f'Total runtime: {round(t_run_time.total_seconds())} seconds')
if __name__ == '__main__':
    # Script entry point: everything after the script name on the command line goes to main()
    asyncio.run(main(args=sys.argv[1:]))