From 9182556e447c7517810d9f76b2c8442c49969398 Mon Sep 17 00:00:00 2001
From: Cal Corum
Date: Sun, 3 Mar 2024 17:57:30 -0600
Subject: [PATCH] Add support for custom batting cards

Thread a new `is_custom` flag from run_batters() through
match_player_lines() and post_player_updates() into
get_all_pybaseball_ids(). When the flag is set and the numeric player ID
is >= 999942001, get_all_pybaseball_ids() returns a Series built from a
hard-coded roster instead of calling pybaseball; any exception raised on
that path is logged as a warning and the regular lookup runs.

Also:
- rename get_stat_df() to get_run_stat_df()
- for cardset id 16, run_batters() merges the matched players with the
  pd_battingcards_df() result instead of calculating batting cards from
  the running stats
- add custom_card_creation.py, a script entry point that calls
  run_batters() with is_custom=True, then run_pitchers() and the player
  position updates
---
Review notes (not part of the commit message):
- Line breaks and indentation were reconstructed from a copy of this
  patch whose newlines had been collapsed. Whitespace on context lines is
  inferred; verify with `git apply --check` before applying.
- post_player_updates() declares `is_custom: bool = False`, matching the
  default used by the other functions that take this flag.
- The main() docstring lists is_liveseries as bool rather than str.

 batters/creation.py     |  30 +++++---
 creation_helpers.py     |  50 ++++++++++++-
 custom_card_creation.py | 157 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+), 11 deletions(-)
 create mode 100644 custom_card_creation.py

diff --git a/batters/creation.py b/batters/creation.py
index 9dc5e13..d1ff273 100644
--- a/batters/creation.py
+++ b/batters/creation.py
@@ -79,9 +79,9 @@ def get_batting_stats(
         raise LookupError(f'Date-based stat pulls not implemented, yet. Please provide batting csv files.')
 
 
-def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame):
+def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame, is_custom: bool = False):
     def get_pids(df_data):
-        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')
+        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs', is_custom)
 
     print(f'Now pulling mlbam player IDs...')
     ids_and_names = all_batting.apply(get_pids, axis=1)
@@ -133,7 +133,7 @@ async def create_new_players(
     return len(new_players)
 
 
-def get_stat_df(final_batting: pd.DataFrame, input_path: str):
+def get_run_stat_df(final_batting: pd.DataFrame, input_path: str):
     print(f'Reading baserunning stats...')
 
     run_data = (pd.read_csv(f'{input_path}running.csv')
@@ -208,7 +208,8 @@ async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool):
 
 
 async def post_player_updates(
-        cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool):
+        cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool,
+        is_custom: bool = False):
     """
     Pull fresh pd_players and set_index to player_id
     Pull fresh battingcards and set_index to player
@@ -237,7 +238,10 @@ async def post_player_updates(
     del total_ratings
 
     def get_pids(df_data):
-        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')
+        if is_custom:
+            return get_all_pybaseball_ids([df_data["fangr_id"]], 'fangraphs', is_custom)
+        else:
+            return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')
 
     ids_and_names = player_data.apply(get_pids, axis=1)
     player_data = (ids_and_names
@@ -388,14 +392,14 @@ async def post_player_updates(
 async def run_batters(
         cardset: dict, input_path: str, post_players: bool, card_base_url: str, release_directory: str,
         player_description: str, season_pct: float, post_batters: bool, pull_fielding: bool, season: int,
-        is_liveseries: bool, ignore_limits: bool):
+        is_liveseries: bool, ignore_limits: bool, is_custom: bool = False):
     print(f'Pulling PD player IDs...')
     pd_players = await pd_players_df(cardset['id'])
 
     print('Reading batting stats...')
     all_stats = get_batting_stats(file_path=input_path, ignore_limits=ignore_limits)
     print(f'Processed {len(all_stats.values)} batters\n')
-    bat_step1 = match_player_lines(all_stats, pd_players)
+    bat_step1 = match_player_lines(all_stats, pd_players, is_custom)
     if post_players:
         new_batters = await create_new_players(
             bat_step1, cardset, card_base_url, release_directory, player_description
@@ -403,10 +407,16 @@ async def run_batters(
     else:
         new_batters = 0
 
-    offense_stats = get_stat_df(bat_step1, input_path)
+    # Custom Cardsets
+    if cardset['id'] in [16]:
+        offense_stats = pd.merge(
+            bat_step1, await pd_battingcards_df(cardset['id']), on='player_id').set_index('key_bbref', drop=False)
+    else:
+        bat_step2 = get_run_stat_df(bat_step1, input_path)
+        offense_stats = await calculate_batting_cards(bat_step2, cardset, season_pct, post_batters)
+        del bat_step2
     del bat_step1, all_stats
 
-    offense_stats = await calculate_batting_cards(offense_stats, cardset, season_pct, post_batters)
     await calculate_batting_ratings(offense_stats, post_batters)
     if pull_fielding:
         print(f'Pulling catcher defense...')
@@ -434,7 +444,7 @@ async def run_batters(
         )
 
     await post_player_updates(
-        cardset, card_base_url, release_directory, player_description, is_liveseries, post_batters
+        cardset, card_base_url, release_directory, player_description, is_liveseries, post_batters, is_custom
     )
 
     return {
diff --git a/creation_helpers.py b/creation_helpers.py
index 5a11c58..cc6deec 100644
--- a/creation_helpers.py
+++ b/creation_helpers.py
@@ -939,7 +939,55 @@ def mlbteam_and_franchise(mlbam_playerid):
     return p_data
 
 
-def get_all_pybaseball_ids(player_id: list, key_type: str):
+def get_all_pybaseball_ids(player_id: list, key_type: str, is_custom: bool = False):
+    if is_custom:
+        try:
+            long_player_id = int(player_id[0])
+            if long_player_id >= 999942001:
+                backyard_players = [
+                    'akhan',
+                    'amkhan',
+                    'adelvecchio',
+                    'afrazier',
+                    'awebber',
+                    'bblackwood',
+                    'drobinson',
+                    'dpetrovich',
+                    'esteele',
+                    'ghasselhoff',
+                    'jsmith',
+                    'jgarcia',
+                    'kkawaguchi',
+                    'kphillips',
+                    'keckman',
+                    'lcrocket',
+                    'llui',
+                    'mluna',
+                    'mdubois',
+                    'mthomas',
+                    'psanchez',
+                    'pwheeler',
+                    'rworthington',
+                    'rjohnson',
+                    'rdobbs',
+                    'sdobbs',
+                    'swebber',
+                    'smorgan',
+                    'tdelvecchio',
+                    'vkawaguchi'
+                ]
+                return pd.Series(
+                    {'key_bbref': backyard_players[long_player_id - 999942001],
+                     'key_fangraphs': player_id[0],
+                     'key_mlbam': player_id[0],
+                     'bat_hand': 'L' if long_player_id in [
+                         999942004, 999942007, 999942010, 999942018, 999942019, 999942020, 999942022
+                     ] else 'R'
+                     },
+                )
+        except Exception as e:
+            logging.warning(e)
+
     q = pb.playerid_reverse_lookup(player_id, key_type=key_type)
     if len(q.values) > 0:
         return_val = q.loc[0]
diff --git a/custom_card_creation.py b/custom_card_creation.py
new file mode 100644
index 0000000..f8e46f1
--- /dev/null
+++ b/custom_card_creation.py
@@ -0,0 +1,157 @@
+import asyncio
+import datetime
+import logging
+
+import batters.creation
+import pitchers.creation
+import pandas as pd
+import sys
+
+from creation_helpers import pd_players_df, pd_positions_df, get_args
+from db_calls import db_get, db_patch, DB_URL
+
+date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
+log_level = logging.INFO
+logging.basicConfig(
+    filename=f'logs/{date}.log',
+    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
+    level=log_level
+)
+CARD_BASE_URL = f'{DB_URL}/v2/players'
+
+
+async def main(args):
+    """
+    params:
+    cardset_name: str - to be searched in pd database
+    games_played: int - always 162
+    pull_fielding: bool - always False
+    post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates
+    post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitching updates
+    post_players: bool - whether or not to post player updates
+    player_description: str - shows as cardset on card image and prefixes player name in discord
+    is_liveseries: bool - always False
+    """
+    arg_data = get_args(args)
+
+    # cardset_name = input(f'What is the name of this Cardset? ')
+    cardset_name = arg_data['cardset_name']
+    print(f'Searching for cardset: {cardset_name}')
+    c_query = await db_get('cardsets', params=[('name', cardset_name)])
+
+    if c_query['count'] == 0:
+        print(f'I do not see a cardset named {cardset_name}')
+        return
+    cardset = c_query['cardsets'][0]
+    del c_query
+    input_path = f'data-input/{cardset["name"]} Cardset/'
+
+    if 'season' in arg_data:
+        season = arg_data['season']
+    else:
+        season = int(cardset['name'][:4])
+
+    season_pct = 1
+    print(f'Cardset ID: {cardset["id"]} / Season: {season}\nSeason %: {season_pct}\n')
+
+    if 'player_description' in arg_data:
+        player_description = arg_data['player_description']
+    elif season_pct < 1:
+        player_description = f'Live'
+    else:
+        player_description = f'{season}'
+
+    post_batters = True if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true' else False
+    post_pitchers = True if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true' else False
+    post_players = True if 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true' else False
+    pull_fielding = False
+    is_liveseries = False
+    ignore_limits = True
+    is_custom = True
+
+    start_time = datetime.datetime.now()
+    release_directory = f'{start_time.year}-{start_time.month}-{start_time.day}'
+
+    data = await batters.creation.run_batters(
+        cardset, input_path, post_players, CARD_BASE_URL, release_directory, player_description, season_pct,
+        post_batters, pull_fielding, season, is_liveseries, ignore_limits, is_custom
+    )
+
+    print(f'Batter updates are complete')
+    start_time_two = datetime.datetime.now()
+    run_time = start_time_two - start_time
+    print(f'Total batting cards: {data["tot_batters"]}\nNew cardset batters: {data["new_batters"]}\n'
+          f'Batter runtime: {round(run_time.total_seconds())} seconds\n')
+
+    data = await pitchers.creation.run_pitchers(
+        cardset, input_path, CARD_BASE_URL, season, release_directory, player_description, season_pct, post_players,
+        post_pitchers, is_liveseries, ignore_limits, pull_fielding
+    )
+    pitching_stats = data['pitching_stats']
+
+    print(f'Pitcher updates are complete')
+    start_time_three = datetime.datetime.now()
+    p_run_time = datetime.datetime.now() - start_time_two
+    print(f'Total pitching cards: {data["tot_pitchers"]}\nNew cardset pitchers: {data["new_pitchers"]}\n'
+          f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')
+
+    print(f'Running player position updates..')
+    all_pos = await pd_positions_df(cardset['id'])
+
+    player_updates = {}
+
+    def set_all_positions(df_data):
+        pos_series = all_pos.query(f'player_id == {df_data["player_id"]}')['position']
+        pos_updates = []
+        count = 1
+        for this_pos in pos_series:
+            if this_pos == 'P':
+                this_pitcher = pitching_stats.loc[df_data['bbref_id']]
+                if this_pitcher['starter_rating'] > 3:
+                    pos_updates.append((f'pos_{count}', 'SP'))
+                    count += 1
+                    if this_pitcher['relief_rating'] > 1 or not pd.isna(this_pitcher['closer_rating']):
+                        pos_updates.append((f'pos_{count}', 'RP'))
+                        count += 1
+                else:
+                    pos_updates.append((f'pos_{count}', 'RP'))
+                    count += 1
+
+                if not pd.isna(this_pitcher['closer_rating']):
+                    pos_updates.append((f'pos_{count}', 'CP'))
+                    count += 1
+            else:
+                pos_updates.append((f'pos_{count}', this_pos))
+                count += 1
+
+        if count == 1:
+            pos_updates.append(('pos_1', 'DH'))
+            count += 1
+
+        while count <= 9:
+            pos_updates.append((f'pos_{count}', 'False'))
+            count += 1
+
+        if len(pos_updates) > 0:
+            if df_data.player_id not in player_updates.keys():
+                player_updates[df_data.player_id] = pos_updates
+            else:
+                player_updates[df_data.player_id].extend(pos_updates)
+
+    all_players = await pd_players_df(cardset['id'])
+    all_players.apply(set_all_positions, axis=1)
+
+    print(f'Sending {len(player_updates)} player updates to PD database...')
+    if post_players:
+        for x in player_updates:
+            await db_patch('players', object_id=x, params=player_updates[x])
+    print(f'Player updates are complete\n')
+
+    p_run_time = datetime.datetime.now() - start_time_three
+    print(f'Player update runtime: {round(p_run_time.total_seconds())} seconds')
+    t_run_time = datetime.datetime.now() - start_time
+    print(f'Total runtime: {round(t_run_time.total_seconds())} seconds')
+
+
+if __name__ == '__main__':
+    asyncio.run(main(sys.argv[1:]))