"""Build pitcher scouting CSV files from pitching card ratings and positions.

Running this module as a script pulls the ratings (vs. left and vs. right) and
the pitcher fielding positions through ``fetch_data``, merges them into one
DataFrame, and writes two files:

* ``scouting/pitching-basic.csv``   - percentile scores per pitcher
* ``scouting/pitching-ratings.csv`` - the merged raw ratings, columns reordered
"""
import asyncio
import copy
import datetime
from functools import partial
import multiprocessing
import sys
from typing import Literal

import pandas as pd

from db_calls import db_get
from exceptions import logger, log_exception
from scouting_batters import log_time, fetch_data


async def get_scouting_dfs(cardset_id: list | None = None):
    """Fetch pitching ratings and positions and merge them into one DataFrame.

    Args:
        cardset_id: Iterable of cardset ids; each one is sent as a
            ``cardset_id`` query parameter. ``None`` sends no cardset
            parameters.

    Returns:
        A DataFrame indexed by ``player_id`` (the column is kept as well).
        Columns present in both the vs-left and vs-right rating rows carry the
        ``_vl`` / ``_vr`` suffixes, and the ``Range P`` / ``Error P`` series
        built from the positions response are joined on.
    """
    # Iterating the old ``None`` default raised TypeError; treat it as "no ids".
    cardset_ids = [] if cardset_id is None else cardset_id
    cardset_params = [('cardset_id', x) for x in cardset_ids]
    ratings_params = [('team_id', 31), ('ts', 's37136685556r6135248705'), *cardset_params]
    api_calls = [
        ('pitchingcardratings', [('vs_hand', 'vL'), *ratings_params]),
        ('pitchingcardratings', [('vs_hand', 'vR'), *ratings_params]),
        ('cardpositions', [('position', 'P'), *cardset_params]),
    ]

    start_time = log_time('start', message='Pulling all pitching card ratings and positions')
    api_data = await asyncio.gather(*(fetch_data(params) for params in api_calls))
    # Counts are hoisted out of the f-string: reusing the same quote character
    # inside an f-string expression only parses on Python 3.12+.
    ratings_count = api_data[0]['count'] + api_data[1]['count']
    positions_count = api_data[2]['count']
    log_time(
        'end',
        f'Pulled {ratings_count} pitching card ratings and {positions_count} positions',
        start_time=start_time,
    )

    start_time = log_time('start', message='Building base dataframes')
    vl_vals = api_data[0]['ratings']
    for x in vl_vals:
        card = x['pitchingcard']
        player = card['player']
        # Lift every card-level key to the top level of the row first; the
        # explicit assignments below then add the player/cardset fields.
        x.update(card)
        x['player_id'] = player['player_id']
        x['player_name'] = player['p_name']
        x['rarity'] = player['rarity']['name']
        x['cardset_id'] = player['cardset']['id']
        x['cardset_name'] = player['cardset']['name']
        x['starter_rating'] = card['starter_rating']
        x['relief_rating'] = card['relief_rating']
        x['closer_rating'] = card['closer_rating']
        # Drop the nested dicts so the DataFrame holds flat values only
        # ('player' was copied in by the update above).
        del x['pitchingcard'], x['player']

    vr_vals = api_data[1]['ratings']
    for x in vr_vals:
        # The vs-right rows only need the join key; card-level fields already
        # come from the vs-left rows.
        x['player_id'] = x['pitchingcard']['player']['player_id']
        del x['pitchingcard']

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)
    pit_df = pd.merge(vl, vr, on='player_id', suffixes=('_vl', '_vr')).set_index('player_id', drop=False)
    log_time('end', 'Base dataframes are complete', start_time=start_time)

    start_time = log_time('start', message='Building defense series')
    positions = api_data[2]['positions']
    series_list = [
        pd.Series({x['player']['player_id']: x['range'] for x in positions}, name='Range P'),
        pd.Series({x['player']['player_id']: x['error'] for x in positions}, name='Error P'),
    ]
    log_time('end', f'Processed {len(positions)} defense series', start_time=start_time)
    logger.info(f'series_list: {series_list}')

    return pit_df.join(series_list)


def _percentile_score(raw: pd.Series, ascending: bool = True) -> pd.Series:
    """Convert raw values to a rounded 0-100 percentile rank."""
    return (raw.rank(pct=True, ascending=ascending) * 100).round()


def _blend(vs_right: pd.Series, vs_left: pd.Series) -> pd.Series:
    """Weight a vs-right value at 2/3 and the matching vs-left value at 1/3."""
    return (vs_right * (2 / 3)) + (vs_left / 3)


def _hr_rate(df: pd.DataFrame, hand: str) -> pd.Series:
    """Return (homerun + bp_homerun) / 108 for one side ('vl' or 'vr')."""
    # NOTE(review): 108 is presumably the number of result chances on a card
    # - confirm.
    return (df[f'homerun_{hand}'] + df[f'bp_homerun_{hand}']) / 108


def _best_role_rating(df: pd.DataFrame) -> pd.Series:
    """Return the larger of starter_rating / relief_rating, with -1 for missing.

    The original expression was ``x if pd.isna(x) else -1``, which is inverted:
    it discarded every real rating and kept only the missing ones.
    """
    starter = pd.to_numeric(df['starter_rating'], errors='coerce').fillna(-1)
    reliever = pd.to_numeric(df['relief_rating'], errors='coerce').fillna(-1)
    return starter.where(starter > reliever, reliever)


def _raw_control(df: pd.DataFrame, hand: str) -> pd.Series:
    """Raw control value for one side: built from OBP minus AVG, plus wild pitch."""
    return ((1 - (df[f'obp_{hand}'] - df[f'avg_{hand}'])) * 100) + (1 - (df['wild_pitch'] / 20))


def _raw_stuff(df: pd.DataFrame, hand: str) -> pd.Series:
    """Raw stuff value for one side: 10 minus slugging and home-run rate."""
    slg = df[f'slg_{hand}']
    # NOTE(review): slugging is added twice, exactly as in the original code;
    # confirm whether the double weight is intentional.
    return 10 - (slg + slg + _hr_rate(df, hand))


def _raw_fielding(df: pd.DataFrame) -> pd.Series:
    """Raw fielding value from the joined 'Range P' and 'Error P' columns."""
    return ((6 - df['Range P']) * 10) + (50 - df['Error P'])


def _raw_stamina(df: pd.DataFrame) -> pd.Series:
    """Raw stamina value from the best role rating and the blended OBP."""
    this_pow = _best_role_rating(df)
    return (((this_pow * (df['obp_vr'] * (2 / 3))) + (this_pow * (df['obp_vl'] / 3))) * 4.5) + this_pow


def _raw_hit(df: pd.DataFrame) -> pd.Series:
    """Raw H/9 value: one minus the blended batting average allowed."""
    # The original lacked the parentheses around the blend, so a higher
    # average vs. left *raised* the value; this now matches the HR/9 formula.
    return 1 - _blend(df['avg_vr'], df['avg_vl'])


def _raw_k(df: pd.DataFrame) -> pd.Series:
    """Raw K/9 value: blended strikeout rate."""
    return _blend(df['strikeout_vr'] / 108, df['strikeout_vl'] / 108)


def _raw_bb(df: pd.DataFrame) -> pd.Series:
    """Raw BB/9 value: blended walk rate (ranked descending by the caller)."""
    return _blend(df['walk_vr'] / 108, df['walk_vl'] / 108)


def _raw_hr(df: pd.DataFrame) -> pd.Series:
    """Raw HR/9 value: one minus the blended home-run rate."""
    return 1 - _blend(_hr_rate(df, 'vr'), _hr_rate(df, 'vl'))


def _raw_rating(df: pd.DataFrame) -> pd.Series:
    """Raw overall rating, combining the percentile columns computed earlier."""
    # NOTE(review): the original branched on starter vs. reliever but both
    # branches held the same formula, so a single formula is kept here. If the
    # roles were meant to be weighted differently, reintroduce the split.
    rate_stats = df['H/9'] + df['K/9'] + df['BB/9'] + df['HR/9']
    return (
        (rate_stats * 5)
        + df['Fielding']
        + (df['Stamina'] * 5)
        + (_blend(df['Stuff R'], df['Stuff L']) * 4)
        + (_blend(df['Control R'], df['Control L']) * 2)
    )


# (output column, raw-value function, rank ascending?). Order matters: 'Rating'
# reads the columns produced by the entries before it.
_METRICS = (
    ('Control L', partial(_raw_control, hand='vl'), True),
    ('Control R', partial(_raw_control, hand='vr'), True),
    ('Stuff L', partial(_raw_stuff, hand='vl'), True),
    ('Stuff R', partial(_raw_stuff, hand='vr'), True),
    ('Fielding', _raw_fielding, True),
    ('Stamina', _raw_stamina, True),
    ('H/9', _raw_hit, True),
    ('K/9', _raw_k, True),
    ('BB/9', _raw_bb, False),
    ('HR/9', _raw_hr, True),
    ('Rating', _raw_rating, True),
)


async def post_calc_basic(pitching_dfs: pd.DataFrame):
    """Add percentile score columns and write ``scouting/pitching-basic.csv``.

    Each entry of ``_METRICS`` is computed from the frame, converted to a
    rounded 0-100 percentile rank, and stored as a new column.

    Args:
        pitching_dfs: Merged frame as returned by ``get_scouting_dfs``. It is
            modified in place (score columns are added); pass a copy to keep
            the original untouched.
    """
    raw_data = pitching_dfs

    for label, calc_raw, ascending in _METRICS:
        start_time = log_time('start', f'Beginning {label} calcs')
        raw_series = calc_raw(raw_data)
        if label == 'Fielding':
            logger.info(f'max fld: {raw_series.max()} / min fld: {raw_series.min()}')
        raw_data[label] = _percentile_score(raw_series, ascending=ascending)
        log_time('end', f'Done {label} calcs', start_time=start_time)

    output = raw_data[[
        'player_id', 'player_name', 'Rating', 'Control R', 'Control L', 'Stuff R', 'Stuff L', 'Stamina', 'Fielding',
        'H/9', 'K/9', 'BB/9', 'HR/9', 'hand', 'cardset_name'
    ]]

    start_time = log_time('start', 'Beginning write csv')
    # Let pandas open the file itself: writing the to_csv() string through a
    # text-mode handle doubles the carriage returns on Windows.
    output.to_csv('scouting/pitching-basic.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)


async def post_calc_ratings(pitching_dfs: pd.DataFrame):
    """Reorder the merged ratings columns and write ``scouting/pitching-ratings.csv``.

    The identifying columns come first; the per-side ``id`` and ``vs_hand``
    columns are dropped; every remaining column follows in its existing order.

    Args:
        pitching_dfs: Merged frame as returned by ``get_scouting_dfs``.
    """
    start_time = log_time('start', 'Beginning Ratings filtering')
    first = ['player_id', 'player_name', 'cardset_name', 'rarity', 'hand', 'variant']
    exclude = first + ['id_vl', 'id_vr', 'vs_hand_vl', 'vs_hand_vr']
    remaining = [col for col in pitching_dfs.columns if col not in exclude]
    output = pitching_dfs[first + remaining]
    log_time('end', 'Done filtering ratings', start_time=start_time)

    start_time = log_time('start', 'Beginning write to file')
    output.to_csv('scouting/pitching-ratings.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)


async def main():
    """Pull the scouting data, then produce both pitcher CSV files."""
    start_time = log_time('start', 'Pulling scouting data')
    overall_start_time = start_time

    pitching_dfs = await get_scouting_dfs(range(1, 28))
    # Report the row count; the original interpolated the whole DataFrame.
    print(f'Received {len(pitching_dfs)} rows')
    log_time('end', 'Pulled scouting data', start_time=start_time)

    start_time = log_time('start', 'Beginning basic scouting')
    # post_calc_basic adds columns in place, so each step gets its own copy.
    await post_calc_basic(pitching_dfs.copy())
    log_time('end', 'Completed pitching scouting', start_time=start_time)

    start_time = log_time('start', 'Beginning ratings guide')
    await post_calc_ratings(pitching_dfs.copy())
    log_time('end', 'Completed ratings guide', start_time=start_time)

    log_time('end', 'Total pitcher scouting', print_to_console=False, start_time=overall_start_time)
    print('All done with pitchers!')


if __name__ == '__main__':
    asyncio.run(main())