"""Build pitcher scouting reports from card ratings pulled from the API.

Running this module fetches pitching card ratings (vs. left and vs. right) and
pitcher fielding positions, merges them into one dataframe keyed by
``player_id`` and writes two CSV files:

* ``scouting/pitching-basic.csv``   - percentile (0-100) grades per pitcher.
* ``scouting/pitching-ratings.csv`` - the merged raw ratings.
"""

import asyncio
import copy

from exceptions import logger
from scouting_batters import log_time, fetch_data

import pandas as pd


async def get_scouting_dfs(cardset_id: list = None):
    """Fetch pitching ratings and positions and merge them into one dataframe.

    Args:
        cardset_id: Cardset ids sent as repeated ``cardset_id`` query
            parameters. ``None`` or an empty list sends none.

    Returns:
        A dataframe indexed by ``player_id`` holding the vs-left row and the
        vs-right row of each pitcher side by side (overlapping columns get the
        ``_vl`` / ``_vr`` suffixes) plus the ``Range P`` and ``Error P``
        columns built from the position data.
    """
    # Guard against the declared default of None, which is not iterable.
    cardset_params = [("cardset_id", x) for x in (cardset_id or [])]
    ratings_params = [
        ("team_id", 31),
        ("ts", "s37136685556r6135248705"),
        *cardset_params,
    ]
    API_CALLS = [
        ("pitchingcardratings", [("vs_hand", "vL"), *ratings_params]),
        ("pitchingcardratings", [("vs_hand", "vR"), *ratings_params]),
        ("cardpositions", [("position", "P"), *cardset_params]),
    ]

    start_time = log_time(
        "start", message="Pulling all pitching card ratings and positions"
    )
    # Results come back in the same order as API_CALLS: vL, vR, positions.
    api_data = await asyncio.gather(*(fetch_data(params) for params in API_CALLS))
    log_time(
        "end",
        f"Pulled {api_data[0]['count'] + api_data[1]['count']} pitching card "
        f"ratings and {api_data[2]['count']} positions",
        start_time=start_time,
    )

    start_time = log_time("start", message="Building base dataframes")

    # The vs-left rows carry the flattened card and player details.
    vl_vals = api_data[0]["ratings"]
    for x in vl_vals:
        card = x["pitchingcard"]
        player = card["player"]
        x.update(card)
        x["player_id"] = player["player_id"]
        x["player_name"] = player["p_name"]
        x["rarity"] = player["rarity"]["name"]
        x["cardset_id"] = player["cardset"]["id"]
        x["cardset_name"] = player["cardset"]["name"]
        x["starter_rating"] = card["starter_rating"]
        x["relief_rating"] = card["relief_rating"]
        x["closer_rating"] = card["closer_rating"]
        # Drop the nested dicts so every remaining value is a flat column.
        del x["pitchingcard"], x["player"]

    # The vs-right rows only need the join key; card details come from vl.
    vr_vals = api_data[1]["ratings"]
    for x in vr_vals:
        x["player_id"] = x["pitchingcard"]["player"]["player_id"]
        del x["pitchingcard"]

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)
    pit_df = pd.merge(vl, vr, on="player_id", suffixes=("_vl", "_vr")).set_index(
        "player_id", drop=False
    )
    log_time("end", "Base dataframes are complete", start_time=start_time)

    start_time = log_time("start", message="Building defense series")
    positions = api_data[2]["positions"]
    series_list = [
        pd.Series(
            {x["player"]["player_id"]: x["range"] for x in positions},
            name="Range P",
        ),
        pd.Series(
            {x["player"]["player_id"]: x["error"] for x in positions},
            name="Error P",
        ),
    ]
    log_time("end", f"Processed {len(positions)} defense series", start_time=start_time)
    logger.info(f"series_list: {series_list}")

    return pit_df.join(series_list)


def _blend(vs_right, vs_left):
    """Combine two split values, weighting vs-right 2/3 and vs-left 1/3."""
    return (vs_right * (2 / 3)) + (vs_left / 3)


def _pitcher_power(row):
    """Return the larger of starter/relief rating, treating missing as -1."""
    spow = row["starter_rating"] if pd.notna(row["starter_rating"]) else -1
    rpow = row["relief_rating"] if pd.notna(row["relief_rating"]) else -1
    return spow if spow > rpow else rpow


def _raw_control(row, side):
    """Raw control score against one side (``side`` is ``"vl"`` or ``"vr"``)."""
    return ((1 - (row[f"obp_{side}"] - row[f"avg_{side}"])) * 100) + (
        1 - (row["wild_pitch"] / 20)
    )


def _raw_stuff(row, side):
    """Raw stuff score against one side (``side`` is ``"vl"`` or ``"vr"``)."""
    slg = row[f"slg_{side}"]
    # NOTE(review): slugging is counted twice, exactly as in the original
    # formula - confirm this weighting is intended.
    return 10 - (
        slg + slg + ((row[f"homerun_{side}"] + row[f"bp_homerun_{side}"]) / 108)
    )


def _raw_fielding(row):
    """Raw fielding score; lower range and error values score higher."""
    return ((6 - row["Range P"]) * 10) + (50 - row["Error P"])


def _raw_stamina(row):
    """Raw stamina score built from the pitcher's best starter/relief rating."""
    this_pow = _pitcher_power(row)
    return (
        ((this_pow * (row["obp_vr"] * (2 / 3))) + (this_pow * (row["obp_vl"] / 3)))
        * 4.5
    ) + this_pow


def _raw_hit(row):
    """Raw H/9 score: one minus the blended batting average allowed."""
    # The whole blend is subtracted so a higher average against either side
    # lowers the score (same shape as the HR/9 formula).
    return 1 - _blend(row["avg_vr"], row["avg_vl"])


def _raw_k(row):
    """Raw K/9 score: blended strikeout chances out of 108."""
    return _blend(row["strikeout_vr"] / 108, row["strikeout_vl"] / 108)


def _raw_bb(row):
    """Raw BB/9 value: blended walk chances out of 108 (lower is better)."""
    return _blend(row["walk_vr"] / 108, row["walk_vl"] / 108)


def _raw_hr(row):
    """Raw HR/9 score: one minus blended home-run chances out of 108."""
    return 1 - _blend(
        (row["homerun_vr"] + row["bp_homerun_vr"]) / 108,
        (row["homerun_vl"] + row["bp_homerun_vl"]) / 108,
    )


def _raw_rating(row):
    """Raw overall score combining the previously computed grade columns."""
    # NOTE(review): the original branched on starter vs. reliever but both
    # branches used this same formula, so they are collapsed here. Restore the
    # branch if the roles are meant to be weighted differently.
    return (
        ((row["H/9"] + row["K/9"] + row["BB/9"] + row["HR/9"]) * 5)
        + row["Fielding"]
        + (row["Stamina"] * 5)
        + (_blend(row["Stuff R"], row["Stuff L"]) * 4)
        + (_blend(row["Control R"], row["Control L"]) * 2)
    )


def _add_percentile_column(df, column, raw_fn, ascending=True):
    """Grade every row with ``raw_fn`` and store its 0-100 percentile rank.

    Args:
        df: Dataframe to grade; ``column`` is added to it in place.
        column: Name of the grade column, also used in the log messages.
        raw_fn: Callable taking one row and returning its raw score.
        ascending: Pass ``False`` when a lower raw score should rank higher.

    Returns:
        The series of raw (un-ranked) scores.
    """
    start_time = log_time("start", f"Beginning {column} calcs")
    raw_series = df.apply(raw_fn, axis=1)
    rank_series = raw_series.rank(pct=True, ascending=ascending)
    df[column] = (rank_series * 100).round()
    log_time("end", f"Done {column} calcs", start_time=start_time)
    return raw_series


async def post_calc_basic(pitching_dfs: pd.DataFrame):
    """Compute percentile grades and write ``scouting/pitching-basic.csv``.

    The grade columns are added to ``pitching_dfs`` in place, so callers that
    need the original frame untouched should pass a copy.

    Args:
        pitching_dfs: Merged pitching dataframe from ``get_scouting_dfs``.
    """
    raw_data = pitching_dfs

    _add_percentile_column(raw_data, "Control L", lambda row: _raw_control(row, "vl"))
    _add_percentile_column(raw_data, "Control R", lambda row: _raw_control(row, "vr"))
    _add_percentile_column(raw_data, "Stuff L", lambda row: _raw_stuff(row, "vl"))
    _add_percentile_column(raw_data, "Stuff R", lambda row: _raw_stuff(row, "vr"))

    raw_fielding = _add_percentile_column(raw_data, "Fielding", _raw_fielding)
    logger.info(f"max fld: {raw_fielding.max()} / min fld: {raw_fielding.min()}")

    _add_percentile_column(raw_data, "Stamina", _raw_stamina)
    _add_percentile_column(raw_data, "H/9", _raw_hit)
    _add_percentile_column(raw_data, "K/9", _raw_k)
    # Fewer walks is better, so rank in descending order.
    _add_percentile_column(raw_data, "BB/9", _raw_bb, ascending=False)
    _add_percentile_column(raw_data, "HR/9", _raw_hr)
    # Rating must come last: it reads the grade columns added above.
    _add_percentile_column(raw_data, "Rating", _raw_rating)

    output = raw_data[
        [
            "player_id",
            "player_name",
            "Rating",
            "Control R",
            "Control L",
            "Stuff R",
            "Stuff L",
            "Stamina",
            "Fielding",
            "H/9",
            "K/9",
            "BB/9",
            "HR/9",
            "hand",
            "cardset_name",
        ]
    ]

    start_time = log_time("start", "Beginning write csv")
    output.to_csv("scouting/pitching-basic.csv", index=False)
    log_time("end", "Done writing to file", start_time=start_time)


async def post_calc_ratings(pitching_dfs: pd.DataFrame):
    """Write the merged raw ratings to ``scouting/pitching-ratings.csv``.

    Identifying columns are moved to the front and the per-side rating ids
    and ``vs_hand`` columns are dropped.

    Args:
        pitching_dfs: Merged pitching dataframe from ``get_scouting_dfs``.
    """
    start_time = log_time("start", "Beginning Ratings filtering")
    first = ["player_id", "player_name", "cardset_name", "rarity", "hand", "variant"]
    exclude = first + ["id_vl", "id_vr", "vs_hand_vl", "vs_hand_vr"]
    output = pitching_dfs[
        first + [col for col in pitching_dfs.columns if col not in exclude]
    ]
    log_time("end", "Done filtering ratings", start_time=start_time)

    start_time = log_time("start", "Beginning write to file")
    output.to_csv("scouting/pitching-ratings.csv", index=False)
    log_time("end", "Done writing to file", start_time=start_time)


async def main():
    """Pull the scouting data, then produce both pitcher CSV reports."""
    start_time = log_time("start", "Pulling scouting data")
    overall_start_time = start_time

    pitching_dfs = await get_scouting_dfs([])
    print(f"Received {len(pitching_dfs)} rows")
    log_time("end", "Pulled scouting data", start_time=start_time)

    # Each report gets its own copy because post_calc_basic adds columns.
    start_time = log_time("start", "Beginning basic scouting")
    await post_calc_basic(copy.deepcopy(pitching_dfs))
    log_time("end", "Completed pitching scouting", start_time=start_time)

    start_time = log_time("start", "Beginning ratings guide")
    await post_calc_ratings(copy.deepcopy(pitching_dfs))
    log_time("end", "Completed ratings guide", start_time=start_time)

    log_time(
        "end",
        "Total pitcher scouting",
        print_to_console=False,
        start_time=overall_start_time,
    )
    print("All done with pitchers!")


if __name__ == "__main__":
    asyncio.run(main())