paper-dynasty-card-creation/scouting_pitchers.py
Cal Corum 0a17745389 Run black and ruff across entire codebase
Standardize formatting with black and apply ruff auto-fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 14:24:33 -05:00

334 lines
11 KiB
Python

import asyncio
import copy
from exceptions import logger
from scouting_batters import log_time, fetch_data
import pandas as pd
async def get_scouting_dfs(cardset_id: list = None):
    """Fetch pitching ratings and positions and assemble one scouting DataFrame.

    Three API requests are issued concurrently through ``fetch_data``:
    pitching card ratings vs. left-handers, ratings vs. right-handers, and
    card positions filtered to ``"P"``.  The two rating sets are merged on
    ``player_id`` (overlapping columns get ``_vl`` / ``_vr`` suffixes) and the
    pitcher range/error values are joined on as ``"Range P"`` / ``"Error P"``.

    Args:
        cardset_id: Cardset ids to filter on.  ``None`` or an empty list sends
            no ``cardset_id`` filter at all.

    Returns:
        A DataFrame indexed by ``player_id`` (the column is also kept).
    """
    # Treat the None default as "no filter" instead of iterating over None.
    cardset_params = [("cardset_id", x) for x in (cardset_id or [])]
    ratings_params = [
        ("team_id", 31),
        ("ts", "s37136685556r6135248705"),
        *cardset_params,
    ]
    api_calls = [
        ("pitchingcardratings", [("vs_hand", "vL"), *ratings_params]),
        ("pitchingcardratings", [("vs_hand", "vR"), *ratings_params]),
        ("cardpositions", [("position", "P"), *cardset_params]),
    ]

    start_time = log_time(
        "start", message="Pulling all pitching card ratings and positions"
    )
    # gather() preserves argument order, so results line up with api_calls.
    vl_data, vr_data, position_data = await asyncio.gather(
        *(fetch_data(params) for params in api_calls)
    )
    log_time(
        "end",
        f"Pulled {vl_data['count'] + vr_data['count']} pitching card ratings and {position_data['count']} positions",
        start_time=start_time,
    )

    start_time = log_time("start", message="Building base dataframes")
    vl_vals = vl_data["ratings"]
    for x in vl_vals:
        card = x["pitchingcard"]
        player = card["player"]
        # Copy every card-level field onto the rating row; same-named keys on
        # the row are overwritten by the card's values.
        x.update(card)
        x["player_id"] = player["player_id"]
        x["player_name"] = player["p_name"]
        x["rarity"] = player["rarity"]["name"]
        x["cardset_id"] = player["cardset"]["id"]
        x["cardset_name"] = player["cardset"]["name"]
        x["starter_rating"] = card["starter_rating"]
        x["relief_rating"] = card["relief_rating"]
        x["closer_rating"] = card["closer_rating"]
        # Drop the nested dicts so only flat values become DataFrame columns.
        del x["pitchingcard"], x["player"]

    vr_vals = vr_data["ratings"]
    for x in vr_vals:
        # The vR rows only need the join key; card fields come from the vL side.
        x["player_id"] = x["pitchingcard"]["player"]["player_id"]
        del x["pitchingcard"]

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)
    pit_df = pd.merge(vl, vr, on="player_id", suffixes=("_vl", "_vr")).set_index(
        "player_id", drop=False
    )
    log_time("end", "Base dataframes are complete", start_time=start_time)

    start_time = log_time("start", message="Building defense series")
    positions = position_data["positions"]
    series_list = [
        pd.Series(
            {x["player"]["player_id"]: x["range"] for x in positions},
            name="Range P",
        ),
        pd.Series(
            {x["player"]["player_id"]: x["error"] for x in positions},
            name="Error P",
        ),
    ]
    log_time("end", f"Processed {len(positions)} defense series", start_time=start_time)
    logger.info(f"series_list: {series_list}")

    return pit_df.join(series_list)
def _percentile_score(raw_series, ascending=True):
    """Rank raw values as percentiles and scale them to rounded 0-100 scores."""
    return round(raw_series.rank(pct=True, ascending=ascending) * 100)


def _best_role_rating(row):
    """Return the larger of the starter/relief ratings; a missing one counts as -1."""
    starter = row["starter_rating"] if pd.notna(row["starter_rating"]) else -1
    reliever = row["relief_rating"] if pd.notna(row["relief_rating"]) else -1
    return starter if starter > reliever else reliever


def _raw_control(row, split):
    """Raw control value for one split ("vl" or "vr"): low OBP-AVG gap and few wild pitches score high."""
    return ((1 - (row[f"obp_{split}"] - row[f"avg_{split}"])) * 100) + (
        1 - (row["wild_pitch"] / 20)
    )


def _raw_stuff(row, split):
    """Raw stuff value for one split ("vl" or "vr"): low slugging and homer chances score high."""
    # NOTE(review): slugging is added twice; confirm the double weight is intended.
    return 10 - (
        row[f"slg_{split}"]
        + row[f"slg_{split}"]
        + ((row[f"homerun_{split}"] + row[f"bp_homerun_{split}"]) / 108)
    )


def _raw_fielding(row):
    """Raw fielding value from the pitcher range and error columns."""
    return ((6 - row["Range P"]) * 10) + (50 - row["Error P"])


def _raw_stamina(row):
    """Raw stamina value: the best role rating scaled by the blended OBP, plus the rating."""
    this_pow = _best_role_rating(row)
    return (
        ((this_pow * (row["obp_vr"] * (2 / 3))) + (this_pow * (row["obp_vl"] / 3)))
        * 4.5
    ) + this_pow


def _raw_hit(row):
    """Raw H/9 value: one minus the blended average (vR weighted 2/3, vL 1/3)."""
    return 1 - ((row["avg_vr"] * (2 / 3)) + (row["avg_vl"] / 3))


def _raw_k(row):
    """Raw K/9 value: blended strikeout chances per 108 (vR 2/3, vL 1/3)."""
    return ((row["strikeout_vr"] / 108) * (2 / 3)) + ((row["strikeout_vl"] / 108) / 3)


def _raw_bb(row):
    """Raw BB/9 value: blended walk chances per 108 (vR 2/3, vL 1/3)."""
    return ((row["walk_vr"] / 108) * (2 / 3)) + ((row["walk_vl"] / 108) / 3)


def _raw_hr(row):
    """Raw HR/9 value: one minus the blended homer chances per 108."""
    return 1 - (
        (((row["homerun_vr"] + row["bp_homerun_vr"]) / 108) * (2 / 3))
        + (((row["homerun_vl"] + row["bp_homerun_vl"]) / 108) / 3)
    )


def _raw_rating(row):
    """Raw overall rating: weighted sum of the already-computed 0-100 scores."""
    # NOTE(review): the same weights apply to starters and relievers; confirm
    # whether role-specific weights were ever intended.
    return (
        ((row["H/9"] + row["K/9"] + row["BB/9"] + row["HR/9"]) * 5)
        + (row["Fielding"])
        + (row["Stamina"] * 5)
        + (((row["Stuff L"] / 3) + (row["Stuff R"] * (2 / 3))) * 4)
        + (((row["Control L"] / 3) + (row["Control R"] * (2 / 3))) * 2)
    )


async def post_calc_basic(pitching_dfs: pd.DataFrame):
    """Compute 0-100 percentile scouting scores and write them to CSV.

    Each score column is built the same way: a raw value is computed per row,
    the raw values are percentile-ranked across all rows, and the rank is
    scaled to 0-100 and rounded.  BB/9 is ranked descending so fewer walks
    score higher.  The score columns are added to ``pitching_dfs`` in place,
    and the selected output columns are written to
    ``scouting/pitching-basic.csv``.

    Args:
        pitching_dfs: DataFrame with the ``_vl`` / ``_vr`` rating columns,
            ``wild_pitch``, ``starter_rating``, ``relief_rating``,
            ``"Range P"``, ``"Error P"``, ``hand``, ``player_name`` and
            ``cardset_name`` columns read below.  It is mutated.
    """
    raw_data = pitching_dfs

    def add_score(column, row_func, ascending=True):
        # One timed step: per-row raw value -> percentile score column.
        step_start = log_time("start", f"Beginning {column} calcs")
        raw_series = raw_data.apply(row_func, axis=1)
        raw_data[column] = _percentile_score(raw_series, ascending)
        log_time("end", f"Done {column} calcs", start_time=step_start)

    add_score("Control L", lambda row: _raw_control(row, "vl"))
    add_score("Control R", lambda row: _raw_control(row, "vr"))
    add_score("Stuff L", lambda row: _raw_stuff(row, "vl"))
    add_score("Stuff R", lambda row: _raw_stuff(row, "vr"))

    # Fielding is spelled out because it also logs the raw min/max.
    start_time = log_time("start", "Beginning Fielding calcs")
    fielding_raw = raw_data.apply(_raw_fielding, axis=1)
    logger.info(f"max fld: {fielding_raw.max()} / min fld: {fielding_raw.min()}")
    raw_data["Fielding"] = _percentile_score(fielding_raw)
    log_time("end", "Done Fielding calcs", start_time=start_time)

    add_score("Stamina", _raw_stamina)
    add_score("H/9", _raw_hit)
    add_score("K/9", _raw_k)
    add_score("BB/9", _raw_bb, ascending=False)
    add_score("HR/9", _raw_hr)

    # Rating must come last: it reads the score columns computed above.
    start_time = log_time("start", "Beginning Rating calcs")
    raw_data["Rating"] = _percentile_score(raw_data.apply(_raw_rating, axis=1))
    output = raw_data[
        [
            "player_id",
            "player_name",
            "Rating",
            "Control R",
            "Control L",
            "Stuff R",
            "Stuff L",
            "Stamina",
            "Fielding",
            "H/9",
            "K/9",
            "BB/9",
            "HR/9",
            "hand",
            "cardset_name",
        ]
    ]
    log_time("end", "Done Rating calcs", start_time=start_time)

    start_time = log_time("start", "Beginning write csv")
    csv_file = pd.DataFrame(output).to_csv(index=False)
    with open("scouting/pitching-basic.csv", "w") as file:
        file.write(csv_file)
    log_time("end", "Done writing to file", start_time=start_time)
async def post_calc_ratings(pitching_dfs: pd.DataFrame):
    """Reorder the ratings columns and dump them to ``scouting/pitching-ratings.csv``.

    The identifying columns are moved to the front, the per-split ``id`` and
    ``vs_hand`` columns are dropped, and every remaining column follows in its
    existing order.  The input DataFrame itself is not modified.

    Args:
        pitching_dfs: Merged pitching DataFrame containing the leading columns
            listed below.
    """
    filter_timer = log_time("start", "Beginning Ratings filtering")
    leading = ["player_id", "player_name", "cardset_name", "rarity", "hand", "variant"]
    # Columns that must not reappear after the leading ones.
    skipped = {*leading, "id_vl", "id_vr", "vs_hand_vl", "vs_hand_vr"}
    trailing = [col for col in pitching_dfs.columns if col not in skipped]
    ratings = pitching_dfs[leading + trailing]
    log_time("end", "Done filtering ratings", start_time=filter_timer)

    write_timer = log_time("start", "Beginning write to file")
    csv_text = pd.DataFrame(ratings).to_csv(index=False)
    with open("scouting/pitching-ratings.csv", "w") as handle:
        handle.write(csv_text)
    log_time("end", "Done writing to file", start_time=write_timer)
async def main():
    """Run the pitcher scouting pipeline: fetch data, then build both CSV reports.

    The fetched DataFrame is deep-copied before each report so that the score
    columns added by ``post_calc_basic`` do not leak into the ratings guide.
    """
    start_time = log_time("start", "Pulling scouting data")
    overall_start_time = start_time

    # An empty list means no cardset filter is sent to the API.
    pitching_dfs = await get_scouting_dfs([])
    # Report the row count rather than printing the whole DataFrame.
    print(f"Received {len(pitching_dfs)} rows")
    log_time("end", "Pulled scouting data", start_time=start_time)

    start_time = log_time("start", "Beginning basic scouting")
    await post_calc_basic(copy.deepcopy(pitching_dfs))
    log_time("end", "Completed pitching scouting", start_time=start_time)

    start_time = log_time("start", "Beginning ratings guide")
    await post_calc_ratings(copy.deepcopy(pitching_dfs))
    log_time("end", "Completed ratings guide", start_time=start_time)

    log_time(
        "end",
        "Total pitcher scouting",
        print_to_console=False,
        start_time=overall_start_time,
    )
    print("All done with pitchers!")
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, never on import.
    asyncio.run(main())