# Standardize formatting with black and apply ruff auto-fixes.
# Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
# 334 lines, 11 KiB, Python
import asyncio
|
|
import copy
|
|
|
|
from exceptions import logger
|
|
from scouting_batters import log_time, fetch_data
|
|
|
|
import pandas as pd
|
|
|
|
|
|
async def get_scouting_dfs(cardset_id: list = None):
    """Fetch pitching ratings and pitcher positions and merge them into one frame.

    Three requests are issued concurrently through ``fetch_data``: pitching
    card ratings vs left-handers, pitching card ratings vs right-handers, and
    card positions for position "P".

    Args:
        cardset_id: Cardset ids to filter on; each one is sent as its own
            ``cardset_id`` query parameter. ``None`` or an empty list sends no
            cardset filter.

    Returns:
        A DataFrame indexed by ``player_id`` (also kept as a column). Columns
        present in both rating splits carry ``_vl`` / ``_vr`` suffixes, and the
        "Range P" and "Error P" series are joined on the index.
    """
    # The declared default is None, which the original iterated directly and
    # crashed on; treat it the same as an empty filter list.
    cardset_params = [("cardset_id", x) for x in (cardset_id or [])]
    ratings_params = [
        ("team_id", 31),
        ("ts", "s37136685556r6135248705"),
        *cardset_params,
    ]
    api_calls = [
        ("pitchingcardratings", [("vs_hand", "vL"), *ratings_params]),
        ("pitchingcardratings", [("vs_hand", "vR"), *ratings_params]),
        ("cardpositions", [("position", "P"), *cardset_params]),
    ]

    start_time = log_time(
        "start", message="Pulling all pitching card ratings and positions"
    )

    # Results come back in the same order as api_calls: vL, vR, positions.
    api_data = await asyncio.gather(*(fetch_data(call) for call in api_calls))
    vl_data, vr_data, position_data = api_data

    log_time(
        "end",
        f"Pulled {vl_data['count'] + vr_data['count']} pitching card ratings and {position_data['count']} positions",
        start_time=start_time,
    )
    start_time = log_time("start", message="Building base dataframes")

    # Flatten the vs-left records: hoist the nested card/player fields to the
    # top level so each record becomes a single flat DataFrame row.
    vl_vals = vl_data["ratings"]
    for rating in vl_vals:
        card = rating["pitchingcard"]
        player = card["player"]
        rating.update(card)
        rating["player_id"] = player["player_id"]
        rating["player_name"] = player["p_name"]
        rating["rarity"] = player["rarity"]["name"]
        rating["cardset_id"] = player["cardset"]["id"]
        rating["cardset_name"] = player["cardset"]["name"]
        rating["starter_rating"] = card["starter_rating"]
        rating["relief_rating"] = card["relief_rating"]
        rating["closer_rating"] = card["closer_rating"]
        # "player" was copied in by update(card); drop both nested dicts.
        del rating["pitchingcard"], rating["player"]

    # The vs-right records only need the join key; the card-level fields are
    # already carried by the vs-left rows.
    vr_vals = vr_data["ratings"]
    for rating in vr_vals:
        rating["player_id"] = rating["pitchingcard"]["player"]["player_id"]
        del rating["pitchingcard"]

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)

    pit_df = pd.merge(vl, vr, on="player_id", suffixes=("_vl", "_vr")).set_index(
        "player_id", drop=False
    )

    log_time("end", "Base dataframes are complete", start_time=start_time)
    start_time = log_time("start", message="Building defense series")

    positions = position_data["positions"]

    series_list = [
        pd.Series(
            {x["player"]["player_id"]: x["range"] for x in positions},
            name="Range P",
        ),
        pd.Series(
            {x["player"]["player_id"]: x["error"] for x in positions},
            name="Error P",
        ),
    ]
    log_time("end", f"Processed {len(positions)} defense series", start_time=start_time)
    logger.info(f"series_list: {series_list}")

    return pit_df.join(series_list)
|
|
|
|
|
|
def _percentile_score(raw_series: pd.Series, ascending: bool = True) -> pd.Series:
    """Turn raw scores into percentile ranks on a 0-100 scale, rounded to whole numbers."""
    return round(raw_series.rank(pct=True, ascending=ascending) * 100)


def _weighted_vs_hand(vs_right, vs_left):
    """Blend a split stat, weighting the vs-right value 2/3 and the vs-left value 1/3."""
    return (vs_right * (2 / 3)) + (vs_left / 3)


def _raw_control(df: pd.DataFrame, hand: str) -> pd.Series:
    """Raw control score for one split (``hand`` is "vl" or "vr")."""
    return ((1 - (df[f"obp_{hand}"] - df[f"avg_{hand}"])) * 100) + (
        1 - (df["wild_pitch"] / 20)
    )


def _raw_stuff(df: pd.DataFrame, hand: str) -> pd.Series:
    """Raw stuff score for one split (``hand`` is "vl" or "vr")."""
    # NOTE(review): slugging is added twice, exactly as in the original
    # formula; confirm whether that weighting is intentional.
    return 10 - (
        df[f"slg_{hand}"]
        + df[f"slg_{hand}"]
        + ((df[f"homerun_{hand}"] + df[f"bp_homerun_{hand}"]) / 108)
    )


def _raw_stamina(df: pd.DataFrame) -> pd.Series:
    """Raw stamina score built from the larger of the starter/relief ratings."""
    # Missing ratings count as -1. The original condition was inverted
    # (`x if pd.isna(x) else -1`), which threw away every real rating.
    spow = df["starter_rating"].fillna(-1)
    rpow = df["relief_rating"].fillna(-1)
    this_pow = spow.where(spow > rpow, rpow)

    return (
        ((this_pow * (df["obp_vr"] * (2 / 3))) + (this_pow * (df["obp_vl"] / 3)))
        * 4.5
    ) + this_pow


def _raw_hit(df: pd.DataFrame) -> pd.Series:
    """Raw H/9 score: one minus the hand-weighted batting average allowed."""
    # The original `1 - a + b` lacked parentheses, so a higher average vs
    # left *raised* the score; both splits must be subtracted.
    return 1 - _weighted_vs_hand(df["avg_vr"], df["avg_vl"])


def _raw_rating(df: pd.DataFrame) -> pd.Series:
    """Raw overall rating combined from the already-computed percentile columns."""
    # NOTE(review): the original branched on starter vs. reliever, but both
    # branches held this same formula, so a single expression is equivalent.
    rate_stats = df["H/9"] + df["K/9"] + df["BB/9"] + df["HR/9"]
    stuff = _weighted_vs_hand(df["Stuff R"], df["Stuff L"])
    control = _weighted_vs_hand(df["Control R"], df["Control L"])
    return (
        (rate_stats * 5)
        + df["Fielding"]
        + (df["Stamina"] * 5)
        + (stuff * 4)
        + (control * 2)
    )


async def post_calc_basic(pitching_dfs: pd.DataFrame):
    """Compute percentile scouting grades and write ``scouting/pitching-basic.csv``.

    Each grade is a raw score computed from the rating columns, converted to a
    percentile rank across all rows, scaled to 0-100 and rounded. The grade
    columns are added to ``pitching_dfs`` in place, so pass a copy if the
    caller's frame must stay untouched.

    Args:
        pitching_dfs: Merged pitching frame with ``_vl`` / ``_vr`` split
            columns plus "Range P" and "Error P".
    """
    raw_data = pitching_dfs

    start_time = log_time("start", "Beginning Control L calcs")
    raw_data["Control L"] = _percentile_score(_raw_control(raw_data, "vl"))
    log_time("end", "Done Control L calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Control R calcs")
    raw_data["Control R"] = _percentile_score(_raw_control(raw_data, "vr"))
    log_time("end", "Done Control R calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Stuff L calcs")
    raw_data["Stuff L"] = _percentile_score(_raw_stuff(raw_data, "vl"))
    log_time("end", "Done Stuff L calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Stuff R calcs")
    raw_data["Stuff R"] = _percentile_score(_raw_stuff(raw_data, "vr"))
    log_time("end", "Done Stuff R calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Fielding calcs")
    raw_series = ((6 - raw_data["Range P"]) * 10) + (50 - raw_data["Error P"])
    rank_series = _percentile_score(raw_series)
    logger.info(f"max fld: {raw_series.max()} / min fld: {raw_series.min()}")
    raw_data["Fielding"] = rank_series
    log_time("end", "Done Fielding calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Stamina calcs")
    raw_data["Stamina"] = _percentile_score(_raw_stamina(raw_data))
    log_time("end", "Done Stamina calcs", start_time=start_time)

    start_time = log_time("start", "Beginning H/9 calcs")
    raw_data["H/9"] = _percentile_score(_raw_hit(raw_data))
    log_time("end", "Done H/9 calcs", start_time=start_time)

    # These two log labels previously said "H/9" (copy-paste slip).
    start_time = log_time("start", "Beginning K/9 calcs")
    raw_series = _weighted_vs_hand(
        raw_data["strikeout_vr"] / 108, raw_data["strikeout_vl"] / 108
    )
    raw_data["K/9"] = _percentile_score(raw_series)
    log_time("end", "Done K/9 calcs", start_time=start_time)

    start_time = log_time("start", "Beginning BB/9 calcs")
    raw_series = _weighted_vs_hand(raw_data["walk_vr"] / 108, raw_data["walk_vl"] / 108)
    # Ranked descending, so fewer walks gives a higher grade.
    raw_data["BB/9"] = _percentile_score(raw_series, ascending=False)
    log_time("end", "Done BB/9 calcs", start_time=start_time)

    # This start label previously said "BB/9" (copy-paste slip).
    start_time = log_time("start", "Beginning HR/9 calcs")
    raw_series = 1 - _weighted_vs_hand(
        (raw_data["homerun_vr"] + raw_data["bp_homerun_vr"]) / 108,
        (raw_data["homerun_vl"] + raw_data["bp_homerun_vl"]) / 108,
    )
    raw_data["HR/9"] = _percentile_score(raw_series)
    log_time("end", "Done HR/9 calcs", start_time=start_time)

    start_time = log_time("start", "Beginning Rating calcs")
    raw_data["Rating"] = _percentile_score(_raw_rating(raw_data))

    output = raw_data[
        [
            "player_id",
            "player_name",
            "Rating",
            "Control R",
            "Control L",
            "Stuff R",
            "Stuff L",
            "Stamina",
            "Fielding",
            "H/9",
            "K/9",
            "BB/9",
            "HR/9",
            "hand",
            "cardset_name",
        ]
    ]

    log_time("end", "Done Rating calcs", start_time=start_time)
    start_time = log_time("start", "Beginning write csv")

    # Let pandas open the file itself: it writes UTF-8 and controls newlines,
    # where a text-mode open() of a pre-rendered string depends on the
    # platform's default encoding and newline translation.
    output.to_csv("scouting/pitching-basic.csv", index=False)

    log_time("end", "Done writing to file", start_time=start_time)
|
|
|
|
|
|
async def post_calc_ratings(pitching_dfs: pd.DataFrame):
    """Reorder the raw rating columns and write ``scouting/pitching-ratings.csv``.

    The identifying columns listed in ``first`` are moved to the front, the
    per-split ``id_*`` / ``vs_hand_*`` columns are dropped, and all remaining
    columns keep their existing order.

    Args:
        pitching_dfs: Merged pitching frame; it must contain every column
            named in ``first``.
    """
    start_time = log_time("start", "Beginning Ratings filtering")

    first = ["player_id", "player_name", "cardset_name", "rarity", "hand", "variant"]
    exclude = set(first) | {"id_vl", "id_vr", "vs_hand_vl", "vs_hand_vr"}
    remaining = [col for col in pitching_dfs.columns if col not in exclude]
    output = pitching_dfs[first + remaining]

    log_time("end", "Done filtering ratings", start_time=start_time)
    start_time = log_time("start", "Beginning write to file")

    # Let pandas open the file itself: it writes UTF-8 and controls newlines,
    # where a text-mode open() of a pre-rendered string depends on the
    # platform's default encoding and newline translation.
    output.to_csv("scouting/pitching-ratings.csv", index=False)

    log_time("end", "Done writing to file", start_time=start_time)
|
|
|
|
|
|
async def main():
    """Run the pitcher scouting pipeline: fetch data, then write both CSV reports.

    Each stage is timed through ``log_time``, and a final entry covers the
    whole run.
    """
    start_time = log_time("start", "Pulling scouting data")
    overall_start_time = start_time

    pitching_dfs = await get_scouting_dfs([])
    # Report the row count; the original interpolated the entire DataFrame.
    print(f"Received {len(pitching_dfs)} rows")

    log_time("end", "Pulled scouting data", start_time=start_time)
    start_time = log_time("start", "Beginning basic scouting")

    # post_calc_basic adds columns in place, so each stage gets its own copy.
    await post_calc_basic(copy.deepcopy(pitching_dfs))

    log_time("end", "Completed pitching scouting", start_time=start_time)
    start_time = log_time("start", "Beginning ratings guide")

    await post_calc_ratings(copy.deepcopy(pitching_dfs))

    log_time("end", "Completed ratings guide", start_time=start_time)
    log_time(
        "end",
        "Total pitcher scouting",
        print_to_console=False,
        start_time=overall_start_time,
    )
    print("All done with pitchers!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: build the pipeline coroutine and run it to completion.
    pipeline = main()
    asyncio.run(pipeline)
|