paper-dynasty-card-creation/defenders/calcs_defense.py
Cal Corum 0a17745389 Run black and ruff across entire codebase
Standardize formatting with black and apply ruff auto-fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 14:24:33 -05:00

823 lines
24 KiB
Python

import pandas as pd
import requests
from bs4 import BeautifulSoup
from typing import Literal
from db_calls import db_put
from exceptions import logger
async def create_positions(
    all_stats: pd.DataFrame,
    season_pct: float,
    post_pos: bool,
    df_c: pd.DataFrame,
    df_1b: pd.DataFrame,
    df_2b: pd.DataFrame,
    df_3b: pd.DataFrame,
    df_ss: pd.DataFrame,
    df_lf: pd.DataFrame,
    df_cf: pd.DataFrame,
    df_rf: pd.DataFrame,
    df_of: pd.DataFrame,
):
    """Build card-position records for every player and optionally post them.

    Each row of ``all_stats`` is matched by its ``key_bbref`` value against the
    index of every per-position fielding frame. A record is built for each
    position the player appears in; a player for whom no fielding record was
    added receives a single ``DH`` record instead.

    Args:
        all_stats: One row per player. ``key_bbref``, ``player_id`` and
            ``p_name`` are read; ``PA_vL`` and ``PA_vR`` are read for the DH
            fallback only.
        season_pct: Forwarded to the rating helpers.
        post_pos: When true, the records are sent with ``db_put`` to the
            ``cardpositions`` endpoint.
        df_c: Catcher fielding frame, indexed by bbref key.
        df_1b, df_2b, df_3b, df_ss: Infield fielding frames, same indexing.
        df_lf, df_cf, df_rf: Per-spot outfield frames, same indexing.
        df_of: Combined outfield frame; supplies the errors and chances used
            for the single error rating shared by a player's outfield records.

    Returns:
        The number of position records built (posted or not).
    """
    position_payload = []

    def blended_range(frame: pd.DataFrame, key) -> float:
        """Average both run totals with the smaller of the two counted twice."""
        tz_runs = int(frame.at[key, "tz_runs_total"])
        bis_runs = int(frame.at[key, "bis_runs_total"])
        return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3

    def process_pos(df_data):
        """Append every position record for one ``all_stats`` row."""
        key = df_data["key_bbref"]
        # Stays True until a record for this player actually lands in
        # position_payload; drives the DH fallback at the end.
        no_data = True

        infield = ((df_1b, "1b"), (df_2b, "2b"), (df_3b, "3b"), (df_ss, "ss"))
        for frame, code in infield:
            if key not in frame.index:
                continue
            logger.info(f'Running {code} stats for {df_data["p_name"]}')
            try:
                average_range = blended_range(frame, key)
                position_payload.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": code.upper(),
                        "innings": float(frame.at[key, "Inn_def"]),
                        "range": get_if_range(
                            pos_code=code,
                            tz_runs=round(average_range),
                            r_dp=0,
                            season_pct=season_pct,
                        ),
                        "error": get_any_error(
                            pos_code=code,
                            errors=int(frame.at[key, "E_def"]),
                            chances=int(frame.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                    }
                )
                no_data = False
            except Exception as e:
                logger.info(f"Infield position failed: {e}")

        # Outfield records are staged first: their error and arm ratings come
        # from the combined data below and are shared by every outfield spot.
        of_arms = []
        of_payloads = []
        for frame, code in ((df_lf, "lf"), (df_cf, "cf"), (df_rf, "rf")):
            if key not in frame.index:
                continue
            try:
                average_range = blended_range(frame, key)
                of_payloads.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": code.upper(),
                        "innings": float(frame.at[key, "Inn_def"]),
                        "range": get_of_range(
                            pos_code=code,
                            tz_runs=round(average_range),
                            season_pct=season_pct,
                        ),
                    }
                )
                of_arms.append(int(frame.at[key, "bis_runs_outfield"]))
                # no_data is deliberately NOT cleared here: a staged record is
                # not in the payload yet and may still be discarded below.
            except Exception as e:
                logger.info(f"Outfield position failed: {e}")

        if key in df_of.index and of_arms and of_payloads:
            try:
                error_rating = get_any_error(
                    # Explicit code rather than whatever the loop variable
                    # above happened to hold last.
                    pos_code="of",
                    errors=int(df_of.at[key, "E_def"]),
                    chances=int(df_of.at[key, "chances"]),
                    season_pct=season_pct,
                )
                arm_rating = arm_outfield(of_arms)
                for of_payload in of_payloads:
                    of_payload["error"] = error_rating
                    of_payload["arm"] = arm_rating
                position_payload.extend(of_payloads)
                no_data = False
            except Exception as e:
                logger.info(f"Outfield position failed: {e}")

        if key in df_c.index:
            try:
                # NOTE(review): if SB/CS arrive as strings this sum is a
                # concatenation and never equals 0 -- confirm the dtype.
                if df_c.at[key, "SB"] + df_c.at[key, "CS"] == 0:
                    arm_rating = 3
                else:
                    arm_rating = arm_catcher(
                        cs_pct=df_c.at[key, "caught_stealing_perc"],
                        raa=int(df_c.at[key, "bis_runs_catcher_sb"]),
                        season_pct=season_pct,
                    )
                position_payload.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": "C",
                        "innings": float(df_c.at[key, "Inn_def"]),
                        "range": range_catcher(
                            rs_value=int(df_c.at[key, "tz_runs_catcher"]),
                            season_pct=season_pct,
                        ),
                        "error": get_any_error(
                            pos_code="c",
                            errors=int(df_c.at[key, "E_def"]),
                            chances=int(df_c.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                        "arm": arm_rating,
                        "pb": pb_catcher(
                            pb=int(df_c.at[key, "PB"]),
                            innings=int(float(df_c.at[key, "Inn_def"])),
                            season_pct=season_pct,
                        ),
                        "overthrow": ot_catcher(
                            errors=int(df_c.at[key, "E_def"]),
                            chances=int(df_c.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                    }
                )
                no_data = False
            except Exception as e:
                logger.info(f"Catcher position failed: {e}")

        if no_data:
            # No fielding record made it into the payload: fall back to DH,
            # with total plate appearances stored in the "innings" field.
            position_payload.append(
                {
                    "player_id": int(df_data["player_id"]),
                    "position": "DH",
                    "innings": df_data["PA_vL"] + df_data["PA_vR"],
                }
            )

    print("Calculating fielding lines now...")
    all_stats.apply(process_pos, axis=1)
    print("Fielding is complete.\n\nPosting positions now...")
    if post_pos:
        resp = await db_put(
            "cardpositions", payload={"positions": position_payload}, timeout=30
        )
        print(f"Response: {resp}\n")
    return len(position_payload)
def range_pitcher(
    rs_value: int = None, rf_per9_value: float = None, season_pct: float = 1.0
):
    """Return a pitcher range rating from 1 to 5.

    ``rs_value`` takes precedence when given and is compared against cut-offs
    scaled by ``season_pct``; otherwise ``rf_per9_value`` is compared against
    fixed cut-offs. Higher inputs give lower ratings.

    Raises:
        KeyError: If both ``rs_value`` and ``rf_per9_value`` are None.
    """
    if rs_value is not None:
        for rating, floor in enumerate((3, 1, 0, -2), start=1):
            if rs_value >= floor * season_pct:
                return rating
        return 5

    if rf_per9_value is None:
        raise KeyError(
            "Neither rs nor rf value was provided to calculate pitching range"
        )

    # The fixed cut-offs are not scaled by season_pct.
    if rf_per9_value >= 2.61:
        return 1
    if rf_per9_value >= 2.18:
        return 2
    if rf_per9_value <= 1.0:
        return 5
    return 4 if rf_per9_value <= 1.50 else 3
def range_catcher(rs_value: int, season_pct: float):
    """Return a catcher range rating from 1 to 5.

    ``rs_value`` is compared against the cut-offs 7, 3, -1 and -5, each
    multiplied by ``season_pct``; the first cut-off met decides the rating.
    """
    for rating, floor in enumerate((7, 3, -1, -5), start=1):
        if rs_value >= floor * season_pct:
            return rating
    return 5
def range_first_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a first-base range rating from 1 to 5.

    The rating is based on ``tz_runs + r_dp``. Positive cut-offs scale with
    ``season_pct`` but never drop below 2 and 1; negative cut-offs scale with
    ``season_pct`` but never rise above -1 and -3.
    """
    total_runs = tz_runs + r_dp
    cutoffs = (
        max(6 * season_pct, 2),
        max(2 * season_pct, 1),
        min(-1 * season_pct, -1),
        min(-3 * season_pct, -3),
    )
    for rating, cutoff in enumerate(cutoffs, start=1):
        if total_runs >= cutoff:
            return rating
    return 5
def range_second_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a second-base range rating from 1 to 5.

    ``tz_runs + r_dp`` is checked against four cut-offs in descending order.
    Each cut-off scales with ``season_pct`` and is clamped to a fixed value
    (2, 1, -1, -3) so that a small ``season_pct`` cannot collapse the bands.
    """
    combined = tz_runs + r_dp

    if combined >= max(6 * season_pct, 2):
        return 1
    if combined >= max(2 * season_pct, 1):
        return 2
    if combined >= min(-1 * season_pct, -1):
        return 3
    if combined >= min(-3 * season_pct, -3):
        return 4
    return 5
def range_third_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a third-base range rating from 1 to 5.

    Uses the sum ``tz_runs + r_dp``. The first cut-off the sum reaches, in
    descending order, decides the rating; a sum below all of them rates 5.
    """
    runs = tz_runs + r_dp
    bands = (
        (1, max(6 * season_pct, 2)),
        (2, max(2 * season_pct, 1)),
        (3, min(-1 * season_pct, -1)),
        (4, min(-3 * season_pct, -3)),
    )
    return next((rating for rating, floor in bands if runs >= floor), 5)
def range_shortstop(tz_runs: int, r_dp: int, season_pct: float):
    """Return a shortstop range rating from 1 to 5.

    Works on ``tz_runs + r_dp``. The top band starts at ``8 * season_pct``
    (never below 2); the remaining bands start at ``2 * season_pct`` (never
    below 1), ``-1 * season_pct`` (never above -1) and ``-3 * season_pct``
    (never above -3).
    """
    total = tz_runs + r_dp
    top, second = max(8 * season_pct, 2), max(2 * season_pct, 1)
    third, fourth = min(-1 * season_pct, -1), min(-3 * season_pct, -3)

    rating = 5
    # Walk the bands from worst to best so the best band reached wins.
    for candidate, floor in ((4, fourth), (3, third), (2, second), (1, top)):
        if total >= floor:
            rating = candidate
    return rating
def get_if_range(pos_code: str, tz_runs: int, r_dp: int, season_pct: float):
    """Dispatch to the infield range function matching ``pos_code``.

    Accepts ``"1b"``, ``"2b"``, ``"3b"`` or ``"ss"`` (case-sensitive).

    NOTE(review): ``r_dp`` is accepted but not forwarded -- every range
    function is called with 0 in its place. Confirm this is intentional.

    Raises:
        ValueError: If ``pos_code`` is not one of the four infield codes.
    """
    logger.info(f"pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})")

    dispatch = (
        ("1b", range_first_base),
        ("2b", range_second_base),
        ("3b", range_third_base),
        ("ss", range_shortstop),
    )
    for code, range_func in dispatch:
        if pos_code == code:
            return range_func(tz_runs, 0, season_pct)

    raise ValueError(
        f"get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid"
    )
def range_center_field(drs: int, season_pct: float):
    """Return a center-field range rating from 1 to 5.

    ``drs`` is compared against the cut-offs 9, 3, -1 and -4, each multiplied
    by ``season_pct``; the first cut-off reached decides the rating.
    """
    for rating, floor in enumerate((9, 3, -1, -4), start=1):
        if drs >= floor * season_pct:
            return rating
    return 5
def range_left_field(drs: int, season_pct: float):
    """Return a left-field range rating; uses the center-field scale as is."""
    rating = range_center_field(drs=drs, season_pct=season_pct)
    return rating
def range_right_field(drs: int, season_pct: float):
    """Return a right-field range rating; uses the center-field scale as is."""
    rating = range_center_field(drs=drs, season_pct=season_pct)
    return rating
def get_of_range(pos_code: str, tz_runs: int, season_pct: float):
    """Dispatch to the outfield range function matching ``pos_code``.

    ``"lf"`` and ``"cf"`` select their own functions; any other value is
    rated with the right-field function (no validation is performed).
    """
    logger.info(f"pos: {pos_code} / tz_runs: {tz_runs}")

    if pos_code == "lf":
        range_func = range_left_field
    elif pos_code == "cf":
        range_func = range_center_field
    else:
        range_func = range_right_field
    return range_func(tz_runs, season_pct)
def valid_error_ratings(err_num: int, position: str) -> int:
    """Snap ``err_num`` to a value allowed for ``position``.

    Each position has an ascending list of permitted error ratings.
    ``position`` is matched case-insensitively; anything other than p, c, 1b,
    2b, 3b or ss uses the outfield list (0 through 25).

    Returns:
        ``err_num`` itself when it is permitted, the largest permitted value
        when ``err_num`` exceeds it, otherwise the smallest permitted value
        that is not less than ``err_num``.
    """
    # fmt: off
    tables = {
        "p": [
            0, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 40, 42, 43, 44,
            46, 47, 48, 50, 51,
        ],
        "c": list(range(17)),
        "1b": list(range(31)),
        "2b": [
            0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 34, 37, 39, 41, 44, 47,
            50, 53, 56, 59, 62, 65, 68, 71,
        ],
        "3b": [
            0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39,
            41, 44, 47, 50, 53, 56, 59, 62, 65,
        ],
        "ss": [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 38, 40, 42, 44,
            48, 52, 56, 60, 64, 68, 72,
        ],
    }
    # fmt: on

    # Outfielders (and any unrecognised code) share the 0-25 list.
    allowed = tables.get(position.lower())
    if allowed is None:
        allowed = list(range(26))

    if err_num in allowed:
        return err_num

    ceiling = allowed[-1]
    if err_num > ceiling:
        return ceiling

    # Round up to the next permitted value.
    return next((step for step in allowed if err_num <= step), None)
def raw_error(errors: int, chances: int, season_pct: float, chance_max: int):
    """Scale an error count to a ``chance_max``-chance basis.

    Returns 0 when either ``errors`` or ``chances`` is zero, otherwise
    ``errors * chance_max / chances``. ``season_pct`` is accepted but does
    not affect the result.
    """
    if errors != 0 and chances != 0:
        return errors * chance_max / chances
    return 0
def error_pitcher(errors: int, chances: int, season_pct: float):
    """Return the pitcher error rating, scaled to 300 chances and snapped to the pitcher list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=300)
    return valid_error_ratings(err_num=int(scaled), position="p")
def error_catcher(errors: int, chances: int, season_pct: float):
    """Return the catcher error rating, scaled to 500 chances and snapped to the catcher list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=500)
    return valid_error_ratings(err_num=int(scaled), position="c")
def error_first_base(errors: int, chances: int, season_pct: float):
    """Return the first-base error rating, scaled to 1300 chances and snapped to the 1b list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=1300)
    return valid_error_ratings(err_num=int(scaled), position="1b")
def error_second_base(errors: int, chances: int, season_pct: float):
    """Return the second-base error rating, scaled to 700 chances and snapped to the 2b list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=700)
    return valid_error_ratings(err_num=int(scaled), position="2b")
def error_third_base(errors: int, chances: int, season_pct: float):
    """Return the third-base error rating, scaled to 500 chances and snapped to the 3b list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=500)
    return valid_error_ratings(err_num=int(scaled), position="3b")
def error_shortstop(errors: int, chances: int, season_pct: float):
    """Return the shortstop error rating, scaled to 700 chances and snapped to the ss list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=700)
    return valid_error_ratings(err_num=int(scaled), position="ss")
def error_outfield(errors: int, chances: int, season_pct: float):
    """Return the outfield error rating, scaled to 250 chances and snapped to the outfield list."""
    scaled = raw_error(errors, chances, season_pct, chance_max=250)
    return valid_error_ratings(err_num=int(scaled), position="of")
def get_any_error(pos_code: str, errors: int, chances: int, season_pct: float):
    """Dispatch to the error-rating function for ``pos_code``.

    ``pos_code`` is matched case-insensitively. ``lf``, ``cf``, ``rf`` and
    ``of`` all use the outfield function. An unrecognised code yields None.
    """
    code = pos_code.lower()

    if code == "p":
        rate = error_pitcher
    elif code == "c":
        rate = error_catcher
    elif code == "1b":
        rate = error_first_base
    elif code == "2b":
        rate = error_second_base
    elif code == "3b":
        rate = error_third_base
    elif code == "ss":
        rate = error_shortstop
    elif code in ("lf", "cf", "rf", "of"):
        rate = error_outfield
    else:
        # No rating function for this code; callers receive None.
        return None

    return rate(errors, chances, season_pct)
def arm_outfield(all_arms: list):
    """Return an outfield arm rating from the best value in ``all_arms``.

    Only the maximum of ``all_arms`` matters. Ratings run from -6 (maximum
    above 22) up to 2 (maximum of -4 or lower); an empty list rates 5.
    """
    if not all_arms:
        return 5

    best = max(all_arms)
    # (exclusive lower bound, rating), checked from the highest bound down.
    bands = (
        (22, -6),
        (19, -5),
        (16, -4),
        (12, -3),
        (8, -2),
        (4, -1),
        (0, 0),
        (-4, 1),
    )
    for lower_bound, rating in bands:
        if best > lower_bound:
            return rating
    return 2
def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int:
    """Return a catcher arm rating from caught-stealing rate and runs value.

    Two candidate ratings are computed -- one from ``raa`` against cut-offs
    scaled by ``season_pct``, one from the caught-stealing fraction -- and the
    lower of the two is returned.

    Args:
        cs_pct: Percentage text such as ``"31%"``; an empty string rates 3.
        raa: Runs value compared against the ``season_pct``-scaled cut-offs.
        season_pct: Multiplier applied to the ``raa`` cut-offs only.
    """
    if cs_pct == "":
        return 3

    caught_rate = float(cs_pct.strip("%")) / 100

    # Candidate from raa: (exclusive lower bound before scaling, rating).
    max_arm = 5
    for lower_bound, rating in ((5, -4), (2, -2), (-1, 0), (-2, 3)):
        if raa > lower_bound * season_pct:
            max_arm = rating
            break

    # Candidate from the caught-stealing fraction.
    raw_arm = 5
    rate_bands = (
        (0.6, -5),
        (0.5, -4),
        (0.4, -3),
        (0.3, -2),
        (0.25, -1),
        (0.2, 0),
        (0.16, 1),
        (0.12, 2),
        (0.1, 3),
        (0.05, 4),
    )
    for lower_bound, rating in rate_bands:
        if caught_rate > lower_bound:
            raw_arm = rating
            break

    return int(min(max_arm, raw_arm))
def pb_catcher(pb: int, innings: int, season_pct: float):
    """Return a passed-ball rating, capped at 20.

    Computes ``pb * 1000 * season_pct / innings``, caps it at 20, and returns
    the absolute value truncated to an int. Returns 0 when ``pb`` or
    ``innings`` is zero.
    """
    if pb != 0 and innings != 0:
        rate = pb * 1000 * season_pct / innings
        capped = min(rate, 20)
        return int(abs(capped))
    return 0
def ot_catcher(errors: int, chances: int, season_pct: float):
    """Return a catcher overthrow rating, capped at 20.

    Scales ``errors`` to ``3000 * season_pct`` chances, takes a third of
    that, caps the result at 20 and truncates to an int. Returns 0 when
    ``errors`` or ``chances`` is zero.
    """
    if errors != 0 and chances != 0:
        scaled_chances = 3000 * season_pct
        per_third = errors * scaled_chances / chances / 3
        return int(min(per_third, 20))
    return 0
def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> int:
    """Return a pitcher hold rating (lower is better).

    A base rating is taken from the caught-stealing percentage and then
    improved (lowered) by a pickoff bonus of 0-3 points.

    Args:
        raw_cs: Caught-stealing percentage text such as ``"31%"``, or an
            empty string when no data is available.
        picks: Pickoff count, compared against cut-offs scaled by
            ``season_pct``.
        season_pct: Multiplier applied to the pickoff cut-offs only.

    Returns:
        An int. With no caught-stealing data: ``2 - bonus``, never below -1.
        Otherwise the base rating minus the bonus, never below -5.
    """
    # Pickoff bonus (improves hold by 1-3 points).
    pick_bonus = 0
    for bonus, lower_bound in ((3, 8), (2, 5), (1, 2)):
        if picks > lower_bound * season_pct:
            pick_bonus = bonus
            break

    # No CS data: default to +2, pickoff bonus can improve to -1 at best.
    if raw_cs == "":
        return max(2 - pick_bonus, -1)

    cs_pct = float(raw_cs.strip("%")) / 100

    # (exclusive lower bound, base hold rating), highest bound first.
    # NOTE(review): the table steps from 1 straight to 3, so a base rating of
    # 2 is never produced -- confirm that gap is intentional.
    cs_bands = (
        (0.667, -5),
        (0.6, -4),
        (0.48, -3),
        (0.34, -2),
        (0.26, -1),
        (0.22, 0),
        (0.2, 1),
        (0.18, 3),
        (0.16, 4),
        (0.14, 5),
        (0.12, 6),
        (0.1, 7),
        (0.06, 8),
    )
    hold_num = 9
    for lower_bound, rating in cs_bands:
        if cs_pct > lower_bound:
            hold_num = rating
            break

    # Apply pickoff bonus (lower = better), cap at -5.
    return max(hold_num - pick_bonus, -5)
def pow_ratings(innings: float, gs: int, games: int) -> tuple[int, int]:
    """Return ``(starter_pow, reliever_pow)`` innings-per-appearance ratings.

    Innings are split between starts and relief appearances in proportion to
    ``gs`` and ``games - gs``; each rating is the rounded innings per
    appearance for that role, with a minimum of 1.

    Args:
        innings: Total innings pitched.
        gs: Games started; anything ``int()`` cannot convert counts as 0.
        games: Games pitched; anything ``int()`` cannot convert counts as 0.

    Returns:
        ``(1, 1)`` when ``innings <= 1`` or ``games <= 1``. Otherwise the two
        ratings, where the reliever rating is forced to 1 if relief innings
        are under a tenth of starter innings, and is otherwise kept below a
        starter rating greater than 1.
    """
    # int(None) raises TypeError rather than ValueError, so both are caught
    # and treated as "unreadable" instead of crashing on a missing value.
    try:
        games = int(games)
    except (TypeError, ValueError):
        logger.error(f"Could not read Pitcher Games: {games} / setting to 0")
        games = 0
    try:
        gs = int(gs)
    except (TypeError, ValueError):
        logger.error(f"Could not read Pitcher GS: {gs} / setting to 0")
        gs = 0

    if innings <= 1 or games <= 1:
        return 1, 1

    relief_games = games - gs
    s_innings = int(innings * gs / games)
    r_innings = int(innings * relief_games / games)

    s_pow = 1 if gs == 0 else max(round(s_innings / gs), 1)
    # r_innings == 0 also covers relief_games == 0, so no division by zero.
    r_pow = 1 if r_innings == 0 else max(round(r_innings / relief_games), 1)

    if r_innings / max(s_innings, 1) < 0.1:
        # Relief work is negligible next to the starting workload.
        r_pow = 1
    elif r_pow >= s_pow > 1:
        r_pow = s_pow - 1

    return s_pow, r_pow
def innings_float(innings: str) -> float:
    """Convert innings text such as ``"12.1"`` to a float.

    The digits after the dot are multiplied by 0.333 and added to the whole
    part, so ``"12.1"`` becomes ``12.333``. Text without a dot is treated as
    having a zero fractional part.
    """
    whole, dot, tail = innings.partition(".")
    if not dot:
        tail = "0"
    elif "." in tail:
        # More than one dot: fail the same way a two-name unpack of split() would.
        raise ValueError("too many values to unpack (expected 2)")
    return float(int(whole) + int(tail) * 0.333)
# Get position stats into dataframes
def get_bbref_fielding_df(
    position: Literal["p", "c", "1b", "2b", "3b", "ss", "lf", "cf", "rf", "of"],
    s_num: int,
):
    """Scrape one season's Baseball Reference fielding table for a position.

    Every ``<td>`` cell of every table row becomes a column, named after the
    cell's ``data-stat`` attribute. A cell carrying ``data-append-csv``
    additionally contributes a ``key_bbref`` column, placed just before it,
    holding that attribute's value. All values are kept as scraped text.

    Args:
        position: Position code inserted into the page URL.
        s_num: Season number inserted into the page URL.

    Returns:
        A DataFrame indexed by the first value of each row, with rows lacking
        a ``key_bbref`` removed and only the first row kept per ``key_bbref``.
        For every position except ``"p"``, rows whose ``chances`` text is
        ``"0"``, ``"1"`` or ``"2"`` are dropped as well.

    Raises:
        requests.HTTPError: If the page responds with an error status.
        ValueError: If the fielding table is not present in the page.
    """
    url = f"https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml"
    # Without a timeout a stalled connection would block the run indefinitely.
    response = requests.get(url, timeout=30)
    # Fail on an error page here rather than on a missing table further down.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", {"id": "players_players_standard_fielding_fielding"})
    if table is None:
        raise ValueError(f"Fielding table not found at {url}")

    headers = []
    data = []
    indices = []
    for row in table.find_all("tr"):
        row_data = []
        col_names = []
        for cell in row.find_all("td"):
            if cell.has_attr("data-append-csv"):
                row_data.append(cell["data-append-csv"])
                if not headers:
                    col_names.append("key_bbref")
            row_data.append(cell.text)
            if not headers:
                col_names.append(cell["data-stat"])

        # Rows without <td> cells (e.g. header rows) carry no data.
        if not row_data:
            continue
        data.append(row_data)
        indices.append(row_data[0])
        if not headers:
            # Column names are taken from the first data row only.
            headers.extend(col_names)

    # "key_bbref == key_bbref" is false only for missing values, so this
    # drops rows that have no player key.
    pos_frame = pd.DataFrame(data, index=indices, columns=headers).query(
        "key_bbref == key_bbref"
    )
    if position == "p":
        return pos_frame.drop_duplicates(subset=["key_bbref"], keep="first")

    tmp = pos_frame[~pos_frame["chances"].isin(["0", "1", "2"])]
    return tmp.drop_duplicates(subset=["key_bbref"], keep="first")