paper-dynasty-card-creation/pitchers/calcs_pitcher.py
Cal Corum db3822565c Add offense_col resolver for retrosheet pipeline to fix 883 silent KeyErrors
The FullCard migration requires offense_col and player_id on each player's
DataFrame row. The retrosheet pipeline calculates ratings before posting,
so both fields were missing — causing silent card layout builder failures.

Adds a three-tier resolution: CSV cache → API bulk fetch → deterministic
hash fallback. Also includes player_id fallback in both calcs modules.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 15:37:59 -06:00

378 lines
11 KiB
Python

import math
from creation_helpers import mround, sanitize_chance_output
from typing import List
from exceptions import logger
from pitchers.models import PitchingCardRatingsModel
from pitchers.card_builder import build_pitcher_full_cards
def get_pitcher_ratings(df_data) -> List[dict]:
    """Build the vs-left and vs-right card ratings for a single pitcher.

    Two ``PitchingCardRatingsModel`` instances (one per batter hand) are
    seeded from the split stats in ``df_data`` and then filled in step by
    step: singles, extra-base hits, walks/HBP, strikeouts and the remaining
    outs. Finally the card layout builder is run and, when it succeeds, its
    output is merged into each side's dict.

    Args:
        df_data: One pitcher's row. It is read both by key
            (``df_data["BB_vL"]``) and by attribute
            (``df_data.pitchingcard_id``, ``df_data.name``) -- presumably a
            pandas Series; confirm against the caller.

    Returns:
        ``[vl_dict, vr_dict]`` -- the ``custom_to_dict()`` output of each
        model, updated with ``card_output()`` when the layout builder ran
        without raising.
    """
    # Function-scope import: only the player_id fallback below needs it.
    import zlib

    # Calculate OB values with min cap (ensure scalar values for comparison)
    # NOTE(review): min(ob, 0.8) bounds the value from ABOVE at 0.8, while the
    # comment above speaks of a "min cap" -- confirm whether a floor (max)
    # was intended. Behaviour is left unchanged here.
    ob_vl = float(108 * (df_data["BB_vL"] + df_data["HBP_vL"]) / df_data["TBF_vL"])
    ob_vr = float(108 * (df_data["BB_vR"] + df_data["HBP_vR"]) / df_data["TBF_vR"])

    vl = PitchingCardRatingsModel(
        pitchingcard_id=df_data.pitchingcard_id,
        pit_hand=df_data.pitch_hand,
        vs_hand="L",
        all_hits=sanitize_chance_output(
            (df_data["AVG_vL"] - 0.05) * 108
        ),  # Subtracting chances from BP results
        all_other_ob=sanitize_chance_output(min(ob_vl, 0.8)),
        hard_rate=df_data["Hard%_vL"],
        med_rate=df_data["Med%_vL"],
        soft_rate=df_data["Soft%_vL"],
    )
    vr = PitchingCardRatingsModel(
        pitchingcard_id=df_data.pitchingcard_id,
        pit_hand=df_data.pitch_hand,
        vs_hand="R",
        all_hits=sanitize_chance_output(
            (df_data["AVG_vR"] - 0.05) * 108
        ),  # Subtracting chances from BP results
        all_other_ob=sanitize_chance_output(min(ob_vr, 0.8)),
        hard_rate=df_data["Hard%_vR"],
        med_rate=df_data["Med%_vR"],
        soft_rate=df_data["Soft%_vR"],
    )

    # Whatever is not a hit or another on-base result is an out (of 108 chances).
    vl.all_outs = mround(108 - vl.all_hits - vl.all_other_ob, base=0.5)
    vr.all_outs = mround(108 - vr.all_hits - vr.all_other_ob, base=0.5)

    logger.info(
        f"vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} "
        f"/ Total: {vl.total_chances()}"
    )
    logger.info(
        f"vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} "
        f"/ Total: {vr.total_chances()}"
    )

    # Singles: total hits and hits minus doubles, triples and home runs.
    vl.calculate_singles(
        df_data["H_vL"],
        df_data["H_vL"] - df_data["2B_vL"] - df_data["3B_vL"] - df_data["HR_vL"],
    )
    vr.calculate_singles(
        df_data["H_vR"],
        df_data["H_vR"] - df_data["2B_vR"] - df_data["3B_vR"] - df_data["HR_vR"],
    )

    logger.info(
        f"vL: All Hits: {vl.all_hits} / BP Singles: {vl.bp_single} / Single 2: {vl.single_two} / "
        f"Single 1: {vl.single_one} / Single CF: {vl.single_center}"
    )
    logger.info(
        f"vR: All Hits: {vr.all_hits} / BP Singles: {vr.bp_single} / Single 2: {vr.single_two} / "
        f"Single 1: {vr.single_one} / Single CF: {vr.single_center}"
    )

    # Extra-base hits.
    vl.calculate_xbh(
        df_data["2B_vL"], df_data["3B_vL"], df_data["HR_vL"], df_data["HR/FB_vL"]
    )
    vr.calculate_xbh(
        df_data["2B_vR"], df_data["3B_vR"], df_data["HR_vR"], df_data["HR/FB_vR"]
    )

    logger.debug(
        f"vL: All XBH: {vl.all_hits - vl.single_one - vl.single_two - vl.single_center - vl.bp_single} / "
        f"Double**: {vl.double_two} / Double(cf): {vl.double_cf} / Triple: {vl.triple} / "
        f"BP HR: {vl.bp_homerun} / ND HR: {vl.homerun}"
    )
    logger.debug(
        f"vR: All XBH: {vr.all_hits - vr.single_one - vr.single_two - vr.single_center - vr.bp_single} / "
        f"Double**: {vr.double_two} / Double(cf): {vr.double_cf} / Triple: {vr.triple} / "
        f"BP HR: {vr.bp_homerun} / ND HR: {vr.homerun}"
    )

    # Walks and hit-by-pitch.
    vl.calculate_other_ob(df_data["BB_vL"], df_data["HBP_vL"])
    vr.calculate_other_ob(df_data["BB_vR"], df_data["HBP_vR"])

    logger.info(
        f"vL: All other OB: {vl.all_other_ob} / HBP: {vl.hbp} / BB: {vl.walk} / "
        f"Total Chances: {vl.total_chances()}"
    )
    logger.info(
        f"vR: All other OB: {vr.all_other_ob} / HBP: {vr.hbp} / BB: {vr.walk} / "
        f"Total Chances: {vr.total_chances()}"
    )

    # Strikeouts: second argument is batters faced minus BB, IBB and HBP.
    vl.calculate_strikouts(
        df_data["SO_vL"],
        df_data["TBF_vL"] - df_data["BB_vL"] - df_data["IBB_vL"] - df_data["HBP_vL"],
        df_data["H_vL"],
    )
    vr.calculate_strikouts(
        df_data["SO_vR"],
        df_data["TBF_vR"] - df_data["BB_vR"] - df_data["IBB_vR"] - df_data["HBP_vR"],
        df_data["H_vR"],
    )

    logger.info(
        f"vL: All Outs: {vl.all_outs} / Ks: {vl.strikeout} / Current Outs: {vl.total_outs()}"
    )
    logger.info(
        f"vR: All Outs: {vr.all_outs} / Ks: {vr.strikeout} / Current Outs: {vr.total_outs()}"
    )

    # Remaining outs, driven by fly-ball, ground-ball and opposite-field rates.
    vl.calculate_other_outs(df_data["FB%_vL"], df_data["GB%_vL"], df_data["Oppo%_vL"])
    vr.calculate_other_outs(df_data["FB%_vR"], df_data["GB%_vR"], df_data["Oppo%_vR"])

    logger.info(f"vL: Total chances: {vl.total_chances()}")
    logger.info(f"vR: Total chances: {vr.total_chances()}")

    vl_dict = vl.custom_to_dict()
    vr_dict = vr.custom_to_dict()

    # Card layout is best-effort: a failure is logged and the plain rating
    # dicts are still returned.
    try:
        offense_col = int(df_data["offense_col"]) if "offense_col" in df_data else 1
        if "player_id" in df_data:
            player_id = int(df_data["player_id"])
        else:
            # The built-in hash() of a str is salted per interpreter process,
            # so it would yield a different id on every run. CRC32 of the
            # key is stable across runs and machines.
            key_bytes = str(df_data["key_bbref"]).encode("utf-8")
            player_id = zlib.crc32(key_bytes) % 10000
        vl_card, vr_card = build_pitcher_full_cards(
            vl, vr, offense_col, player_id, df_data["pitch_hand"]
        )
        vl_dict.update(vl_card.card_output())
        vr_dict.update(vr_card.card_output())
    except Exception as e:
        logger.warning(f"Card layout builder failed for {df_data.name}: {e}")

    return [vl_dict, vr_dict]
def total_chances(chance_data):
    """Add up every value in ``chance_data`` that is not a metadata field.

    The keys ``id``, ``player_id``, ``cardset_id``, ``vs_hand`` and
    ``is_prep`` are skipped; every other key's value is added to the total.

    Returns:
        The accumulated total (``0`` when nothing is summed).
    """
    non_chance_fields = ("id", "player_id", "cardset_id", "vs_hand", "is_prep")
    running_total = 0
    for field in chance_data:
        if field in non_chance_fields:
            continue
        running_total += chance_data[field]
    return running_total
def soft_rate(pct):
    """Bucket a soft rate into ``"high"``, ``"low"`` or ``"avg"``.

    Values above 0.2 are ``"high"``, values below 0.1 are ``"low"``, and
    anything else (both bounds included) is ``"avg"``.
    """
    if pct > 0.2:
        return "high"
    if pct < 0.1:
        return "low"
    return "avg"
def med_rate(pct):
    """Bucket a medium rate into ``"high"``, ``"low"`` or ``"avg"``.

    Values above 0.65 are ``"high"``, values below 0.4 are ``"low"``, and
    anything else (both bounds included) is ``"avg"``.
    """
    if pct > 0.65:
        label = "high"
    elif pct < 0.4:
        label = "low"
    else:
        label = "avg"
    return label
def hard_rate(pct):
    """Bucket a hard rate into ``"high"``, ``"low"`` or ``"avg"``.

    Values above 0.4 are ``"high"``, values below 0.2 are ``"low"``, and
    anything else (both bounds included) is ``"avg"``.
    """
    return "high" if pct > 0.4 else ("low" if pct < 0.2 else "avg")
def hr_per_fb_rate(pct):
    """Bucket an HR-per-FB rate into ``"high"``, ``"low"`` or ``"avg"``.

    Values above 0.18 are ``"high"``, values below 0.08 are ``"low"``, and
    anything else (both bounds included) is ``"avg"``.
    """
    if pct > 0.18:
        return "high"
    return "low" if pct < 0.08 else "avg"
def all_singles(row, hits_vl, hits_vr):
    """Scale each side's hit chances by the share of hits that were singles.

    For each side the share is ``(a - b - c - d) / a`` using four positional
    values from ``row``: indexes 7, 8, 9, 12 for vL and 40, 41, 42, 45 for vR
    (presumably hits, doubles, triples and home runs -- verify against the
    row layout). When the first value of a side is zero, that side is 0.

    Returns:
        ``(singles_vl, singles_vr)``, each passed through ``mround``.
    """

    def _singles_portion(hit_chances, total_idx, idx_a, idx_b, idx_c):
        # Zero in the total column means there is nothing to apportion.
        total = int(row[total_idx])
        if total == 0:
            return 0
        remainder = total - int(row[idx_a]) - int(row[idx_b]) - int(row[idx_c])
        return hit_chances * (remainder / total)

    singles_vs_left = _singles_portion(hits_vl, 7, 8, 9, 12)
    singles_vs_right = _singles_portion(hits_vr, 40, 41, 42, 45)
    return mround(singles_vs_left), mround(singles_vs_right)
def bp_singles(singles_vl, singles_vr):
    """Assign BP single chances for each side.

    A side gets 5 when its singles total is at least 5, otherwise 0.

    Returns:
        ``(bp_vl, bp_vr)``, each passed through ``mround``.
    """
    bp_left = 0
    if singles_vl >= 5:
        bp_left = 5

    bp_right = 0
    if singles_vr >= 5:
        bp_right = 5

    return mround(bp_left), mround(bp_right)
def wh_singles(rem_si_vl, rem_si_vr, hard_rate_vl, hard_rate_vr):
    """Split off half of each side's remaining singles unless hard rate is low.

    A side whose hard-rate label is ``"low"`` gets 0; any other label gets
    half of that side's remaining singles.

    Returns:
        ``(whs_vl, whs_vr)``, each passed through ``mround``.
    """
    left_share = 0 if hard_rate_vl == "low" else rem_si_vl / 2
    right_share = 0 if hard_rate_vr == "low" else rem_si_vr / 2
    return mround(left_share), mround(right_share)
def one_singles(rem_si_vl, rem_si_vr, soft_rate_vl, soft_rate_vr):
    """Give a side all of its remaining singles only when soft rate is high.

    A side whose soft-rate label is ``"high"`` keeps its full remaining
    singles; any other label yields 0.

    Returns:
        ``(oss_vl, oss_vr)``, each passed through ``mround``.
    """
    left_share = rem_si_vl if soft_rate_vl == "high" else 0
    right_share = rem_si_vr if soft_rate_vr == "high" else 0
    return mround(left_share), mround(right_share)
def bp_homerun(hr_vl, hr_vr, hr_rate_vl, hr_rate_vr):
    """Work out the BP home-run portion of each side's home-run chances.

    The portion depends on the side's HR-rate label: ``"low"`` keeps the
    full amount, ``"avg"`` keeps 75%, and any other label keeps 40%.

    Returns:
        ``(bphr_vl, bphr_vr)``, each passed through ``mround``.
    """

    def _bp_portion(hr_chances, rate_label):
        # "low" passes the value through untouched (no multiplication).
        if rate_label == "low":
            return hr_chances
        factor = 0.75 if rate_label == "avg" else 0.4
        return hr_chances * factor

    left_portion = _bp_portion(hr_vl, hr_rate_vl)
    right_portion = _bp_portion(hr_vr, hr_rate_vr)
    return mround(left_portion), mround(right_portion)
def triples(all_xbh_vl, all_xbh_vr, triple_rate_vl, triple_rate_vr):
    """Compute triple chances as extra-base-hit chances times triple rate.

    A side with no positive extra-base-hit chances gets 0.

    Returns:
        ``(tr_vl, tr_vr)``, each passed through ``mround``.
    """
    left_triples = 0
    if all_xbh_vl > 0:
        left_triples = all_xbh_vl * triple_rate_vl

    right_triples = 0
    if all_xbh_vr > 0:
        right_triples = all_xbh_vr * triple_rate_vr

    return mround(left_triples), mround(right_triples)
def two_doubles(all_doubles_vl, all_doubles_vr, soft_rate_vl, soft_rate_vr):
    """Give a side all of its doubles only when its soft rate is high.

    A side whose soft-rate label is ``"high"`` keeps its full doubles
    amount; any other label yields 0.

    Returns:
        ``(two_doubles_vl, two_doubles_vr)``, each passed through ``mround``.
    """
    left_doubles = 0
    if soft_rate_vl == "high":
        left_doubles = all_doubles_vl

    right_doubles = 0
    if soft_rate_vr == "high":
        right_doubles = all_doubles_vr

    return mround(left_doubles), mround(right_doubles)
def hbp_rate(hbp, bb):
    """Return the ratio of hit-by-pitches to walks.

    Returns 0 when there are no hit-by-pitches, 1 when there are
    hit-by-pitches but no walks, and ``hbp / bb`` otherwise.
    """
    if hbp == 0:
        return 0
    return 1 if bb == 0 else hbp / bb
def hbps(all_ob, this_hbp_rate):
    """Return the hit-by-pitch share of the other on-base chances.

    The result is ``mround(all_ob * this_hbp_rate)``, or 0 when either
    input is zero.
    """
    if all_ob != 0 and this_hbp_rate != 0:
        return mround(all_ob * this_hbp_rate)
    return 0
def xchecks(pos, all_chances=True):
    """Return the x-check count for a fielding position.

    Args:
        pos: Position code, matched case-insensitively (``"p"``, ``"c"``,
            ``"1b"``, ``"2b"``, ``"3b"``, ``"ss"``, ``"lf"``, ``"cf"``).
            Any other code uses the fallback pair.
        all_chances: When truthy the first value of the pair is returned,
            otherwise the second (which is one lower).

    Returns:
        The count for the position as an int.
    """
    # (value when all_chances is truthy, value otherwise)
    counts_by_position = {
        "p": (1, 0),
        "c": (3, 2),
        "1b": (2, 1),
        "2b": (6, 5),
        "3b": (3, 2),
        "ss": (7, 6),
        "lf": (2, 1),
        "cf": (3, 2),
    }
    with_all, without_all = counts_by_position.get(pos.lower(), (2, 1))
    return with_all if all_chances else without_all
def oppo_fly(all_fly, oppo_rate):
    """Return the opposite-field share of the fly-ball chances.

    The result is ``mround(all_fly * oppo_rate)``, or 0 when either input
    is zero.
    """
    if all_fly != 0 and oppo_rate != 0:
        return mround(all_fly * oppo_rate)
    return 0
def groundball_a(all_gb, dp_rate):
    """Return the "groundball A" portion of the ground-ball chances.

    Returns 0 when either input is zero, all of ``all_gb`` when ``dp_rate``
    exceeds 0.6, and ``mround(all_gb * (dp_rate * 1.5))`` otherwise.
    """
    nothing_to_assign = all_gb == 0 or dp_rate == 0
    if nothing_to_assign:
        return 0
    if dp_rate > 0.6:
        return all_gb
    return mround(all_gb * (dp_rate * 1.5))
def balks(total_balks: int, innings: float, season_pct):
    """Compute a balk rating from balks and innings, capped at 20.

    The rating is ``round(total_balks * 290 * season_pct / innings)``,
    limited to a maximum of 20.

    Args:
        total_balks: Balk count; anything ``int()`` cannot read (including
            ``None`` and NaN) is logged and treated as 0.
        innings: Innings pitched; anything ``float()`` cannot read
            (including ``None``) is logged and treated as 0.
        season_pct: Multiplier applied to the numerator.

    Returns:
        The rating as an int, or 0 when innings is zero or NaN.
    """
    try:
        total_balks = int(total_balks)
    except (TypeError, ValueError):
        # TypeError covers None, which int() rejects differently from bad text.
        logger.error(f"Could not read balks: {total_balks} / setting to 0")
        total_balks = 0

    try:
        innings = float(innings)
    except (TypeError, ValueError):
        logger.error(f"Could not read innings: {innings} / setting to 0")
        innings = 0

    # NaN passes float() unchanged but would make round() raise further down,
    # so it is treated like zero innings.
    if innings == 0 or math.isnan(innings):
        return 0

    numerator = total_balks * 290 * season_pct
    logger.info(
        f"total_balks: {total_balks} / season_pct {season_pct} / innings: {innings} / numerator: {numerator}"
    )
    return min(round(numerator / innings), 20)
def wild_pitches(total_wps: int, innings: float, season_pct):
    """Compute a wild-pitch rating from wild pitches and innings, capped at 20.

    The rating is ``round(int(total_wps) * 200 / innings)``, limited to a
    maximum of 20.

    Args:
        total_wps: Wild-pitch count; converted with ``int()``.
        innings: Innings pitched; converted with ``float()``.
        season_pct: Accepted but currently not applied -- the season-scaled
            formula is kept below, disabled.

    Returns:
        The rating as an int, or 0 when innings is zero.
    """
    # Convert before the zero check so textual zeros ("0", "0.0") are caught
    # here instead of reaching the division.
    innings = float(innings)
    if innings == 0:
        return 0

    # return min(round((int(total_wps) * 200 * season_pct) / float(innings)), 20)
    return min(round((int(total_wps) * 200) / innings), 20)
def closer_rating(gf: int, saves: int, games: int):
    """Rate a pitcher as a closer from the share of games he finished.

    Args:
        gf: Games finished.
        saves: Saves; only checked for being zero.
        games: Games pitched.

    Returns:
        An int from 6 down to 0 for the first threshold that ``gf / games``
        reaches, or ``None`` when any input is zero or the share is below
        0.1.
    """
    if any(value == 0 for value in (gf, games, saves)):
        return None

    finish_share = gf / games
    # (minimum share, rating), checked from highest to lowest
    thresholds = (
        (0.875, 6),
        (0.8, 5),
        (0.7, 4),
        (0.55, 3),
        (0.4, 2),
        (0.25, 1),
        (0.1, 0),
    )
    for minimum_share, rating in thresholds:
        if finish_share >= minimum_share:
            return rating
    return None