From 7fd691690c0144c2475e0ebcdff1250d48e0b52e Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Sun, 8 Mar 2026 14:22:17 -0500 Subject: [PATCH] Fix pitcher hold rating: change pickoffs from override to bonus modifier Pickoffs were using min(pick_cap, hold_num) which let high pickoff counts completely override bad CS%, giving 31% of pitchers a -3 hold rating. Now pickoffs act as a 1-3 point bonus on top of the CS%-based rating. Pitchers with no CS data default to +2 (capped at -1 with pickoff bonus) instead of the old +9. Co-Authored-By: Claude Opus 4.6 --- defenders/calcs_defense.py | 615 +++++++++++++++++++++++++------------ 1 file changed, 419 insertions(+), 196 deletions(-) diff --git a/defenders/calcs_defense.py b/defenders/calcs_defense.py index 13f82b7..6c95b04 100644 --- a/defenders/calcs_defense.py +++ b/defenders/calcs_defense.py @@ -8,148 +8,198 @@ from exceptions import logger async def create_positions( - all_stats: pd.DataFrame, season_pct: float, post_pos: bool, df_c: pd.DataFrame, df_1b: pd.DataFrame, - df_2b: pd.DataFrame, df_3b: pd.DataFrame, df_ss: pd.DataFrame, df_lf: pd.DataFrame, df_cf: pd.DataFrame, - df_rf: pd.DataFrame, df_of: pd.DataFrame): + all_stats: pd.DataFrame, + season_pct: float, + post_pos: bool, + df_c: pd.DataFrame, + df_1b: pd.DataFrame, + df_2b: pd.DataFrame, + df_3b: pd.DataFrame, + df_ss: pd.DataFrame, + df_lf: pd.DataFrame, + df_cf: pd.DataFrame, + df_rf: pd.DataFrame, + df_of: pd.DataFrame, +): position_payload = [] def process_pos(df_data): no_data = True - for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]: - if df_data['key_bbref'] in pos_data[0].index: + for pos_data in [(df_1b, "1b"), (df_2b, "2b"), (df_3b, "3b"), (df_ss, "ss")]: + if df_data["key_bbref"] in pos_data[0].index: logger.info(f'Running {pos_data[1]} stats for {df_data["p_name"]}') try: - average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + - min( - int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) - )) / 3 - - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": pos_data[1].upper(), - "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), - "range": get_if_range( - pos_code=pos_data[1], - tz_runs=round(average_range), - r_dp=0, - season_pct=season_pct - ), - "error": get_any_error( - pos_code=pos_data[1], - errors=int(pos_data[0].at[df_data["key_bbref"], 'E_def']), - chances=int(pos_data[0].at[df_data["key_bbref"], 'chances']), - season_pct=season_pct + average_range = ( + int(pos_data[0].at[df_data["key_bbref"], "tz_runs_total"]) + + int(pos_data[0].at[df_data["key_bbref"], "bis_runs_total"]) + + min( + int(pos_data[0].at[df_data["key_bbref"], "tz_runs_total"]), + int(pos_data[0].at[df_data["key_bbref"], "bis_runs_total"]), ) - }) + ) / 3 + + position_payload.append( + { + "player_id": int(df_data["player_id"]), + "position": pos_data[1].upper(), + "innings": float( + pos_data[0].at[df_data["key_bbref"], "Inn_def"] + ), + "range": get_if_range( + pos_code=pos_data[1], + tz_runs=round(average_range), + r_dp=0, + season_pct=season_pct, + ), + "error": get_any_error( + pos_code=pos_data[1], + errors=int( + pos_data[0].at[df_data["key_bbref"], "E_def"] + ), + chances=int( + pos_data[0].at[df_data["key_bbref"], "chances"] + ), + season_pct=season_pct, + ), + } + ) no_data = False except Exception as e: - logger.info(f'Infield position failed: {e}') + logger.info(f"Infield position failed: {e}") of_arms = [] of_payloads = [] - for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]: + for pos_data in [(df_lf, "lf"), (df_cf, "cf"), (df_rf, "rf")]: if df_data["key_bbref"] in pos_data[0].index: try: - average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + - min( - int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) - )) / 3 - of_payloads.append({ - "player_id": int(df_data['player_id']), - "position": pos_data[1].upper(), - "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), - "range": get_of_range( - pos_code=pos_data[1], - tz_runs=round(average_range), - season_pct=season_pct + average_range = ( + int(pos_data[0].at[df_data["key_bbref"], "tz_runs_total"]) + + int(pos_data[0].at[df_data["key_bbref"], "bis_runs_total"]) + + min( + int(pos_data[0].at[df_data["key_bbref"], "tz_runs_total"]), + int(pos_data[0].at[df_data["key_bbref"], "bis_runs_total"]), ) - }) - of_arms.append(int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_outfield'])) + ) / 3 + of_payloads.append( + { + "player_id": int(df_data["player_id"]), + "position": pos_data[1].upper(), + "innings": float( + pos_data[0].at[df_data["key_bbref"], "Inn_def"] + ), + "range": get_of_range( + pos_code=pos_data[1], + tz_runs=round(average_range), + season_pct=season_pct, + ), + } + ) + of_arms.append( + int(pos_data[0].at[df_data["key_bbref"], "bis_runs_outfield"]) + ) no_data = False except Exception as e: - logger.info(f'Outfield position failed: {e}') + logger.info(f"Outfield position failed: {e}") - if df_data["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0: + if ( + df_data["key_bbref"] in df_of.index + and len(of_arms) > 0 + and len(of_payloads) > 0 + ): try: error_rating = get_any_error( pos_code=pos_data[1], - errors=int(df_of.at[df_data["key_bbref"], 'E_def']), - chances=int(df_of.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct + errors=int(df_of.at[df_data["key_bbref"], "E_def"]), + chances=int(df_of.at[df_data["key_bbref"], "chances"]), + season_pct=season_pct, ) arm_rating = arm_outfield(of_arms) for f in of_payloads: - f['error'] = error_rating - f['arm'] = arm_rating + f["error"] = error_rating + f["arm"] = arm_rating position_payload.append(f) no_data = False except Exception as e: - logger.info(f'Outfield position failed: {e}') + logger.info(f"Outfield position failed: {e}") if df_data["key_bbref"] in df_c.index: try: - if df_c.at[df_data["key_bbref"], 'SB'] + df_c.at[df_data["key_bbref"], 'CS'] == 0: + if ( + df_c.at[df_data["key_bbref"], "SB"] + + df_c.at[df_data["key_bbref"], "CS"] + == 0 + ): arm_rating = 3 else: arm_rating = arm_catcher( - cs_pct=df_c.at[df_data["key_bbref"], 'caught_stealing_perc'], - raa=int(df_c.at[df_data["key_bbref"], 'bis_runs_catcher_sb']), - season_pct=season_pct + cs_pct=df_c.at[df_data["key_bbref"], "caught_stealing_perc"], + raa=int(df_c.at[df_data["key_bbref"], "bis_runs_catcher_sb"]), + season_pct=season_pct, ) - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": 'C', - "innings": float(df_c.at[df_data["key_bbref"], 'Inn_def']), - "range": range_catcher( - rs_value=int(df_c.at[df_data["key_bbref"], 'tz_runs_catcher']), - season_pct=season_pct - ), - "error": get_any_error( - pos_code='c', - errors=int(df_c.at[df_data["key_bbref"], 'E_def']), - chances=int(df_c.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ), - "arm": arm_rating, - "pb": pb_catcher( - pb=int(df_c.at[df_data["key_bbref"], 'PB']), - innings=int(float(df_c.at[df_data["key_bbref"], 'Inn_def'])), - season_pct=season_pct - ), - "overthrow": ot_catcher( - errors=int(df_c.at[df_data["key_bbref"], 'E_def']), - chances=int(df_c.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ) - }) + position_payload.append( + { + "player_id": int(df_data["player_id"]), + "position": "C", + "innings": float(df_c.at[df_data["key_bbref"], "Inn_def"]), + "range": range_catcher( + rs_value=int( + df_c.at[df_data["key_bbref"], "tz_runs_catcher"] + ), + season_pct=season_pct, + ), + "error": get_any_error( + pos_code="c", + errors=int(df_c.at[df_data["key_bbref"], "E_def"]), + chances=int(df_c.at[df_data["key_bbref"], "chances"]), + season_pct=season_pct, + ), + "arm": arm_rating, + "pb": pb_catcher( + pb=int(df_c.at[df_data["key_bbref"], "PB"]), + innings=int( + float(df_c.at[df_data["key_bbref"], "Inn_def"]) + ), + season_pct=season_pct, + ), + "overthrow": ot_catcher( + errors=int(df_c.at[df_data["key_bbref"], "E_def"]), + chances=int(df_c.at[df_data["key_bbref"], "chances"]), + season_pct=season_pct, + ), + } + ) no_data = False except Exception as e: - logger.info(f'Catcher position failed: {e}') + logger.info(f"Catcher position failed: {e}") if no_data: - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": 'DH', - "innings": df_data['PA_vL'] + df_data['PA_vR'] - }) + position_payload.append( + { + "player_id": int(df_data["player_id"]), + "position": "DH", + "innings": df_data["PA_vL"] + df_data["PA_vR"], + } + ) - print(f'Calculating fielding lines now...') + print(f"Calculating fielding lines now...") all_stats.apply(process_pos, axis=1) - print(f'Fielding is complete.\n\nPosting positions now...') + print(f"Fielding is complete.\n\nPosting positions now...") if post_pos: - resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30) - print(f'Response: {resp}\n') + resp = await db_put( + "cardpositions", payload={"positions": position_payload}, timeout=30 + ) + print(f"Response: {resp}\n") return len(position_payload) -def range_pitcher(rs_value: int = None, rf_per9_value: float = None, season_pct: float = 1.0): +def range_pitcher( + rs_value: int = None, rf_per9_value: float = None, season_pct: float = 1.0 +): if rs_value is None and rf_per9_value is None: - raise KeyError('Neither rs nor rf value was provided to calculate pitching range') + raise KeyError( + "Neither rs nor rf value was provided to calculate pitching range" + ) if rs_value is not None: if rs_value >= (3 * season_pct): @@ -243,17 +293,19 @@ def range_shortstop(tz_runs: int, r_dp: int, season_pct: float): def get_if_range(pos_code: str, tz_runs: int, r_dp: int, season_pct: float): - logger.info(f'pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})') - if pos_code == '1b': + logger.info(f"pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})") + if pos_code == "1b": return range_first_base(tz_runs, 0, season_pct) - elif pos_code == '2b': + elif pos_code == "2b": return range_second_base(tz_runs, 0, season_pct) - elif pos_code == '3b': + elif pos_code == "3b": return range_third_base(tz_runs, 0, season_pct) - elif pos_code == 'ss': + elif pos_code == "ss": return range_shortstop(tz_runs, 0, season_pct) else: - raise ValueError(f'get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid') + raise ValueError( + f"get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid" + ) def range_center_field(drs: int, season_pct: float): @@ -278,39 +330,203 @@ def range_right_field(drs: int, season_pct: float): def get_of_range(pos_code: str, tz_runs: int, season_pct: float): - logger.info(f'pos: {pos_code} / tz_runs: {tz_runs}') - if pos_code == 'lf': + logger.info(f"pos: {pos_code} / tz_runs: {tz_runs}") + if pos_code == "lf": return range_left_field(tz_runs, season_pct) - elif pos_code == 'cf': + elif pos_code == "cf": return range_center_field(tz_runs, season_pct) else: return range_right_field(tz_runs, season_pct) def valid_error_ratings(err_num: int, position: str) -> int: - if position.lower() == 'p': + if position.lower() == "p": valid_err = [ - 0, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 30, 31, 33, 34, - 35, 36, 38, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51 + 0, + 4, + 6, + 7, + 8, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 30, + 31, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 42, + 43, + 44, + 46, + 47, + 48, + 50, + 51, ] - elif position.lower() == 'c': + elif position.lower() == "c": valid_err = list(range(17)) - elif position.lower() == '1b': + elif position.lower() == "1b": valid_err = list(range(31)) - elif position.lower() == '2b': + elif position.lower() == "2b": valid_err = [ - 0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 32, 34, 37, 39, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71 + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 8, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 32, + 34, + 37, + 39, + 41, + 44, + 47, + 50, + 53, + 56, + 59, + 62, + 65, + 68, + 71, ] - elif position.lower() == '3b': + elif position.lower() == "3b": valid_err = [ - 0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 37, 39, 41, 44, 47, 50, 53, 56, 59, 62, 65 + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 8, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 37, + 39, + 41, + 44, + 47, + 50, + 53, + 56, + 59, + 62, + 65, ] - elif position.lower() == 'ss': + elif position.lower() == "ss": valid_err = [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 36, 38, 40, 42, 44, 48, 52, 56, 60, 64, 68, 72 + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 10, + 12, + 14, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 36, + 38, + 40, + 42, + 44, + 48, + 52, + 56, + 60, + 64, + 68, + 72, ] # Outfielders else: @@ -335,47 +551,47 @@ def raw_error(errors: int, chances: int, season_pct: float, chance_max: int): def error_pitcher(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 300)), 'p') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 300)), "p") def error_catcher(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), 'c') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), "c") def error_first_base(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 1300)), '1b') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 1300)), "1b") def error_second_base(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), '2b') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), "2b") def error_third_base(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), '3b') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), "3b") def error_shortstop(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), 'ss') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), "ss") def error_outfield(errors: int, chances: int, season_pct: float): - return valid_error_ratings(int(raw_error(errors, chances, season_pct, 250)), 'of') + return valid_error_ratings(int(raw_error(errors, chances, season_pct, 250)), "of") def get_any_error(pos_code: str, errors: int, chances: int, season_pct: float): - if pos_code.lower() == 'p': + if pos_code.lower() == "p": return error_pitcher(errors, chances, season_pct) - elif pos_code.lower() == 'c': + elif pos_code.lower() == "c": return error_catcher(errors, chances, season_pct) - elif pos_code.lower() == '1b': + elif pos_code.lower() == "1b": return error_first_base(errors, chances, season_pct) - elif pos_code.lower() == '2b': + elif pos_code.lower() == "2b": return error_second_base(errors, chances, season_pct) - elif pos_code.lower() == '3b': + elif pos_code.lower() == "3b": return error_third_base(errors, chances, season_pct) - elif pos_code.lower() == 'ss': + elif pos_code.lower() == "ss": return error_shortstop(errors, chances, season_pct) - elif pos_code.lower() in ['lf', 'cf', 'rf', 'of']: + elif pos_code.lower() in ["lf", "cf", "rf", "of"]: return error_outfield(errors, chances, season_pct) @@ -399,15 +615,15 @@ def arm_outfield(all_arms: list): elif max(all_arms) > 4: return -1 # Average (8, 7, 6, 5) elif max(all_arms) > 0: - return 0 # Below average (4, 3, 2, 1) + return 0 # Below average (4, 3, 2, 1) elif max(all_arms) > -4: - return 1 # Poor arm (0, -1, -2, -3) + return 1 # Poor arm (0, -1, -2, -3) else: - return 2 # Very poor arm (-4 and below) + return 2 # Very poor arm (-4 and below) def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int: - if cs_pct == '': + if cs_pct == "": return 3 cs_pct = float(cs_pct.strip("%")) / 100 @@ -422,25 +638,25 @@ def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int: else: max_arm = 5 - if cs_pct > .6: + if cs_pct > 0.6: raw_arm = -5 - elif cs_pct > .5: + elif cs_pct > 0.5: raw_arm = -4 - elif cs_pct > .4: + elif cs_pct > 0.4: raw_arm = -3 - elif cs_pct > .3: + elif cs_pct > 0.3: raw_arm = -2 - elif cs_pct > .25: + elif cs_pct > 0.25: raw_arm = -1 - elif cs_pct > .2: + elif cs_pct > 0.2: raw_arm = 0 - elif cs_pct > .16: + elif cs_pct > 0.16: raw_arm = 1 - elif cs_pct > .12: + elif cs_pct > 0.12: raw_arm = 2 - elif cs_pct > .1: + elif cs_pct > 0.1: raw_arm = 3 - elif cs_pct > .05: + elif cs_pct > 0.05: raw_arm = 4 else: raw_arm = 5 @@ -464,66 +680,69 @@ def ot_catcher(errors: int, chances: int, season_pct: float): def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> str: - if raw_cs == '': - return '+9' - - cs_pct = float(raw_cs.strip("%")) / 100 - if picks > 5 * season_pct: - pick_cap = -3 - elif picks > 3 * season_pct: - pick_cap = -2 - elif picks > 0 * season_pct: - pick_cap = 5 + # Pickoff bonus (improves hold by 1-3 points) + if picks > 8 * season_pct: + pick_bonus = 3 + elif picks > 5 * season_pct: + pick_bonus = 2 + elif picks > 2 * season_pct: + pick_bonus = 1 else: - pick_cap = 9 + pick_bonus = 0 - if cs_pct > .667: + # No CS data: default to +2, pickoff bonus can improve to -1 at best + if raw_cs == "": + return max(2 - pick_bonus, -1) + + # Base hold rating from caught stealing percentage + cs_pct = float(raw_cs.strip("%")) / 100 + if cs_pct > 0.667: hold_num = -5 - elif cs_pct > .6: + elif cs_pct > 0.6: hold_num = -4 - elif cs_pct > .48: + elif cs_pct > 0.48: hold_num = -3 - elif cs_pct > .34: + elif cs_pct > 0.34: hold_num = -2 - elif cs_pct > .26: + elif cs_pct > 0.26: hold_num = -1 - elif cs_pct > .22: + elif cs_pct > 0.22: hold_num = 0 - elif cs_pct > .2: + elif cs_pct > 0.2: hold_num = 1 - elif cs_pct > .18: + elif cs_pct > 0.18: hold_num = 3 - elif cs_pct > .16: + elif cs_pct > 0.16: hold_num = 4 - elif cs_pct > .14: + elif cs_pct > 0.14: hold_num = 5 - elif cs_pct > .12: + elif cs_pct > 0.12: hold_num = 6 - elif cs_pct > .1: + elif cs_pct > 0.1: hold_num = 7 - elif cs_pct > .06: + elif cs_pct > 0.06: hold_num = 8 else: hold_num = 9 - final_hold = min(pick_cap, hold_num) + # Apply pickoff bonus (lower = better), cap at -5 + final_hold = max(hold_num - pick_bonus, -5) return final_hold - # return f'{"+" if final_hold >= 0 else ""}{final_hold}' def pow_ratings(innings: float, gs: int, games: int) -> tuple[int, int]: try: games = int(games) except ValueError: - logger.error(f'Could not read Pitcher Games: {games} / setting to 0') + logger.error(f"Could not read Pitcher Games: {games} / setting to 0") games = 0 - + try: gs = int(gs) except ValueError: - logger.error(f'Could not read Pitcher GS: {gs} / setting to 0') + logger.error(f"Could not read Pitcher GS: {gs} / setting to 0") gs = 0 - + if innings <= 1 or games <= 1: return 1, 1 @@ -540,7 +759,7 @@ def pow_ratings(innings: float, gs: int, games: int) -> tuple[int, int]: else: r_pow = max(round(r_innings / (games - gs)), 1) - if r_innings / max(s_innings, 1) < .1: + if r_innings / max(s_innings, 1) < 0.1: r_pow = 1 elif r_pow >= s_pow > 1: r_pow = s_pow - 1 @@ -549,33 +768,35 @@ def pow_ratings(innings: float, gs: int, games: int) -> tuple[int, int]: def innings_float(innings: str) -> float: - if '.' in innings: - whole, decimal = innings.split('.') + if "." in innings: + whole, decimal = innings.split(".") else: whole = innings decimal = "0" - return float(int(whole) + int(decimal) * .333) + return float(int(whole) + int(decimal) * 0.333) # Get position stats into dataframes def get_bbref_fielding_df( - position: Literal['p', 'c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of'], s_num: int): - url = f'https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml' - soup = BeautifulSoup(requests.get(url).text, 'html.parser') - table = soup.find('table', {'id': 'players_players_standard_fielding_fielding'}) + position: Literal["p", "c", "1b", "2b", "3b", "ss", "lf", "cf", "rf", "of"], + s_num: int, +): + url = f"https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml" + soup = BeautifulSoup(requests.get(url).text, "html.parser") + table = soup.find("table", {"id": "players_players_standard_fielding_fielding"}) headers = [] data = [] indeces = [] - for row in table.find_all('tr'): + for row in table.find_all("tr"): row_data = [] col_names = [] - for cell in row.find_all('td'): - if cell.has_attr('data-append-csv'): - player_id = cell['data-append-csv'] + for cell in row.find_all("td"): + if cell.has_attr("data-append-csv"): + player_id = cell["data-append-csv"] row_data.append(player_id) if len(headers) == 0: - col_names.append('key_bbref') + col_names.append("key_bbref") # try: # player_id = cell['data-append-csv'] # row_data.append(player_id) @@ -585,15 +806,17 @@ def get_bbref_fielding_df( # pass row_data.append(cell.text) if len(headers) == 0: - col_names.append(cell['data-stat']) + col_names.append(cell["data-stat"]) if len(row_data) > 0: data.append(row_data) indeces.append(row_data[0]) if len(headers) == 0: headers.extend(col_names) - pos_frame = pd.DataFrame(data, index=indeces, columns=headers).query('key_bbref == key_bbref') - if position == 'p': - return pos_frame.drop_duplicates(subset=['key_bbref'], keep='first') + pos_frame = pd.DataFrame(data, index=indeces, columns=headers).query( + "key_bbref == key_bbref" + ) + if position == "p": + return pos_frame.drop_duplicates(subset=["key_bbref"], keep="first") - tmp = pos_frame[~pos_frame['chances'].isin(['0', '1', '2'])] - return tmp.drop_duplicates(subset=['key_bbref'], keep='first') + tmp = pos_frame[~pos_frame["chances"].isin(["0", "1", "2"])] + return tmp.drop_duplicates(subset=["key_bbref"], keep="first")