paper-dynasty-card-creation/defenders/calcs_defense.py

import pandas as pd
import requests
from bs4 import BeautifulSoup
from typing import Literal

from db_calls import db_put
from exceptions import logger


async def create_positions(
    all_stats: pd.DataFrame,
    season_pct: float,
    post_pos: bool,
    df_c: pd.DataFrame,
    df_1b: pd.DataFrame,
    df_2b: pd.DataFrame,
    df_3b: pd.DataFrame,
    df_ss: pd.DataFrame,
    df_lf: pd.DataFrame,
    df_cf: pd.DataFrame,
    df_rf: pd.DataFrame,
    df_of: pd.DataFrame,
) -> int:
    """Build defensive position payloads for every player and optionally post them.

    Each row of ``all_stats`` is looked up (by its ``key_bbref`` value) in the
    per-position fielding frames. For every position the player appears in, a
    payload dict with ratings is produced. A player with no usable fielding
    data gets a single ``DH`` entry instead.

    Args:
        all_stats: One row per player; this function reads ``key_bbref``,
            ``player_id``, ``p_name``, ``PA_vL`` and ``PA_vR``.
        season_pct: Passed through to the rating helpers to scale thresholds.
        post_pos: When true, the payloads are sent with ``db_put`` to
            ``cardpositions``.
        df_c, df_1b, df_2b, df_3b, df_ss, df_lf, df_cf, df_rf, df_of:
            Fielding frames indexed by the same key as ``key_bbref``.

    Returns:
        The number of position payloads that were built.
    """
    position_payload = []

    def average_range(frame, key):
        # Blend the two run metrics, counting the lower one twice.
        tz_runs = int(frame.at[key, "tz_runs_total"])
        bis_runs = int(frame.at[key, "bis_runs_total"])
        return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3

    def process_pos(df_data):
        key = df_data["key_bbref"]
        no_data = True

        # Infield positions: each one carries its own range and error rating.
        infield_frames = ((df_1b, "1b"), (df_2b, "2b"), (df_3b, "3b"), (df_ss, "ss"))
        for frame, pos_code in infield_frames:
            if key not in frame.index:
                continue
            logger.info(f'Running {pos_code} stats for {df_data["p_name"]}')
            try:
                avg_range = average_range(frame, key)
                position_payload.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": pos_code.upper(),
                        "innings": float(frame.at[key, "Inn_def"]),
                        "range": get_if_range(
                            pos_code=pos_code,
                            tz_runs=round(avg_range),
                            r_dp=0,
                            season_pct=season_pct,
                        ),
                        "error": get_any_error(
                            pos_code=pos_code,
                            errors=int(frame.at[key, "E_def"]),
                            chances=int(frame.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                    }
                )
                no_data = False
            except Exception as e:
                # Best effort: a bad row for one position must not stop the run.
                logger.info(f"Infield position failed: {e}")

        # Outfield positions: range is per position, while error and arm are
        # filled in afterwards from the combined outfield data.
        of_arms = []
        of_payloads = []
        for frame, pos_code in ((df_lf, "lf"), (df_cf, "cf"), (df_rf, "rf")):
            if key not in frame.index:
                continue
            try:
                avg_range = average_range(frame, key)
                of_payloads.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": pos_code.upper(),
                        "innings": float(frame.at[key, "Inn_def"]),
                        "range": get_of_range(
                            pos_code=pos_code,
                            tz_runs=round(avg_range),
                            season_pct=season_pct,
                        ),
                    }
                )
                of_arms.append(int(frame.at[key, "bis_runs_outfield"]))
                no_data = False
            except Exception as e:
                logger.info(f"Outfield position failed: {e}")

        if key in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            try:
                # Explicit "of" code: the error rating comes from the combined
                # outfield frame, not from whichever position was looped last.
                error_rating = get_any_error(
                    pos_code="of",
                    errors=int(df_of.at[key, "E_def"]),
                    chances=int(df_of.at[key, "chances"]),
                    season_pct=season_pct,
                )
                arm_rating = arm_outfield(of_arms)
                for of_entry in of_payloads:
                    of_entry["error"] = error_rating
                    of_entry["arm"] = arm_rating
                    position_payload.append(of_entry)
                no_data = False
            except Exception as e:
                logger.info(f"Outfield position failed: {e}")

        if key in df_c.index:
            try:
                # NOTE(review): assumes SB and CS are numeric here; if they are
                # scraped text the sum is a string and never equals 0 - confirm.
                if df_c.at[key, "SB"] + df_c.at[key, "CS"] == 0:
                    # No steal attempts recorded: use a fixed arm rating.
                    arm_rating = 3
                else:
                    arm_rating = arm_catcher(
                        cs_pct=df_c.at[key, "caught_stealing_perc"],
                        raa=int(df_c.at[key, "bis_runs_catcher_sb"]),
                        season_pct=season_pct,
                    )
                position_payload.append(
                    {
                        "player_id": int(df_data["player_id"]),
                        "position": "C",
                        "innings": float(df_c.at[key, "Inn_def"]),
                        "range": range_catcher(
                            rs_value=int(df_c.at[key, "tz_runs_catcher"]),
                            season_pct=season_pct,
                        ),
                        "error": get_any_error(
                            pos_code="c",
                            errors=int(df_c.at[key, "E_def"]),
                            chances=int(df_c.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                        "arm": arm_rating,
                        "pb": pb_catcher(
                            pb=int(df_c.at[key, "PB"]),
                            innings=int(float(df_c.at[key, "Inn_def"])),
                            season_pct=season_pct,
                        ),
                        "overthrow": ot_catcher(
                            errors=int(df_c.at[key, "E_def"]),
                            chances=int(df_c.at[key, "chances"]),
                            season_pct=season_pct,
                        ),
                    }
                )
                no_data = False
            except Exception as e:
                logger.info(f"Catcher position failed: {e}")

        if no_data:
            # No fielding line succeeded: fall back to a DH entry whose
            # "innings" value is the sum of the two plate-appearance columns.
            position_payload.append(
                {
                    "player_id": int(df_data["player_id"]),
                    "position": "DH",
                    "innings": df_data["PA_vL"] + df_data["PA_vR"],
                }
            )

    print("Calculating fielding lines now...")
    all_stats.apply(process_pos, axis=1)
    print("Fielding is complete.\n\nPosting positions now...")
    if post_pos:
        resp = await db_put(
            "cardpositions", payload={"positions": position_payload}, timeout=30
        )
        print(f"Response: {resp}\n")

    return len(position_payload)


def range_pitcher(
    rs_value: int = None, rf_per9_value: float = None, season_pct: float = 1.0
):
    """Return a pitcher range rating from 1 (best) to 5 (worst).

    ``rs_value`` takes precedence when given and is compared against
    thresholds scaled by ``season_pct``. Otherwise ``rf_per9_value`` is
    compared against fixed cut-offs.

    Raises:
        KeyError: If neither ``rs_value`` nor ``rf_per9_value`` is provided.
    """
    if rs_value is not None:
        # (threshold multiplier, rating) pairs, checked from best to worst.
        for multiplier, rating in ((3, 1), (1, 2), (0, 3), (-2, 4)):
            if rs_value >= multiplier * season_pct:
                return rating
        return 5

    if rf_per9_value is None:
        raise KeyError(
            "Neither rs nor rf value was provided to calculate pitching range"
        )

    if rf_per9_value >= 2.61:
        return 1
    if rf_per9_value >= 2.18:
        return 2
    if rf_per9_value <= 1.0:
        return 5
    if rf_per9_value <= 1.50:
        return 4
    return 3


def range_catcher(rs_value: int, season_pct: float):
    """Return a catcher range rating from 1 (best) to 5 (worst).

    ``rs_value`` is compared against the cut-offs 7, 3, -1 and -5, each
    multiplied by ``season_pct``.
    """
    cutoffs = (7, 3, -1, -5)
    for rating, cutoff in enumerate(cutoffs, start=1):
        if rs_value >= cutoff * season_pct:
            return rating
    return len(cutoffs) + 1


def range_first_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a first-base range rating from 1 (best) to 5 (worst).

    The sum ``tz_runs + r_dp`` is compared against thresholds scaled by
    ``season_pct``. The positive thresholds never drop below 2 and 1, and the
    negative ones never rise above -1 and -3.
    """
    total_runs = tz_runs + r_dp

    if total_runs >= max(6 * season_pct, 2):
        return 1
    if total_runs >= max(2 * season_pct, 1):
        return 2
    if total_runs >= min(-1 * season_pct, -1):
        return 3
    if total_runs >= min(-3 * season_pct, -3):
        return 4
    return 5


def range_second_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a second-base range rating from 1 (best) to 5 (worst).

    ``tz_runs + r_dp`` is checked against four floors, best first; the first
    floor that the value meets or exceeds determines the rating.
    """
    combined = tz_runs + r_dp
    floors = (
        max(6 * season_pct, 2),
        max(2 * season_pct, 1),
        min(-1 * season_pct, -1),
        min(-3 * season_pct, -3),
    )
    for rating, floor in enumerate(floors, start=1):
        if combined >= floor:
            return rating
    return 5


def range_third_base(tz_runs: int, r_dp: int, season_pct: float):
    """Return a third-base range rating from 1 (best) to 5 (worst).

    Uses ``tz_runs + r_dp`` against season-scaled thresholds with fixed
    minimum magnitudes (2, 1, -1, -3).
    """
    runs = tz_runs + r_dp
    rating = 5
    if runs >= max(6 * season_pct, 2):
        rating = 1
    elif runs >= max(2 * season_pct, 1):
        rating = 2
    elif runs >= min(-1 * season_pct, -1):
        rating = 3
    elif runs >= min(-3 * season_pct, -3):
        rating = 4
    return rating


def range_shortstop(tz_runs: int, r_dp: int, season_pct: float):
    """Return a shortstop range rating from 1 (best) to 5 (worst).

    Same shape as the other infield range functions, except that the top
    tier requires ``8 * season_pct`` (floored at 2) instead of 6.
    """
    value = tz_runs + r_dp
    # (threshold, rating) pairs, evaluated from the best tier down.
    tiers = [
        (max(8 * season_pct, 2), 1),
        (max(2 * season_pct, 1), 2),
        (min(-1 * season_pct, -1), 3),
        (min(-3 * season_pct, -3), 4),
    ]
    for threshold, rating in tiers:
        if value >= threshold:
            return rating
    return 5


def get_if_range(pos_code: str, tz_runs: int, r_dp: int, season_pct: float):
    """Dispatch to the range function for an infield position code.

    ``pos_code`` must be one of ``"1b"``, ``"2b"``, ``"3b"`` or ``"ss"``.
    ``r_dp`` is accepted but not forwarded: the position functions are
    always called with 0 in its place.

    Raises:
        ValueError: If ``pos_code`` is not one of the four infield codes.
    """
    logger.info(f"pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})")

    calculators = (
        ("1b", range_first_base),
        ("2b", range_second_base),
        ("3b", range_third_base),
        ("ss", range_shortstop),
    )
    for code, calculator in calculators:
        if pos_code == code:
            return calculator(tz_runs, 0, season_pct)

    raise ValueError(
        f"get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid"
    )


def range_center_field(drs: int, season_pct: float):
    """Return a center-field range rating from 1 (best) to 5 (worst).

    ``drs`` is compared against the cut-offs 9, 3, -1 and -4, each scaled by
    ``season_pct``.
    """
    for cutoff, rating in ((9, 1), (3, 2), (-1, 3), (-4, 4)):
        if drs >= cutoff * season_pct:
            return rating
    return 5


def range_left_field(drs: int, season_pct: float):
    """Return the left-field range rating, using the center-field thresholds."""
    rating = range_center_field(drs, season_pct)
    return rating


def range_right_field(drs: int, season_pct: float):
    """Return the right-field range rating, using the center-field thresholds."""
    rating = range_center_field(drs, season_pct)
    return rating


def get_of_range(pos_code: str, tz_runs: int, season_pct: float):
    """Dispatch to the range function for an outfield position code.

    ``"lf"`` and ``"cf"`` select their own functions; any other code is
    treated as right field.
    """
    logger.info(f"pos: {pos_code} / tz_runs: {tz_runs}")

    if pos_code == "cf":
        calculator = range_center_field
    elif pos_code == "lf":
        calculator = range_left_field
    else:
        calculator = range_right_field
    return calculator(tz_runs, season_pct)


def valid_error_ratings(err_num: int, position: str) -> int:
    """Snap ``err_num`` to a permitted error rating for ``position``.

    Each position has an ascending list of allowed ratings. The result is
    ``err_num`` itself when it is allowed, the largest allowed rating when
    ``err_num`` exceeds it, and otherwise the smallest allowed rating that is
    greater than or equal to ``err_num``. Position codes other than p, c, 1b,
    2b, 3b and ss (case-insensitive) use the outfield list.
    """
    outfield = list(range(26))
    by_position = {
        "p": [
            0, 4, 6, 7, 8, *range(10, 25), 26, 27, 28, 30, 31, 33, 34, 35, 36,
            38, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51,
        ],
        "c": list(range(17)),
        "1b": list(range(31)),
        "2b": [
            0, 1, 2, 3, 4, 5, 6, 8, *range(10, 31), 32, 34, 37, 39, 41, 44, 47,
            50, 53, 56, 59, 62, 65, 68, 71,
        ],
        "3b": [
            0, 1, 2, 3, 4, 5, 6, 8, *range(10, 36), 37, 39, 41, 44, 47, 50, 53,
            56, 59, 62, 65,
        ],
        "ss": [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, *range(16, 35), 36, 38, 40,
            42, 44, 48, 52, 56, 60, 64, 68, 72,
        ],
    }
    allowed = by_position.get(position.lower(), outfield)
    ceiling = allowed[-1]

    if err_num in allowed:
        return err_num
    if err_num > ceiling:
        return ceiling
    # Round up to the next allowed rating.
    return next((rating for rating in allowed if err_num <= rating), None)


def raw_error(errors: int, chances: int, season_pct: float, chance_max: int):
    """Scale an error count to a fixed number of chances.

    Returns ``errors * chance_max / chances``, or 0 when either ``errors`` or
    ``chances`` is 0. ``season_pct`` is accepted but currently unused.
    """
    if errors != 0 and chances != 0:
        return errors * chance_max / chances
    return 0


def error_pitcher(errors: int, chances: int, season_pct: float):
    """Return the pitcher error rating, scaling errors to 300 chances."""
    scaled = raw_error(errors, chances, season_pct, 300)
    return valid_error_ratings(int(scaled), "p")


def error_catcher(errors: int, chances: int, season_pct: float):
    """Return the catcher error rating, scaling errors to 500 chances."""
    scaled = raw_error(errors, chances, season_pct, 500)
    return valid_error_ratings(int(scaled), "c")


def error_first_base(errors: int, chances: int, season_pct: float):
    """Return the first-base error rating, scaling errors to 1300 chances."""
    scaled = raw_error(errors, chances, season_pct, 1300)
    return valid_error_ratings(int(scaled), "1b")


def error_second_base(errors: int, chances: int, season_pct: float):
    """Return the second-base error rating, scaling errors to 700 chances."""
    scaled = raw_error(errors, chances, season_pct, 700)
    return valid_error_ratings(int(scaled), "2b")


def error_third_base(errors: int, chances: int, season_pct: float):
    """Return the third-base error rating, scaling errors to 500 chances."""
    scaled = raw_error(errors, chances, season_pct, 500)
    return valid_error_ratings(int(scaled), "3b")


def error_shortstop(errors: int, chances: int, season_pct: float):
    """Return the shortstop error rating, scaling errors to 700 chances."""
    scaled = raw_error(errors, chances, season_pct, 700)
    return valid_error_ratings(int(scaled), "ss")


def error_outfield(errors: int, chances: int, season_pct: float):
    """Return the outfield error rating, scaling errors to 250 chances."""
    scaled = raw_error(errors, chances, season_pct, 250)
    return valid_error_ratings(int(scaled), "of")


def get_any_error(pos_code: str, errors: int, chances: int, season_pct: float):
    """Dispatch to the error-rating function for ``pos_code``.

    The code is matched case-insensitively. ``lf``, ``cf``, ``rf`` and ``of``
    all use the outfield calculation. An unrecognized code yields ``None``.
    """
    code = pos_code.lower()

    if code in ["lf", "cf", "rf", "of"]:
        calculator = error_outfield
    else:
        calculator = {
            "p": error_pitcher,
            "c": error_catcher,
            "1b": error_first_base,
            "2b": error_second_base,
            "3b": error_third_base,
            "ss": error_shortstop,
        }.get(code)

    if calculator is None:
        return None
    return calculator(errors, chances, season_pct)


def arm_outfield(all_arms: list):
    """Return an outfield arm rating from the best value in ``all_arms``.

    Ratings run from -6 (strongest) to 2 (weakest); an empty list returns 5.
    Only the maximum of ``all_arms`` is considered.

    NOTE(review): the original author's note says these cut-offs were tuned
    on the tz_runs_total scale (roughly -8 to +23 in 2005 data), not on
    bis_runs_outfield - confirm which metric callers actually pass.
    """
    if not all_arms:
        return 5

    best = max(all_arms)
    # (exclusive lower bound, rating), strongest tier first.
    tiers = (
        (22, -6),
        (19, -5),
        (16, -4),
        (12, -3),
        (8, -2),
        (4, -1),
        (0, 0),
        (-4, 1),
    )
    for lower_bound, rating in tiers:
        if best > lower_bound:
            return rating
    return 2


def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int:
    """Return a catcher arm rating (lower is better).

    Two candidate ratings are computed: one from ``raa`` against thresholds
    scaled by ``season_pct``, and one from the caught-stealing percentage
    string (e.g. ``"32%"``). The lower of the two is returned. An empty
    ``cs_pct`` returns 3.
    """
    if cs_pct == "":
        return 3
    caught_rate = float(cs_pct.strip("%")) / 100

    # Rating implied by runs; defaults to the worst tier.
    runs_rating = 5
    for multiplier, rating in ((5, -4), (2, -2), (-1, 0), (-2, 3)):
        if raa > multiplier * season_pct:
            runs_rating = rating
            break

    # Rating implied by caught-stealing rate; defaults to the worst tier.
    rate_rating = 5
    rate_tiers = (
        (0.6, -5),
        (0.5, -4),
        (0.4, -3),
        (0.3, -2),
        (0.25, -1),
        (0.2, 0),
        (0.16, 1),
        (0.12, 2),
        (0.1, 3),
        (0.05, 4),
    )
    for floor, rating in rate_tiers:
        if caught_rate > floor:
            rate_rating = rating
            break

    return int(min(runs_rating, rate_rating))


def pb_catcher(pb: int, innings: int, season_pct: float):
    """Return a passed-ball rating capped at 20.

    Computes ``pb * 1000 * season_pct / innings``, caps it at 20, and
    truncates the absolute value to an int. Returns 0 when ``pb`` or
    ``innings`` is 0.
    """
    if pb != 0 and innings != 0:
        scaled_rate = pb * 1000 * season_pct / innings
        return int(abs(min(scaled_rate, 20)))
    return 0


def ot_catcher(errors: int, chances: int, season_pct: float):
    """Return a catcher overthrow rating capped at 20.

    Scales ``errors`` to ``3000 * season_pct`` chances, divides by 3, caps at
    20 and truncates to an int. Returns 0 when ``errors`` or ``chances`` is 0.
    """
    if errors != 0 and chances != 0:
        scaled_chances = 3000 * season_pct
        return int(min(errors * scaled_chances / chances / 3, 20))
    return 0


def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> int:
    """Return a pitcher hold rating (lower is better).

    Args:
        raw_cs: Caught-stealing percentage as text (e.g. ``"31%"``), or an
            empty string when no data is available.
        picks: Pickoff count, compared against thresholds scaled by
            ``season_pct`` to earn a bonus of 0 to 3.
        season_pct: Scale factor applied to the pickoff thresholds.

    Returns:
        An integer rating. With no caught-stealing data the base is 2 and the
        bonus can improve it to -1 at best. Otherwise the base comes from the
        caught-stealing percentage (-5 to 9) and the result is floored at -5.
    """
    # Pickoff bonus (improves hold by 1-3 points).
    pick_bonus = 0
    for multiplier, bonus in ((8, 3), (5, 2), (2, 1)):
        if picks > multiplier * season_pct:
            pick_bonus = bonus
            break

    # No CS data: default to +2, pickoff bonus can improve to -1 at best.
    if raw_cs == "":
        return max(2 - pick_bonus, -1)

    # Base hold rating from caught-stealing percentage. Note that the table
    # skips the value 2 (it goes from 1 straight to 3).
    cs_pct = float(raw_cs.strip("%")) / 100
    hold_tiers = (
        (0.667, -5),
        (0.6, -4),
        (0.48, -3),
        (0.34, -2),
        (0.26, -1),
        (0.22, 0),
        (0.2, 1),
        (0.18, 3),
        (0.16, 4),
        (0.14, 5),
        (0.12, 6),
        (0.1, 7),
        (0.06, 8),
    )
    hold_num = 9
    for floor, rating in hold_tiers:
        if cs_pct > floor:
            hold_num = rating
            break

    # Apply pickoff bonus (lower = better), floor at -5.
    return max(hold_num - pick_bonus, -5)


def pow_ratings(innings: float, gs: int, games: int) -> tuple[int, int]:
    """Return ``(starter_pow, reliever_pow)`` ratings for a pitcher.

    Innings are split between starts and relief appearances in proportion to
    ``gs`` and ``games - gs``; each rating is the rounded innings per
    appearance of that kind, with a minimum of 1. ``games`` and ``gs`` are
    coerced with ``int()``; a ``ValueError`` during coercion is logged and the
    value becomes 0. Returns ``(1, 1)`` when ``innings`` or ``games`` is <= 1.
    """
    try:
        games = int(games)
    except ValueError:
        logger.error(f"Could not read Pitcher Games: {games} / setting to 0")
        games = 0

    try:
        gs = int(gs)
    except ValueError:
        logger.error(f"Could not read Pitcher GS: {gs} / setting to 0")
        gs = 0

    if innings <= 1 or games <= 1:
        return 1, 1

    relief_games = games - gs
    start_innings = int(innings * gs / games)
    relief_innings = int(innings * relief_games / games)

    starter_pow = 1 if gs == 0 else max(round(start_innings / gs), 1)
    reliever_pow = (
        1 if relief_innings == 0 else max(round(relief_innings / relief_games), 1)
    )

    if relief_innings / max(start_innings, 1) < 0.1:
        # Relief work is negligible next to starting work.
        reliever_pow = 1
    elif reliever_pow >= starter_pow > 1:
        # Keep the relief rating strictly below the starter rating.
        reliever_pow = starter_pow - 1

    return starter_pow, reliever_pow


def innings_float(innings: str) -> float:
    """Convert an innings string such as ``"101.2"`` to a float.

    The part before the dot is taken as whole innings and the part after it
    is multiplied by 0.333. A string without a dot is treated as whole
    innings only.
    """
    whole, decimal = innings.split(".") if "." in innings else (innings, "0")
    return float(int(whole) + int(decimal) * 0.333)


# Get position stats into dataframes
def get_bbref_fielding_df(
    position: Literal["p", "c", "1b", "2b", "3b", "ss", "lf", "cf", "rf", "of"],
    s_num: int,
):
    """Scrape a Baseball-Reference fielding table into a DataFrame.

    Fetches the league-wide fielding page for ``position`` in season
    ``s_num`` and reads the table with id
    ``players_players_standard_fielding_fielding``. Column names come from
    each cell's ``data-stat`` attribute; a ``key_bbref`` column is inserted
    from the ``data-append-csv`` attribute of the cell that carries it. Rows
    are indexed by their first value.

    Args:
        position: Position code used in the page URL.
        s_num: Season number used in the page URL.

    Returns:
        A DataFrame of string cell values with one row per ``key_bbref``
        (first occurrence kept). For every position except ``"p"``, rows whose
        ``chances`` value is "0", "1" or "2" are dropped.
    """
    url = f"https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml"
    # A timeout keeps a stalled connection from hanging the run indefinitely.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", {"id": "players_players_standard_fielding_fielding"})

    headers = []
    data = []
    indices = []
    for row in table.find_all("tr"):
        row_data = []
        col_names = []
        for cell in row.find_all("td"):
            if cell.has_attr("data-append-csv"):
                # This cell carries the player key; store it ahead of the
                # cell's own text so it becomes its own column.
                row_data.append(cell["data-append-csv"])
                if len(headers) == 0:
                    col_names.append("key_bbref")
            row_data.append(cell.text)
            if len(headers) == 0:
                col_names.append(cell["data-stat"])
        if len(row_data) > 0:
            data.append(row_data)
            indices.append(row_data[0])
            # Column names are taken from the first row that has data cells.
            if len(headers) == 0:
                headers.extend(col_names)

    # "key_bbref == key_bbref" is false only for missing values, so this
    # filters out rows without a player key.
    pos_frame = pd.DataFrame(data, index=indices, columns=headers).query(
        "key_bbref == key_bbref"
    )
    if position == "p":
        return pos_frame.drop_duplicates(subset=["key_bbref"], keep="first")

    # Cell values are text, so the low-chance filter compares strings.
    enough_chances = pos_frame[~pos_frame["chances"].isin(["0", "1", "2"])]
    return enough_chances.drop_duplicates(subset=["key_bbref"], keep="first")