import datetime
import urllib.parse
from typing import Any, Dict, Optional

import numpy as np
import pandas as pd

from creation_helpers import (
    get_all_pybaseball_ids,
    sanitize_name,
    CLUB_LIST,
    FRANCHISE_LIST,
    pd_players_df,
    mlbteam_and_franchise,
    get_hand,
    NEW_PLAYER_COST,
    RARITY_BASE_COSTS,
    should_update_player_description,
    calculate_rarity_cost_adjustment,
    DEFAULT_BATTER_OPS,
)
from db_calls import db_post, db_get, db_put, db_patch
from . import calcs_batter as cba
from defenders import calcs_defense as cde
from exceptions import logger
from rarity_thresholds import get_batter_thresholds


async def pd_battingcards_df(cardset_id: int) -> pd.DataFrame:
    """Fetch all batting cards for a cardset from the PD API as a DataFrame.

    Columns are renamed so downstream merges can join on ``battingcard_id``
    and ``player_id``.

    Raises:
        ValueError: if the API returns zero cards.
    """
    bc_query = await db_get(
        "battingcards",
        params=[("cardset_id", cardset_id), ("short_output", True)],
    )
    if bc_query["count"] == 0:
        raise ValueError("No batting cards returned from Paper Dynasty API")
    return pd.DataFrame(bc_query["cards"]).rename(
        columns={"id": "battingcard_id", "player": "player_id"}
    )


async def pd_battingcardratings_df(cardset_id: int, season: int) -> pd.DataFrame:
    """Fetch vs-LHP and vs-RHP card ratings, merge them, and derive rarity.

    Adds two computed columns:
      * ``total_OPS`` — a weighted OPS that double-counts the weaker split
        (see ``get_total_ops``), penalizing extreme platoon players.
      * ``new_rarity_id`` — rarity bucket from season-specific thresholds.

    Raises:
        ValueError: if either split query returns zero ratings.
    """
    # NOTE(review): team_id=31 and the opaque "ts" token look like leftover
    # debug/cache-busting query params — confirm whether the API requires them.
    vl_query = await db_get(
        "battingcardratings",
        params=[
            ("cardset_id", cardset_id),
            ("vs_hand", "L"),
            ("short_output", True),
            ("team_id", 31),
            ("ts", "s37136685556r6135248705"),
        ],
    )
    vr_query = await db_get(
        "battingcardratings",
        params=[
            ("cardset_id", cardset_id),
            ("vs_hand", "R"),
            ("short_output", True),
            ("team_id", 31),
            ("ts", "s37136685556r6135248705"),
        ],
    )
    if 0 in [vl_query["count"], vr_query["count"]]:
        raise ValueError("No batting card ratings returned from Paper Dynasty API")
    vl = pd.DataFrame(vl_query["ratings"])
    vr = pd.DataFrame(vr_query["ratings"])
    ratings = pd.merge(vl, vr, on="battingcard", suffixes=("_vL", "_vR")).rename(
        columns={"battingcard": "battingcard_id"}
    )

    def get_total_ops(df_data):
        # Average of both splits plus the weaker split, over 3 — weights the
        # worse platoon side so lopsided splits don't inflate the rating.
        ops_vl = df_data["obp_vL"] + df_data["slg_vL"]
        ops_vr = df_data["obp_vR"] + df_data["slg_vR"]
        return (ops_vr + ops_vl + min(ops_vl, ops_vr)) / 3

    ratings["total_OPS"] = ratings.apply(get_total_ops, axis=1)

    # Get season-appropriate rarity thresholds
    thresholds = get_batter_thresholds(season)

    def new_rarity_id(df_data):
        return thresholds.get_rarity(df_data["total_OPS"])

    ratings["new_rarity_id"] = ratings.apply(new_rarity_id, axis=1)
    return ratings
    # return pd.DataFrame(bcr_query['ratings']).rename(columns={'battingcard': 'battingcard_id'})


def get_batting_stats(
    file_path: Optional[str] = None,
    start_date: Optional[datetime.datetime] = None,
    end_date: Optional[datetime.datetime] = None,
    ignore_limits: bool = False,
) -> pd.DataFrame:
    """Load vs-LHP / vs-RHP basic and rate batting CSVs and merge per player.

    Args:
        file_path: directory prefix containing ``vlhp-basic.csv``,
            ``vrhp-basic.csv``, ``vlhp-rate.csv`` and ``vrhp-rate.csv``.
        start_date / end_date: reserved for a future date-range pull; any
            call without ``file_path`` currently raises.
        ignore_limits: when True, drop the PA floors (20 vs L, 40 vs R)
            down to 1 so every player line is kept.

    Returns:
        One row per ``playerId`` with ``_vL`` / ``_vR`` suffixed splits.

    Raises:
        LookupError: if ``file_path`` is not provided.
    """
    min_vl = 20 if not ignore_limits else 1
    min_vr = 40 if not ignore_limits else 1
    if file_path is not None:
        vl_basic = pd.read_csv(f"{file_path}vlhp-basic.csv").query(f"PA >= {min_vl}")
        vr_basic = pd.read_csv(f"{file_path}vrhp-basic.csv").query(f"PA >= {min_vr}")
        total_basic = pd.merge(
            vl_basic, vr_basic, on="playerId", suffixes=("_vL", "_vR")
        )
        vl_rate = pd.read_csv(f"{file_path}vlhp-rate.csv").query(f"PA >= {min_vl}")
        vr_rate = pd.read_csv(f"{file_path}vrhp-rate.csv").query(f"PA >= {min_vr}")
        total_rate = pd.merge(vl_rate, vr_rate, on="playerId", suffixes=("_vL", "_vR"))
        return pd.merge(total_basic, total_rate, on="playerId", suffixes=("", "_rate"))
    else:
        raise LookupError(
            "Date-based stat pulls not implemented, yet. Please provide batting csv files."
        )


def match_player_lines(
    all_batting: pd.DataFrame, all_players: pd.DataFrame, is_custom: bool = False
) -> pd.DataFrame:
    """Attach cross-reference IDs (mlbam/bbref/fangraphs) to batting lines.

    Rows whose mlbam key is NaN are dropped (the ``key_mlbam == key_mlbam``
    self-equality query filters NaN). Result is indexed by ``key_bbref``.
    """

    def get_pids(df_data):
        return get_all_pybaseball_ids(
            [df_data["playerId"]], "fangraphs", is_custom, df_data["Name_vL"]
        )

    print("Now pulling mlbam player IDs...")
    ids_and_names = all_batting.apply(get_pids, axis=1)
    player_data = (
        ids_and_names.merge(
            all_players, how="left", left_on="key_bbref", right_on="bbref_id"
        )
        .query("key_mlbam == key_mlbam")  # drop rows with NaN mlbam key
        .set_index("key_bbref", drop=False)
    )
    print("Matched mlbam to pd players.")
    final_batting = pd.merge(
        player_data,
        all_batting,
        left_on="key_fangraphs",
        right_on="playerId",
        sort=False,
    ).set_index("key_bbref", drop=False)
    return final_batting


async def create_new_players(
    final_batting: pd.DataFrame,
    cardset: dict,
    card_base_url: str,
    release_dir: str,
    player_desc: str,
) -> int:
    """Create PD player (and mlbplayer) records for unmatched batting lines.

    Rows with a null ``player_id`` get a new player posted with placeholder
    rarity 99 and the NEW_PLAYER_COST sentinel (both are replaced later by
    ``post_player_updates``). ``final_batting`` is updated in place with the
    database-assigned ``player_id`` / ``p_name``.

    Returns:
        Number of new players created.
    """
    new_players = []
    new_mlbplayers = {}

    def create_batters(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append(
            {
                "p_name": f"{f_name} {l_name}",
                "cost": NEW_PLAYER_COST,
                "image": f"{card_base_url}/{df_data['player_id']}/battingcard"
                f"{urllib.parse.quote('?d=')}{release_dir}",
                "mlbclub": CLUB_LIST[df_data["Tm_vL"]],
                "franchise": FRANCHISE_LIST[df_data["Tm_vL"]],
                "cardset_id": cardset["id"],
                "set_num": int(float(df_data["key_fangraphs"])),
                "rarity_id": 99,  # placeholder; real rarity assigned later
                "pos_1": "DH",  # placeholder; fielding positions added later
                "description": f"{player_desc}",
                "bbref_id": df_data.name,  # index is key_bbref
                "fangr_id": int(float(df_data["key_fangraphs"])),
                "strat_code": int(float(df_data["key_mlbam"])),
            }
        )
        new_mlbplayers[df_data.name] = {
            "first_name": sanitize_name(df_data["name_first"]).title(),
            "last_name": sanitize_name(df_data["name_last"]).title(),
            "key_mlbam": int(float(df_data["key_mlbam"])),
            "key_fangraphs": int(float(df_data["key_fangraphs"])),
            "key_bbref": df_data["key_bbref"],
            "key_retro": df_data["key_retro"],
        }

    final_batting[final_batting["player_id"].isnull()].apply(create_batters, axis=1)
    print(f"Creating {len(new_players)} new players...")
    for x in new_players:
        # Reuse an existing mlbplayer record when one matches the bbref key.
        mlb_query = await db_get("mlbplayers", params=[("key_bbref", x["bbref_id"])])
        if mlb_query["count"] > 0:
            x["mlbplayer_id"] = mlb_query["players"][0]["id"]
        else:
            new_mlb = await db_post(
                "mlbplayers/one", payload=new_mlbplayers[x["bbref_id"]]
            )
            x["mlbplayer_id"] = new_mlb["id"]
        this_player = await db_post("players", payload=x)
        final_batting.at[x["bbref_id"], "player_id"] = this_player["player_id"]
        final_batting.at[x["bbref_id"], "p_name"] = this_player["p_name"]
    print(
        f"Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n"
    )
    return len(new_players)


def get_run_stat_df(final_batting: pd.DataFrame, input_path: str) -> pd.DataFrame:
    """Join baserunning stats (``running.csv``) onto the batting DataFrame.

    The CSV is indexed by its ``Name-additional`` column (bbref IDs), which
    lines up with ``final_batting``'s key_bbref index for the join.
    """
    print("Reading baserunning stats...")
    run_data = pd.read_csv(f"{input_path}running.csv").set_index("Name-additional")
    run_data["bat_hand"] = run_data.apply(get_hand, axis=1)
    offense_stats = final_batting.join(run_data)
    print(
        f"Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref..."
    )
    return offense_stats


async def calculate_batting_cards(
    offense_stats: pd.DataFrame, cardset: dict, season_pct: float, post_batters: bool
) -> pd.DataFrame:
    """Compute per-player card attributes (stealing, H&R, running, hand).

    When ``post_batters`` is True the cards are PUT to the API; either way
    the returned DataFrame is re-merged with the cardset's batting cards so
    each row carries its database ``battingcard_id``.
    """
    batting_cards = []

    def create_batting_card(df_data):
        logger.info(df_data["player_id"])
        try:
            s_data = cba.stealing(
                chances=int(df_data["SBO"]),
                sb2s=int(df_data["SB2"]),
                cs2s=int(df_data["CS2"]),
                sb3s=int(df_data["SB3"]),
                cs3s=int(df_data["CS3"]),
                season_pct=season_pct,
            )
        except ValueError as e:
            # Best-effort: a player with unusable steal data gets zeroes
            # rather than aborting the whole card run.
            print(f"Stealing error for *{df_data.name}*: {e}")
            logger.error(e)
            s_data = [0, 0, 0, 0]
        batting_cards.append(
            {
                "player_id": df_data["player_id"],
                "key_bbref": df_data.name,
                "key_fangraphs": int(float(df_data["key_fangraphs"])),
                "key_mlbam": df_data["key_mlbam"],
                "key_retro": df_data["key_retro"],
                "name_first": df_data["name_first"].title(),
                "name_last": df_data["name_last"].title(),
                "steal_low": s_data[0],
                "steal_high": s_data[1],
                "steal_auto": s_data[2],
                "steal_jump": s_data[3],
                "hit_and_run": cba.hit_and_run(
                    df_data["AB_vL"],
                    df_data["AB_vR"],
                    df_data["H_vL"],
                    df_data["H_vR"],
                    df_data["HR_vL"],
                    df_data["HR_vR"],
                    df_data["SO_vL"],
                    df_data["SO_vR"],
                ),
                "running": cba.running(df_data["XBT%"]),
                "hand": df_data["bat_hand"],
            }
        )

    print("Calculating batting cards...")
    offense_stats.apply(create_batting_card, axis=1)
    print("Cards are complete.\n\nPosting cards now...")
    if post_batters:
        resp = await db_put(
            "battingcards", payload={"cards": batting_cards}, timeout=30
        )
        print(
            f"Response: {resp}\n\nMatching batting card database IDs to player stats..."
        )
    offense_stats = pd.merge(
        offense_stats, await pd_battingcards_df(cardset["id"]), on="player_id"
    ).set_index("key_bbref", drop=False)
    return offense_stats


async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool) -> int:
    """Compute per-card batting ratings and optionally PUT them to the API.

    Returns:
        Number of rating rows generated.
    """
    batting_ratings = []

    def create_batting_card_ratings(df_data):
        logger.debug(f"Calculating card ratings for {df_data.name}")
        batting_ratings.extend(cba.get_batter_ratings(df_data))

    print("Calculating card ratings...")
    offense_stats.apply(create_batting_card_ratings, axis=1)
    print("Ratings are complete\n\nPosting ratings now...")
    if to_post:
        resp = await db_put(
            "battingcardratings", payload={"ratings": batting_ratings}, timeout=30
        )
        print(f"Response: {resp}\n\nPulling fresh PD player data...")
    return len(batting_ratings)


async def post_player_updates(
    cardset: Dict[str, Any],
    card_base_url: str,
    release_dir: str,
    player_desc: str,
    is_liveseries: bool,
    to_post: bool,
    is_custom: bool,
    season: int,
) -> int:
    """
    Update player metadata after card creation (costs, rarities, descriptions, teams, images).

    Process:
    1. Pull fresh pd_players and batting cards/ratings
    2. Calculate total OPS and assign rarity_id
    3. For NEW players (cost == NEW_PLAYER_COST):
       - Set cost = RARITY_BASE_COSTS[rarity] * total_OPS / average_ops[rarity]
       - Set rarity_id
    4. For existing players:
       - Update costs if rarity changed
       - Update descriptions (promo cardsets: only new cards; regular: all except PotM)
       - Update team/franchise if live series
       - Update image URLs

    Returns:
        Number of player updates sent to database
    """
    p_data = await pd_players_df(cardset["id"])
    # FIX: set_index returns a new frame (it is not in-place); the original
    # discarded the result, leaving p_data unindexed.
    p_data = p_data.set_index("player_id", drop=False)

    # Use LEFT JOIN to keep all batters, even those without ratings
    batting_cards = await pd_battingcards_df(cardset["id"])
    batting_ratings = await pd_battingcardratings_df(cardset["id"], season)
    total_ratings = pd.merge(
        batting_cards,
        batting_ratings,
        on="battingcard_id",
        how="left",  # Keep all batting cards
    )

    # Assign default rarity (Common/5) for players without ratings
    if "new_rarity_id" not in total_ratings.columns:
        total_ratings["new_rarity_id"] = 5
    total_ratings["new_rarity_id"] = (
        total_ratings["new_rarity_id"]
        .replace(r"^\s*$", np.nan, regex=True)
        .fillna(5)
        .astype("Int64")  # optional: keep it as nullable integer type
    )

    # Assign default total_OPS for players without ratings (Common rarity default)
    if "total_OPS" in total_ratings.columns:
        missing_ops = total_ratings[total_ratings["total_OPS"].isna()]
        if not missing_ops.empty:
            logger.warning(
                f"batters.creation.post_player_updates - {len(missing_ops)} players missing total_OPS, assigning default 0.612: {missing_ops[['player_id', 'battingcard_id']].to_dict('records')}"
            )
        total_ratings["total_OPS"] = total_ratings["total_OPS"].fillna(0.612)

    player_data = pd.merge(p_data, total_ratings, on="player_id").set_index(
        "player_id", drop=False
    )
    del total_ratings

    def get_pids(df_data):
        # Custom cardsets key off fangraphs IDs; regular sets use bbref IDs.
        if is_custom:
            return get_all_pybaseball_ids([df_data["fangr_id"]], "fangraphs", is_custom)
        else:
            return get_all_pybaseball_ids([df_data["bbref_id"]], "bbref")

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (
        ids_and_names.merge(
            player_data, how="left", left_on="key_bbref", right_on="bbref_id"
        )
        .query("key_mlbam == key_mlbam")  # drop rows with NaN mlbam key
        .set_index("key_bbref", drop=False)
    )

    player_updates = {}  # {player_id: [(param, value), ...]}

    # Average total_OPS per rarity, computed only over players whose rarity
    # is already correct, so new players don't skew the baseline.
    rarity_group = player_data.query("rarity == new_rarity_id").groupby("rarity")
    average_ops = rarity_group["total_OPS"].mean().to_dict()
    # Fill in missing rarity averages with defaults
    for rarity, default_ops in DEFAULT_BATTER_OPS.items():
        if rarity not in average_ops:
            average_ops[rarity] = default_ops

    def get_player_updates(df_data):
        params = []
        # Check if description should be updated using extracted business logic
        if should_update_player_description(
            cardset_name=cardset["name"],
            player_cost=df_data["cost"],
            current_description=df_data["description"],
            new_description=player_desc,
        ):
            params = [("description", f"{player_desc}")]
            logger.debug(
                f"batters.creation.post_player_updates - Setting description for player_id={df_data['player_id']}: "
                f"'{df_data['description']}' -> '{player_desc}' (cost={df_data['cost']}, cardset={cardset['name']})"
            )
        else:
            logger.debug(
                f"batters.creation.post_player_updates - Skipping description update for player_id={df_data['player_id']}: "
                f"current='{df_data['description']}', proposed='{player_desc}' (cost={df_data['cost']}, cardset={cardset['name']})"
            )
        if is_liveseries:
            team_data = mlbteam_and_franchise(int(float(df_data["key_mlbam"])))
            if (
                df_data["mlbclub"] != team_data["mlbclub"]
                and team_data["mlbclub"] is not None
            ):
                params.extend([("mlbclub", team_data["mlbclub"])])
            if (
                df_data["franchise"] != team_data["franchise"]
                and team_data["franchise"] is not None
            ):
                params.extend([("franchise", team_data["franchise"])])
        # if release_directory not in df_data['image']:
        params.extend(
            [
                (
                    "image",
                    f"{card_base_url}/{df_data['player_id']}/battingcard"
                    f"{urllib.parse.quote('?d=')}{release_dir}",
                )
            ]
        )
        if df_data["cost"] == NEW_PLAYER_COST:
            # New player: price relative to the rarity's average OPS.
            params.extend(
                [
                    (
                        "cost",
                        round(
                            RARITY_BASE_COSTS[df_data["new_rarity_id"]]
                            * df_data["total_OPS"]
                            / average_ops[df_data["new_rarity_id"]]
                        ),
                    ),
                    ("rarity_id", df_data["new_rarity_id"]),
                ]
            )
        elif df_data["rarity"] != df_data["new_rarity_id"]:
            # Calculate adjusted cost for rarity change using lookup table
            new_cost = calculate_rarity_cost_adjustment(
                old_rarity=df_data["rarity"],
                new_rarity=df_data["new_rarity_id"],
                old_cost=df_data["cost"],
            )
            params.extend([("cost", new_cost), ("rarity_id", df_data["new_rarity_id"])])
        if len(params) > 0:
            if df_data.player_id not in player_updates:
                player_updates[df_data.player_id] = params
            else:
                player_updates[df_data.player_id].extend(params)

    player_data.apply(get_player_updates, axis=1)
    print(f"Sending {len(player_updates)} player updates to PD database...")
    if to_post:
        for x in player_updates:
            await db_patch("players", object_id=x, params=player_updates[x])
    return len(player_updates)


async def run_batter_fielding(
    season: int, offense_stats: pd.DataFrame, season_pct: float, post_batters: bool
):
    """Pull bbref fielding data for every position and create PD positions."""
    # Positions in the exact order cde.create_positions expects them.
    position_labels = [
        ("c", "catcher"),
        ("1b", "first base"),
        ("2b", "second base"),
        ("3b", "third base"),
        ("ss", "short stop"),
        ("lf", "left field"),
        ("cf", "center field"),
        ("rf", "right field"),
        ("of", "outfield"),
    ]
    fielding_dfs = []
    for pos, label in position_labels:
        print(f"Pulling {label} defense...")
        fielding_dfs.append(cde.get_bbref_fielding_df(pos, season))
    print("Positions data is retrieved")
    await cde.create_positions(
        offense_stats,
        season_pct,
        post_batters,
        *fielding_dfs,
    )


async def run_batters(
    cardset: dict,
    input_path: str,
    post_players: bool,
    card_base_url: str,
    release_directory: str,
    player_description: str,
    season_pct: float,
    post_batters: bool,
    pull_fielding: bool,
    season: int,
    is_liveseries: bool,
    ignore_limits: bool,
    is_custom: bool = False,
) -> Dict[str, int]:
    """Full batter pipeline: stats -> players -> cards -> ratings -> updates.

    Returns:
        ``{"tot_batters": <rows processed>, "new_batters": <players created>}``
    """
    print("Pulling PD player IDs...")
    pd_players = await pd_players_df(cardset["id"])
    print("Reading batting stats...")
    all_stats = get_batting_stats(file_path=input_path, ignore_limits=ignore_limits)
    print(f"Processed {len(all_stats.values)} batters\n")
    bat_step1 = match_player_lines(all_stats, pd_players, is_custom)
    if post_players:
        new_batters = await create_new_players(
            bat_step1, cardset, card_base_url, release_directory, player_description
        )
    else:
        new_batters = 0
    # Custom Cardsets
    if cardset["id"] in [16]:
        # Custom set: skip baserunning/card calcs, just attach card IDs.
        offense_stats = pd.merge(
            bat_step1, await pd_battingcards_df(cardset["id"]), on="player_id"
        ).set_index("key_bbref", drop=False)
    else:
        bat_step2 = get_run_stat_df(bat_step1, input_path)
        offense_stats = await calculate_batting_cards(
            bat_step2, cardset, season_pct, post_batters
        )
        del bat_step2
    # Release intermediates before the memory-heavy ratings/fielding steps.
    del bat_step1, all_stats
    await calculate_batting_ratings(offense_stats, post_batters)
    if pull_fielding:
        await run_batter_fielding(season, offense_stats, season_pct, post_batters)
    await post_player_updates(
        cardset,
        card_base_url,
        release_directory,
        player_description,
        is_liveseries,
        post_batters,
        is_custom,
        season,
    )
    return {"tot_batters": len(offense_stats.index), "new_batters": new_batters}