Standardize formatting with black and apply ruff auto-fixes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
323 lines
12 KiB
Python
323 lines
12 KiB
Python
"""
Fix script to regenerate cardpositions for cardset 27 batters.

This addresses the bug where batter positions were deleted but never recreated
due to script interruption on 2025-12-07.
"""
|
|
|
|
import asyncio
|
|
import pandas as pd
|
|
|
|
from db_calls import db_get, db_put, db_delete
|
|
from exceptions import logger
|
|
import defenders.calcs_defense as cde
|
|
|
|
# Configuration
# ID of the cardset whose batter cardpositions this script regenerates.
CARDSET_ID = 27
# Directory prefix for the defense_*.csv inputs. File names are appended to
# this string directly, so the trailing slash is required.
DATA_INPUT_FILE_PATH = "data-input/2005 Live Cardset/"
|
|
|
|
|
|
async def get_batters_from_api():
    """Fetch the batters of cardset ``CARDSET_ID`` from the API.

    Queries the ``players`` endpoint (with ``short_output`` enabled) and keeps
    only the players whose ``image`` value contains the substring
    ``"batting"``.

    Returns:
        list: The matching player dicts exactly as returned by the API, or an
        empty list when the response is falsy or reports a ``count`` of 0.
    """
    print(f"Fetching batters from cardset {CARDSET_ID}...")

    # Get all players in cardset
    resp = await db_get(
        "players", params=[("cardset_id", CARDSET_ID), ("short_output", True)]
    )

    if not resp or resp.get("count", 0) == 0:
        print("No players found!")
        return []

    # Filter to batters only (those with battingcard URLs).
    # ``or ""`` guards against an explicit null image: ``.get("image", "")``
    # only supplies the default when the key is absent, so a present-but-None
    # value would otherwise crash the ``in`` test with a TypeError.
    batters = [p for p in resp["players"] if "batting" in (p.get("image") or "")]
    print(f"Found {len(batters)} batters")
    return batters
|
|
|
|
|
|
def _blended_range_runs(pos_df: pd.DataFrame, key_bbref):
    """Return the range-runs figure used to rate one player at one position.

    When the frame has a ``bis_runs_total`` column, the result is the mean of
    ``tz_runs_total``, ``bis_runs_total`` and the smaller of the two (so the
    lower figure is counted twice). Otherwise the raw ``tz_runs_total`` cell is
    returned unchanged.
    """
    if "bis_runs_total" not in pos_df.columns:
        return pos_df.at[key_bbref, "tz_runs_total"]

    tz_runs = int(pos_df.at[key_bbref, "tz_runs_total"])
    bis_runs = int(pos_df.at[key_bbref, "bis_runs_total"])
    return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3


def calc_positions_for_batters(batters: list) -> pd.DataFrame:
    """Calculate position data for all batters using defense CSV files.

    Reads ``defense_<pos>.csv`` for c, 1b, 2b, 3b, ss, lf, cf, rf and of from
    ``DATA_INPUT_FILE_PATH`` (each indexed by its ``key_bbref`` column) and
    builds one record per player/position.

    Args:
        batters: Player dicts. The keys read are ``bbref_id`` (used to look the
            player up in the CSVs), ``player_id`` and ``p_name`` (used only in
            messages, defaulting to ``"Unknown"``).

    Returns:
        pd.DataFrame: One row per position record. Every record has
        ``player_id``, ``position`` and ``innings``; fielding records add
        ``range`` and ``error``; outfield and catcher records add ``arm``;
        catcher records add ``pb`` and ``overthrow``.

    Notes:
        * Batters without a ``bbref_id`` are skipped with a printed warning.
        * A position is only recorded when the player has at least 10.0
          ``Inn_def`` at it.
        * A failure while rating one position is logged and that position is
          skipped; it does not abort the run.
        * A batter who ends up with no committed position record gets a single
          ``DH`` record with 100 innings.
    """
    print(f"Loading defense CSV files from {DATA_INPUT_FILE_PATH}...")

    # Loaded in the same order as the positions are listed here.
    defense = {
        pos: pd.read_csv(f"{DATA_INPUT_FILE_PATH}defense_{pos}.csv").set_index(
            "key_bbref"
        )
        for pos in ("c", "1b", "2b", "3b", "ss", "lf", "cf", "rf", "of")
    }
    df_c = defense["c"]
    df_of = defense["of"]

    all_pos = []
    season_pct = 1.0
    min_innings = 10.0  # Minimum Inn_def required to earn a position record

    for batter in batters:
        key_bbref = batter.get("bbref_id")
        player_id = batter.get("player_id")
        player_name = batter.get("p_name", "Unknown")

        if not key_bbref:
            print(f" Warning: No bbref_id for player {player_id} ({player_name})")
            continue

        # Stays True until at least one record for this batter is actually
        # appended to all_pos; drives the DH fallback below.
        no_data = True

        # Process infield positions
        for position in ("1b", "2b", "3b", "ss"):
            pos_df = defense[position]
            if key_bbref not in pos_df.index:
                continue
            try:
                average_range = _blended_range_runs(pos_df, key_bbref)
                innings = float(pos_df.at[key_bbref, "Inn_def"])

                if innings >= min_innings:
                    all_pos.append(
                        {
                            "player_id": player_id,
                            "position": position.upper(),
                            "innings": innings,
                            "range": cde.get_if_range(
                                pos_code=position,
                                tz_runs=round(average_range),
                                r_dp=0,
                                season_pct=season_pct,
                            ),
                            "error": cde.get_any_error(
                                pos_code=position,
                                errors=int(pos_df.at[key_bbref, "E_def"]),
                                chances=int(pos_df.at[key_bbref, "chances"]),
                                season_pct=season_pct,
                            ),
                        }
                    )
                    no_data = False
            except Exception as e:
                logger.info(f"Infield position failed for {player_name}: {e}")

        # Process outfield positions. Records are staged in of_payloads and
        # only committed once the shared arm/error ratings are known.
        of_arms = []
        of_payloads = []
        for position in ("lf", "cf", "rf"):
            pos_df = defense[position]
            if key_bbref not in pos_df.index:
                continue
            try:
                average_range = _blended_range_runs(pos_df, key_bbref)
                innings = float(pos_df.at[key_bbref, "Inn_def"])

                if innings >= min_innings:
                    of_payloads.append(
                        {
                            "player_id": player_id,
                            "position": position.upper(),
                            "innings": innings,
                            "range": cde.get_of_range(
                                pos_code=position,
                                tz_runs=round(average_range),
                                season_pct=season_pct,
                            ),
                        }
                    )
                    of_run_rating = (
                        "bis_runs_outfield"
                        if "bis_runs_outfield" in pos_df.columns
                        else "tz_runs_total"
                    )
                    of_arms.append(int(pos_df.at[key_bbref, of_run_rating]))
            except Exception as e:
                logger.info(f"Outfield position failed for {player_name}: {e}")

        # Add arm/error to outfield positions
        if key_bbref in df_of.index and of_arms and of_payloads:
            try:
                error_rating = cde.get_any_error(
                    pos_code="of",
                    errors=int(df_of.at[key_bbref, "E_def"]),
                    chances=int(df_of.at[key_bbref, "chances"]),
                    season_pct=season_pct,
                )
                arm_rating = cde.arm_outfield(of_arms)
                for payload in of_payloads:
                    payload["error"] = error_rating
                    payload["arm"] = arm_rating
                    all_pos.append(payload)
                # Clear the flag only here, after the staged outfield records
                # are committed. Clearing it while staging would suppress the
                # DH fallback even when the records are later discarded
                # (player absent from defense_of.csv, or a rating failure),
                # leaving the batter with no position at all.
                no_data = False
            except Exception as e:
                logger.info(f"Outfield arm/error failed for {player_name}: {e}")

        # Process catcher
        if key_bbref in df_c.index:
            try:
                run_rating = (
                    "bis_runs_catcher_sb"
                    if "bis_runs_catcher_sb" in df_c
                    else "tz_runs_catcher"
                )

                # No steal attempts against this catcher: use a fixed arm of 3
                # instead of deriving one from a caught-stealing percentage.
                if df_c.at[key_bbref, "SB"] + df_c.at[key_bbref, "CS"] == 0:
                    arm_rating = 3
                else:
                    arm_rating = cde.arm_catcher(
                        cs_pct=df_c.at[key_bbref, "caught_stealing_perc"],
                        raa=int(df_c.at[key_bbref, run_rating]),
                        season_pct=season_pct,
                    )

                innings = float(df_c.at[key_bbref, "Inn_def"])
                if innings >= min_innings:
                    all_pos.append(
                        {
                            "player_id": player_id,
                            "position": "C",
                            "innings": innings,
                            "range": cde.range_catcher(
                                rs_value=int(df_c.at[key_bbref, "tz_runs_catcher"]),
                                season_pct=season_pct,
                            ),
                            "error": cde.get_any_error(
                                pos_code="c",
                                errors=int(df_c.at[key_bbref, "E_def"]),
                                chances=int(df_c.at[key_bbref, "chances"]),
                                season_pct=season_pct,
                            ),
                            "arm": arm_rating,
                            "pb": cde.pb_catcher(
                                pb=int(df_c.at[key_bbref, "PB"]),
                                innings=int(innings),
                                season_pct=season_pct,
                            ),
                            "overthrow": cde.ot_catcher(
                                errors=int(df_c.at[key_bbref, "E_def"]),
                                chances=int(df_c.at[key_bbref, "chances"]),
                                season_pct=season_pct,
                            ),
                        }
                    )
                    no_data = False
            except Exception as e:
                logger.info(f"Catcher position failed for {player_name}: {e}")

        # DH fallback if no defensive data
        if no_data:
            all_pos.append(
                {
                    "player_id": player_id,
                    "position": "DH",
                    "innings": 100,  # Default innings for DH
                }
            )

    print(f"Calculated {len(all_pos)} position records for {len(batters)} batters")
    return pd.DataFrame(all_pos)
|
|
|
|
|
|
async def delete_batter_positions(batter_player_ids: list):
    """Delete the existing cardpositions of the given batters.

    Fetches every cardposition of cardset ``CARDSET_ID`` and deletes, one
    request at a time, those whose ``player.player_id`` is in
    ``batter_player_ids``. Positions of other players are left alone. A failed
    delete is reported and skipped rather than aborting the run.

    Args:
        batter_player_ids: Player IDs whose positions should be removed.

    Returns:
        int: Number of positions successfully deleted (0 when the cardset has
        no positions at all).
    """
    print(f"Fetching existing cardpositions for cardset {CARDSET_ID}...")

    existing = await db_get("cardpositions", params=[("cardset_id", CARDSET_ID)])
    if not existing or existing.get("count", 0) == 0:
        print("No existing positions found")
        return 0

    # Only delete positions for batters (not pitchers)
    wanted_ids = set(batter_player_ids)
    doomed = []
    for record in existing["positions"]:
        if record["player"]["player_id"] in wanted_ids:
            doomed.append(record)
    total = len(doomed)

    print(f"Found {total} batter positions to delete (keeping pitcher positions)")

    removed = 0
    for record in doomed:
        try:
            await db_delete("cardpositions", object_id=record["id"], timeout=1)
        except Exception as e:
            print(f' Warning: Failed to delete position {record["id"]}: {e}')
            continue

        removed += 1
        # Progress line every 50 successful deletions.
        if removed % 50 == 0:
            print(f" Deleted {removed}/{total} positions...")

    print(f"Deleted {removed} batter positions")
    return removed
|
|
|
|
|
|
async def post_positions(pos_df: pd.DataFrame):
    """Send the calculated cardpositions to the API in a single request.

    Each row becomes a dict with its NaN cells dropped (so a record only
    carries the ratings that apply to its position) and with ``player_id``
    forced to a plain ``int``.

    Args:
        pos_df: One row per position record; must contain ``player_id``.

    Returns:
        bool: True when the API call returned something other than None,
        False otherwise.
    """
    payload_rows = []
    for _, record in pos_df.iterrows():
        entry = record.dropna().to_dict()
        entry["player_id"] = int(record["player_id"])
        payload_rows.append(entry)

    print(f"POSTing {len(payload_rows)} cardpositions...")
    resp = await db_put(
        "cardpositions", payload={"positions": payload_rows}, timeout=10
    )

    if resp is None:
        print("Failed to post positions!")
        return False

    print(f"Successfully posted positions: {resp}")
    return True
|
|
|
|
|
|
async def main():
    """Regenerate the batter cardpositions of cardset ``CARDSET_ID``.

    Steps: fetch the batters, calculate their new positions, delete their
    existing positions, then post the new ones. A banner and a final
    success/failure line are printed to stdout.

    The new positions are calculated *before* anything is deleted. If the
    calculation raises (for example a missing CSV) or produces no records, the
    existing positions are left untouched instead of being deleted with
    nothing to replace them.
    """
    print("=" * 60)
    print("CARDPOSITIONS FIX SCRIPT")
    print("=" * 60)
    print(f"Target: Cardset {CARDSET_ID} (2005 Live)")
    print()

    # Step 1: Get batters from API
    batters = await get_batters_from_api()
    if not batters:
        print("No batters to process!")
        return

    batter_player_ids = [b["player_id"] for b in batters]

    # Step 2: Calculate new positions (done first so a failure here cannot
    # leave the cardset with deleted-but-not-recreated positions)
    pos_df = calc_positions_for_batters(batters)
    if pos_df.empty:
        print("No positions calculated - existing positions left untouched")
        return

    # Step 3: Delete existing batter positions
    await delete_batter_positions(batter_player_ids)

    # Step 4: Post new positions
    success = await post_positions(pos_df)

    print()
    print("=" * 60)
    if success:
        print("FIX COMPLETE - Batter positions have been regenerated!")
    else:
        print("FIX FAILED - Check logs for errors")
    print("=" * 60)
|
|
|
|
|
|
# Run the fix only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
|