paper-dynasty-card-creation/fix_cardpositions.py
Cal Corum b10cfcfd09 Fix check_cards_and_upload.py to regenerate cards from PD API
- Changed card URL generation to fetch from PD API endpoint
  (/v2/players/{id}/battingcard) instead of existing S3 URL
- This ensures database changes (like cardpositions) are reflected
  in regenerated card images
- Added fix_cardpositions.py utility for regenerating batter positions
  without re-running full retrosheet_data.py script

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 17:05:47 -06:00

297 lines
12 KiB
Python

"""
Fix script to regenerate cardpositions for cardset 27 batters.
This addresses the bug where batter positions were deleted but never recreated
due to script interruption on 2025-12-07.
"""
import asyncio
import pandas as pd
from db_calls import db_get, db_put, db_delete
from exceptions import logger
import defenders.calcs_defense as cde
# Configuration
# ID of the cardset whose batter cardpositions this script deletes and rebuilds;
# sent to the API as the `cardset_id` query parameter.
CARDSET_ID = 27
# Directory prefix (trailing slash required: file names are appended directly)
# that holds the defense_<pos>.csv input files.
DATA_INPUT_FILE_PATH = 'data-input/2005 Live Cardset/'
async def get_batters_from_api():
    """Fetch the players of cardset CARDSET_ID and keep only the batters.

    A player is treated as a batter when the substring ``'batting'`` appears
    in its ``image`` value.

    Returns:
        List of player dicts as returned by the API, or an empty list when the
        API returns nothing or reports a count of zero.
    """
    print(f'Fetching batters from cardset {CARDSET_ID}...')

    # Get all players in cardset
    resp = await db_get('players', params=[
        ('cardset_id', CARDSET_ID),
        ('short_output', True)
    ])

    if not resp or resp.get('count', 0) == 0:
        print('No players found!')
        return []

    # Filter to batters only (those with battingcard URLs).
    # `or ''` covers a player whose image key is present but null: dict.get's
    # default only applies to a missing key, and `'batting' in None` raises.
    batters = [
        p for p in resp['players']
        if 'batting' in (p.get('image') or '')
    ]
    print(f'Found {len(batters)} batters')

    return batters
def _load_defense_tables() -> dict:
    """Read the nine defense CSVs and index each one by ``key_bbref``.

    Returns:
        Dict mapping a lowercase position code ('c', '1b', ..., 'of') to the
        DataFrame read from ``defense_<code>.csv`` under DATA_INPUT_FILE_PATH.
    """
    codes = ('c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of')
    return {
        code: pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_{code}.csv').set_index('key_bbref')
        for code in codes
    }


def _average_range(pos_df: pd.DataFrame, key_bbref):
    """Return the runs figure that is later rounded and passed as ``tz_runs``.

    When the table has a ``bis_runs_total`` column the result is the mean of
    tz runs, bis runs and the smaller of the two (so the lower figure counts
    twice); otherwise it is the raw ``tz_runs_total`` cell.
    """
    tz_runs = pos_df.at[key_bbref, 'tz_runs_total']
    if 'bis_runs_total' not in pos_df.columns:
        return tz_runs

    tz_runs = int(tz_runs)
    bis_runs = int(pos_df.at[key_bbref, 'bis_runs_total'])
    return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3


def _infield_positions(tables: dict, key_bbref, player_id, player_name, season_pct) -> list:
    """Build 1B/2B/3B/SS records for positions with at least 10 innings."""
    records = []
    for position in ('1b', '2b', '3b', 'ss'):
        pos_df = tables[position]
        if key_bbref not in pos_df.index:
            continue
        try:
            average_range = _average_range(pos_df, key_bbref)
            innings = float(pos_df.at[key_bbref, 'Inn_def'])
            if innings >= 10.0:
                records.append({
                    "player_id": player_id,
                    "position": position.upper(),
                    "innings": innings,
                    "range": cde.get_if_range(
                        pos_code=position,
                        tz_runs=round(average_range),
                        r_dp=0,
                        season_pct=season_pct
                    ),
                    "error": cde.get_any_error(
                        pos_code=position,
                        errors=int(pos_df.at[key_bbref, 'E_def']),
                        chances=int(pos_df.at[key_bbref, 'chances']),
                        season_pct=season_pct
                    )
                })
        except Exception as e:
            # Best-effort per position: a bad row must not abort the whole run.
            logger.info(f'Infield position failed for {player_name}: {e}')
    return records


def _outfield_positions(tables: dict, key_bbref, player_id, player_name, season_pct) -> list:
    """Build LF/CF/RF records, each carrying a shared arm and error rating.

    Range is computed per position; arm and error are computed once from the
    collected per-position run values and the combined ``of`` table. If that
    shared step cannot be completed, no outfield record is returned.
    """
    of_arms = []
    of_payloads = []
    for position in ('lf', 'cf', 'rf'):
        pos_df = tables[position]
        if key_bbref not in pos_df.index:
            continue
        try:
            average_range = _average_range(pos_df, key_bbref)
            innings = float(pos_df.at[key_bbref, 'Inn_def'])
            if innings >= 10.0:
                of_payloads.append({
                    "player_id": player_id,
                    "position": position.upper(),
                    "innings": innings,
                    "range": cde.get_of_range(
                        pos_code=position,
                        tz_runs=round(average_range),
                        season_pct=season_pct
                    )
                })
                of_run_rating = 'bis_runs_outfield' if 'bis_runs_outfield' in pos_df.columns else 'tz_runs_total'
                of_arms.append(int(pos_df.at[key_bbref, of_run_rating]))
        except Exception as e:
            logger.info(f'Outfield position failed for {player_name}: {e}')

    if not of_payloads:
        return []

    df_of = tables['of']
    if key_bbref not in df_of.index or not of_arms:
        # Without the combined OF row (or any arm input) the payloads cannot be
        # completed; say so instead of dropping them silently.
        logger.info(f'Outfield positions dropped for {player_name}: no combined OF arm/error data')
        return []

    try:
        error_rating = cde.get_any_error(
            pos_code='of',
            errors=int(df_of.at[key_bbref, 'E_def']),
            chances=int(df_of.at[key_bbref, 'chances']),
            season_pct=season_pct
        )
        arm_rating = cde.arm_outfield(of_arms)
    except Exception as e:
        logger.info(f'Outfield arm/error failed for {player_name}: {e}')
        return []

    for payload in of_payloads:
        payload['error'] = error_rating
        payload['arm'] = arm_rating
    return of_payloads


def _catcher_position(df_c: pd.DataFrame, key_bbref, player_id, player_name, season_pct):
    """Build the catcher record, or return None if absent, under 10 innings, or failed."""
    if key_bbref not in df_c.index:
        return None
    try:
        run_rating = 'bis_runs_catcher_sb' if 'bis_runs_catcher_sb' in df_c.columns else 'tz_runs_catcher'
        if df_c.at[key_bbref, 'SB'] + df_c.at[key_bbref, 'CS'] == 0:
            # No steal attempts recorded against this catcher: fixed arm value.
            arm_rating = 3
        else:
            arm_rating = cde.arm_catcher(
                cs_pct=df_c.at[key_bbref, 'caught_stealing_perc'],
                raa=int(df_c.at[key_bbref, run_rating]),
                season_pct=season_pct
            )

        innings = float(df_c.at[key_bbref, 'Inn_def'])
        if innings >= 10.0:
            errors = int(df_c.at[key_bbref, 'E_def'])
            chances = int(df_c.at[key_bbref, 'chances'])
            return {
                "player_id": player_id,
                "position": 'C',
                "innings": innings,
                "range": cde.range_catcher(
                    rs_value=int(df_c.at[key_bbref, 'tz_runs_catcher']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='c',
                    errors=errors,
                    chances=chances,
                    season_pct=season_pct
                ),
                "arm": arm_rating,
                "pb": cde.pb_catcher(
                    pb=int(df_c.at[key_bbref, 'PB']),
                    innings=int(innings),
                    season_pct=season_pct
                ),
                "overthrow": cde.ot_catcher(
                    errors=errors,
                    chances=chances,
                    season_pct=season_pct
                )
            }
    except Exception as e:
        logger.info(f'Catcher position failed for {player_name}: {e}')
    return None


def calc_positions_for_batters(batters: list) -> pd.DataFrame:
    """Calculate position data for all batters using defense CSV files.

    Args:
        batters: Player dicts; ``bbref_id``, ``player_id`` and ``p_name`` are
            read from each. Batters without a ``bbref_id`` are skipped with a
            printed warning.

    Returns:
        DataFrame with one row per (player, position) record. A batter for
        whom no fielding record could be produced gets a single ``DH`` row
        with 100 innings, so every processed batter has at least one row.

    Raises:
        Whatever ``pd.read_csv`` raises if a defense CSV is missing/unreadable,
        and ``KeyError`` if a CSV lacks the ``key_bbref`` column.
    """
    print(f'Loading defense CSV files from {DATA_INPUT_FILE_PATH}...')
    tables = _load_defense_tables()

    all_pos = []
    season_pct = 1.0

    for batter in batters:
        key_bbref = batter.get('bbref_id')
        player_id = batter.get('player_id')
        player_name = batter.get('p_name', 'Unknown')

        if not key_bbref:
            print(f'  Warning: No bbref_id for player {player_id} ({player_name})')
            continue

        player_pos = []
        player_pos.extend(_infield_positions(tables, key_bbref, player_id, player_name, season_pct))
        player_pos.extend(_outfield_positions(tables, key_bbref, player_id, player_name, season_pct))
        catcher = _catcher_position(tables['c'], key_bbref, player_id, player_name, season_pct)
        if catcher is not None:
            player_pos.append(catcher)

        # DH fallback keyed on records actually produced, so a batter whose
        # only fielding data was discarded still ends up with a position.
        if not player_pos:
            player_pos.append({
                "player_id": player_id,
                "position": 'DH',
                "innings": 100  # Default innings for DH
            })

        all_pos.extend(player_pos)

    print(f'Calculated {len(all_pos)} position records for {len(batters)} batters')
    return pd.DataFrame(all_pos)
async def delete_batter_positions(batter_player_ids: list):
    """Remove the cardpositions in cardset CARDSET_ID that belong to the given batters.

    Positions whose player is not in ``batter_player_ids`` are left alone.
    Deletions run one at a time; a failed delete is reported and skipped.

    Args:
        batter_player_ids: player_id values identifying the batters.

    Returns:
        Number of positions successfully deleted (0 when none exist).
    """
    print(f'Fetching existing cardpositions for cardset {CARDSET_ID}...')

    response = await db_get('cardpositions', params=[('cardset_id', CARDSET_ID)])
    if not response or response.get('count', 0) == 0:
        print('No existing positions found')
        return 0

    # Only delete positions for batters (not pitchers)
    wanted_ids = frozenset(batter_player_ids)
    doomed = []
    for position in response['positions']:
        if position['player']['player_id'] in wanted_ids:
            doomed.append(position)
    total = len(doomed)

    print(f'Found {total} batter positions to delete (keeping pitcher positions)')

    deleted = 0
    for position in doomed:
        try:
            await db_delete('cardpositions', object_id=position['id'], timeout=1)
            deleted += 1
            # Progress line after every 50th successful delete.
            if deleted % 50 == 0:
                print(f'  Deleted {deleted}/{total} positions...')
        except Exception as e:
            print(f'  Warning: Failed to delete position {position["id"]}: {e}')

    print(f'Deleted {deleted} batter positions')
    return deleted
async def post_positions(pos_df: pd.DataFrame):
    """Send the calculated cardpositions to the API in a single request.

    Each DataFrame row becomes one dict with its NaN cells removed and its
    ``player_id`` forced to a plain int.

    Args:
        pos_df: One row per position record; must contain ``player_id``.

    Returns:
        True when the API call returns a non-None response, otherwise False.
    """
    # {**cells, 'player_id': ...} overwrites the existing key in place, so the
    # key order of each payload dict is unchanged.
    payload_rows = [
        {**record.dropna().to_dict(), 'player_id': int(record['player_id'])}
        for _, record in pos_df.iterrows()
    ]

    print(f'POSTing {len(payload_rows)} cardpositions...')

    result = await db_put('cardpositions', payload={'positions': payload_rows}, timeout=10)

    if result is None:
        print('Failed to post positions!')
        return False

    print(f'Successfully posted positions: {result}')
    return True
async def main():
    """Regenerate the batter cardpositions for cardset CARDSET_ID.

    Order of operations: fetch batters, calculate the replacement positions,
    delete the existing batter positions, then upload the replacements.
    Calculation deliberately happens before deletion so that a failure while
    reading or processing the defense CSVs cannot leave the cardset with its
    batter positions deleted and nothing to replace them.
    """
    banner = '=' * 60
    print(banner)
    print('CARDPOSITIONS FIX SCRIPT')
    print(banner)
    print(f'Target: Cardset {CARDSET_ID} (2005 Live)')
    print()

    # Step 1: Get batters from API
    batters = await get_batters_from_api()
    if not batters:
        print('No batters to process!')
        return

    batter_player_ids = [b['player_id'] for b in batters]

    # Step 2: Calculate new positions (local work only; nothing destructive yet)
    pos_df = calc_positions_for_batters(batters)
    if pos_df.empty:
        # Nothing to upload, so do not delete what is already there.
        print('No positions calculated - existing positions left untouched')
        return

    # Step 3: Delete existing batter positions
    await delete_batter_positions(batter_player_ids)

    # Step 4: Post new positions
    success = await post_positions(pos_df)

    print()
    print(banner)
    if success:
        print('FIX COMPLETE - Batter positions have been regenerated!')
    else:
        print('FIX FAILED - Check logs for errors')
    print(banner)
# Run the async entry point only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())