"""
Fix script to regenerate cardpositions for cardset 27 batters.

This addresses the bug where batter positions were deleted but never recreated
due to script interruption on 2025-12-07.

To avoid repeating that failure, the new positions are calculated *before* any
existing positions are deleted, so a problem while reading the defense CSVs or
computing ratings leaves the existing data untouched.
"""
import asyncio

import pandas as pd

from db_calls import db_get, db_put, db_delete
from exceptions import logger
import defenders.calcs_defense as cde

# Configuration
CARDSET_ID = 27
DATA_INPUT_FILE_PATH = 'data-input/2005 Live Cardset/'

# A player must have at least this many innings at a position to be rated there.
MIN_INNINGS = 10.0

# Defense CSV suffixes, in the order the files are read (defense_<suffix>.csv).
_DEFENSE_POSITIONS = ('c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of')


async def get_batters_from_api():
    """Fetch all batters (players with battingcards) from the API for cardset 27.

    Returns:
        A list of player dicts whose ``image`` value contains ``'batting'``;
        an empty list when the API returns nothing or reports a zero count.
    """
    print(f'Fetching batters from cardset {CARDSET_ID}...')

    # Get all players in cardset
    resp = await db_get('players', params=[
        ('cardset_id', CARDSET_ID),
        ('short_output', True)
    ])

    if not resp or resp.get('count', 0) == 0:
        print('No players found!')
        return []

    # Filter to batters only (those with battingcard URLs).
    # `or ''` guards against an explicit None image, which would otherwise
    # raise TypeError on the `in` test.
    batters = [p for p in resp['players'] if 'batting' in (p.get('image') or '')]
    print(f'Found {len(batters)} batters')
    return batters


def _load_defense_frames() -> dict:
    """Read every defense CSV and return them keyed by position suffix.

    Each frame is indexed by its ``key_bbref`` column.
    """
    return {
        pos: pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_{pos}.csv').set_index('key_bbref')
        for pos in _DEFENSE_POSITIONS
    }


def _average_range(pos_df: pd.DataFrame, key_bbref):
    """Return the runs value used as the range input for one player/position.

    When the frame has a ``bis_runs_total`` column, the result is the mean of
    ``tz_runs_total``, ``bis_runs_total`` and the smaller of the two (so the
    lower figure is counted twice). Otherwise ``tz_runs_total`` is returned
    as-is.
    """
    tz_runs = pos_df.at[key_bbref, 'tz_runs_total']
    if 'bis_runs_total' not in pos_df.columns:
        return tz_runs

    tz_runs = int(tz_runs)
    bis_runs = int(pos_df.at[key_bbref, 'bis_runs_total'])
    return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3


def calc_positions_for_batters(batters: list) -> pd.DataFrame:
    """Calculate position data for all batters using defense CSV files.

    Args:
        batters: Player dicts; ``bbref_id``, ``player_id`` and ``p_name`` are
            read from each. Players without a ``bbref_id`` are skipped with a
            warning and get no records.

    Returns:
        A DataFrame with one row per (player, position). Every batter that has
        a ``bbref_id`` gets at least one row: if no fielding record could be
        produced, a single ``DH`` row is emitted instead.

    Raises:
        Whatever ``pandas.read_csv`` raises if a defense CSV is missing or
        unreadable. Per-player rating failures are logged and skipped.
    """
    print(f'Loading defense CSV files from {DATA_INPUT_FILE_PATH}...')

    frames = _load_defense_frames()
    df_c = frames['c']
    df_of = frames['of']
    infield_frames = [(frames[pos], pos) for pos in ('1b', '2b', '3b', 'ss')]
    outfield_frames = [(frames[pos], pos) for pos in ('lf', 'cf', 'rf')]

    all_pos = []
    season_pct = 1.0

    for batter in batters:
        key_bbref = batter.get('bbref_id')
        player_id = batter.get('player_id')
        player_name = batter.get('p_name', 'Unknown')

        if not key_bbref:
            print(f' Warning: No bbref_id for player {player_id} ({player_name})')
            continue

        # Used to decide on the DH fallback: it is based on records actually
        # appended, not on payloads that were built and later discarded.
        records_before = len(all_pos)

        # Process infield positions
        for pos_df, position in infield_frames:
            if key_bbref not in pos_df.index:
                continue
            try:
                average_range = _average_range(pos_df, key_bbref)

                if float(pos_df.at[key_bbref, 'Inn_def']) >= MIN_INNINGS:
                    all_pos.append({
                        "player_id": player_id,
                        "position": position.upper(),
                        "innings": float(pos_df.at[key_bbref, 'Inn_def']),
                        "range": cde.get_if_range(
                            pos_code=position,
                            tz_runs=round(average_range),
                            r_dp=0,
                            season_pct=season_pct
                        ),
                        "error": cde.get_any_error(
                            pos_code=position,
                            errors=int(pos_df.at[key_bbref, 'E_def']),
                            chances=int(pos_df.at[key_bbref, 'chances']),
                            season_pct=season_pct
                        )
                    })
            except Exception as e:
                logger.info(f'Infield position failed for {player_name}: {e}')

        # Process outfield positions. Payloads are held back until the shared
        # arm/error ratings (from the combined OF file) can be attached.
        of_arms = []
        of_payloads = []
        for pos_df, position in outfield_frames:
            if key_bbref not in pos_df.index:
                continue
            try:
                average_range = _average_range(pos_df, key_bbref)

                if float(pos_df.at[key_bbref, 'Inn_def']) >= MIN_INNINGS:
                    of_payloads.append({
                        "player_id": player_id,
                        "position": position.upper(),
                        "innings": float(pos_df.at[key_bbref, 'Inn_def']),
                        "range": cde.get_of_range(
                            pos_code=position,
                            tz_runs=round(average_range),
                            season_pct=season_pct
                        )
                    })
                    of_run_rating = (
                        'bis_runs_outfield' if 'bis_runs_outfield' in pos_df.columns
                        else 'tz_runs_total'
                    )
                    of_arms.append(int(pos_df.at[key_bbref, of_run_rating]))
            except Exception as e:
                logger.info(f'Outfield position failed for {player_name}: {e}')

        # Add arm/error to outfield positions
        if key_bbref in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            try:
                error_rating = cde.get_any_error(
                    pos_code='of',
                    errors=int(df_of.at[key_bbref, 'E_def']),
                    chances=int(df_of.at[key_bbref, 'chances']),
                    season_pct=season_pct
                )
                arm_rating = cde.arm_outfield(of_arms)
                for payload in of_payloads:
                    payload['error'] = error_rating
                    payload['arm'] = arm_rating
                    all_pos.append(payload)
            except Exception as e:
                logger.info(f'Outfield arm/error failed for {player_name}: {e}')
        elif of_payloads:
            # Outfield ranges were computed but cannot be completed; they are
            # dropped, and the DH fallback below still applies if nothing else
            # was recorded for this player.
            logger.info(f'Outfield positions dropped for {player_name}: no usable OF arm/error data')

        # Process catcher
        if key_bbref in df_c.index:
            try:
                run_rating = (
                    'bis_runs_catcher_sb' if 'bis_runs_catcher_sb' in df_c.columns
                    else 'tz_runs_catcher'
                )

                # No steal attempts against this catcher: use a fixed arm of 3.
                if df_c.at[key_bbref, 'SB'] + df_c.at[key_bbref, 'CS'] == 0:
                    arm_rating = 3
                else:
                    arm_rating = cde.arm_catcher(
                        cs_pct=df_c.at[key_bbref, 'caught_stealing_perc'],
                        raa=int(df_c.at[key_bbref, run_rating]),
                        season_pct=season_pct
                    )

                if float(df_c.at[key_bbref, 'Inn_def']) >= MIN_INNINGS:
                    all_pos.append({
                        "player_id": player_id,
                        "position": 'C',
                        "innings": float(df_c.at[key_bbref, 'Inn_def']),
                        "range": cde.range_catcher(
                            rs_value=int(df_c.at[key_bbref, 'tz_runs_catcher']),
                            season_pct=season_pct
                        ),
                        "error": cde.get_any_error(
                            pos_code='c',
                            errors=int(df_c.at[key_bbref, 'E_def']),
                            chances=int(df_c.at[key_bbref, 'chances']),
                            season_pct=season_pct
                        ),
                        "arm": arm_rating,
                        "pb": cde.pb_catcher(
                            pb=int(df_c.at[key_bbref, 'PB']),
                            innings=int(float(df_c.at[key_bbref, 'Inn_def'])),
                            season_pct=season_pct
                        ),
                        "overthrow": cde.ot_catcher(
                            errors=int(df_c.at[key_bbref, 'E_def']),
                            chances=int(df_c.at[key_bbref, 'chances']),
                            season_pct=season_pct
                        )
                    })
            except Exception as e:
                logger.info(f'Catcher position failed for {player_name}: {e}')

        # DH fallback if no defensive record was produced for this player
        if len(all_pos) == records_before:
            all_pos.append({
                "player_id": player_id,
                "position": 'DH',
                "innings": 100  # Default innings for DH
            })

    print(f'Calculated {len(all_pos)} position records for {len(batters)} batters')
    return pd.DataFrame(all_pos)


async def delete_batter_positions(batter_player_ids: list):
    """Delete existing batter cardpositions for cardset 27.

    Args:
        batter_player_ids: Player ids whose positions should be removed.
            Positions belonging to any other player are left alone.

    Returns:
        The number of positions successfully deleted. Individual delete
        failures are printed as warnings and do not stop the loop.
    """
    print(f'Fetching existing cardpositions for cardset {CARDSET_ID}...')

    existing = await db_get('cardpositions', params=[('cardset_id', CARDSET_ID)])

    if not existing or existing.get('count', 0) == 0:
        print('No existing positions found')
        return 0

    # Only delete positions for batters (not pitchers)
    batter_ids_set = set(batter_player_ids)
    positions_to_delete = [
        p for p in existing['positions']
        if p['player']['player_id'] in batter_ids_set
    ]

    total = len(positions_to_delete)
    print(f'Found {total} batter positions to delete (keeping pitcher positions)')

    deleted = 0
    for pos in positions_to_delete:
        try:
            await db_delete('cardpositions', object_id=pos['id'], timeout=1)
            deleted += 1
            if deleted % 50 == 0:
                print(f' Deleted {deleted}/{total} positions...')
        except Exception as e:
            print(f' Warning: Failed to delete position {pos["id"]}: {e}')

    print(f'Deleted {deleted} batter positions')
    return deleted


async def post_positions(pos_df: pd.DataFrame):
    """POST the new cardpositions to the API.

    Args:
        pos_df: One row per position record. NaN cells (fields a position does
            not have, e.g. ``arm`` for infielders) are dropped from each
            payload rather than sent.

    Returns:
        True when the API call returned a non-None response, False otherwise
        (including when there is nothing to send).
    """
    if pos_df.empty:
        print('Failed to post positions!')
        return False

    all_pos = []
    for _, row in pos_df.iterrows():
        new_val = row.dropna().to_dict()
        # Force a plain int for the id regardless of the row's dtype.
        new_val['player_id'] = int(row['player_id'])
        all_pos.append(new_val)

    print(f'POSTing {len(all_pos)} cardpositions...')

    resp = await db_put('cardpositions', payload={'positions': all_pos}, timeout=10)

    if resp is not None:
        print(f'Successfully posted positions: {resp}')
        return True

    print('Failed to post positions!')
    return False


async def main():
    """Regenerate batter cardpositions: fetch, calculate, delete, then post."""
    print('='*60)
    print('CARDPOSITIONS FIX SCRIPT')
    print('='*60)
    print(f'Target: Cardset {CARDSET_ID} (2005 Live)')
    print()

    # Step 1: Get batters from API
    batters = await get_batters_from_api()
    if not batters:
        print('No batters to process!')
        return

    batter_player_ids = [b['player_id'] for b in batters]

    # Step 2: Calculate new positions BEFORE touching existing data, so a
    # failure here (missing CSV, bad data) cannot leave the cardset without
    # batter positions.
    pos_df = calc_positions_for_batters(batters)
    if pos_df.empty:
        print('No positions calculated - existing positions left untouched')
        return

    # Step 3: Delete existing batter positions
    await delete_batter_positions(batter_player_ids)

    # Step 4: Post new positions
    success = await post_positions(pos_df)

    print()
    print('='*60)
    if success:
        print('FIX COMPLETE - Batter positions have been regenerated!')
    else:
        print('FIX FAILED - Check logs for errors')
    print('='*60)


if __name__ == '__main__':
    asyncio.run(main())