Fix switch hitter detection for Rollins, Posada, and all switch hitters

Two bugs were preventing switch hitters from being correctly identified:

1. Missing handedness indicator in player names
   - Player names need special characters appended (* for left, # for switch)
   - new_player_payload() now appends '#' for switch hitters

2. Overly strict threshold in get_bat_hand()
   - Required 10+ total PAs to classify as switch hitter
   - Now correctly identifies ANY player who batted from both sides as 'S'
   - Removes arbitrary PA threshold that caused misclassification

Impact: Fixes Jimmy Rollins and Jorge Posada showing as 'R' instead of 'S'
       Applies to all switch hitters in retrosheet-based cardsets

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2025-11-11 16:04:33 -06:00
parent 49b976ce37
commit a20348ef7d

View File

@ -222,19 +222,23 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
r_vs_l = 0 if 'r' not in pa_vl else pa_vl['r']
r_vs_r = 0 if 'r' not in pa_vr else pa_vr['r']
# If player ONLY batted from one side (zero PAs from other side), classify as single-handed
if sum([l_vs_l, l_vs_r]) == 0 and sum([r_vs_l, r_vs_r]) > 0:
return 'R'
elif sum([l_vs_l, l_vs_r]) > 0 and sum([r_vs_l, r_vs_r]) == 0:
return 'L'
if sum([l_vs_l, l_vs_r, r_vs_l, r_vs_r]) < 10:
if sum([l_vs_l, l_vs_r]) > sum([r_vs_l, r_vs_r]):
return 'L'
else:
return 'R'
else:
# If player batted from both sides (even if limited sample), they're a switch hitter
# This correctly identifies switch hitters regardless of total PA count
if sum([l_vs_l, l_vs_r]) > 0 and sum([r_vs_l, r_vs_r]) > 0:
return 'S'
# Fallback for edge cases (shouldn't reach here in normal flow)
if sum([l_vs_l, l_vs_r]) > sum([r_vs_l, r_vs_r]):
return 'L'
else:
return 'R'
def get_pitch_hand(row):
    """Return the `pitcher_hand` value for the pitcher identified by `row`.

    Looks up `row['key_retro']` against `pitcher_id` in the enclosing
    `plays` frame, keeping only the first play listed for each pitcher.

    Raises:
        ValueError: from `.item()` when the lookup does not yield exactly
            one value (e.g. the pitcher has no rows in `plays`).
    """
    # Mask is built on the full frame; `.loc` aligns it to the reduced frame by index.
    matches_pitcher = plays['pitcher_id'] == row['key_retro']
    # drop_duplicates keeps the first occurrence of each pitcher_id by default.
    first_rows = plays.drop_duplicates('pitcher_id')
    hand = first_rows.loc[matches_pitcher, 'pitcher_hand']
    return hand.item()
@ -1096,8 +1100,15 @@ async def get_or_post_players(bstat_df: pd.DataFrame = None, bat_rat_df: pd.Data
return mlb_player
def new_player_payload(row, ratings_df: pd.DataFrame):
# Append handedness indicator to player name (* for left, # for switch)
name_suffix = ''
if row.get('bat_hand') == 'L':
name_suffix = '*'
elif row.get('bat_hand') == 'S':
name_suffix = '#'
return {
'p_name': f'{row["use_name"]} {row["last_name"]}',
'p_name': f'{row["use_name"]} {row["last_name"]}{name_suffix}',
'cost': f'{ratings_df.loc[row['key_bbref']]["cost"]}',
'image': f'change-me',
'mlbclub': CLUB_LIST[row['Tm']],