Added one-time utility scripts used to prepare 2005 defense CSV files for compatibility with retrosheet_data.py.

Scripts:

- rename_defense_columns.py: Renamed initial batch of defense columns
  - RF/9 → range_factor_per_nine
  - RF/G → range_factor_per_game
  - DP → DP_def, E → E_def, Ch → chances, Inn → Inn_def
  - CS% → caught_stealing_perc, PO → pickoffs
  - Name-additional → key_bbref
- rename_additional_defense_columns.py: Second batch of column renames
  - Fld% → fielding_perc
  - Rtot → tz_runs_total, Rtot/yr → tz_runs_total_per_season
  - Rtz → tz_runs_field, Rdp → tz_runs_infield
- undo_po_rename.py: Reverted PO → pickoffs for position players
  - Kept 'pickoffs' for defense_p.csv (pitchers)
  - Changed back to 'PO' for all other positions (c, 1b, 2b, etc.)
- test_retrosheet_integration.py: Integration test for retrosheet_transformer
  - Validates batting and pitching stats loading
  - Tests date range filtering
  - Verifies player counts

These scripts have already been executed and the defense files are properly formatted. Kept for historical reference and documentation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
72 lines
1.6 KiB
Python
72 lines
1.6 KiB
Python
"""
Script to rename defense CSV column headers to match expected format.
"""
import os

import pandas as pd

# Column mapping: original CSV header -> replacement header.
COLUMN_MAPPING = {
    'RF/9': 'range_factor_per_nine',
    'RF/G': 'range_factor_per_game',
    'DP': 'DP_def',
    'E': 'E_def',
    'Ch': 'chances',
    'Inn': 'Inn_def',
    'CS%': 'caught_stealing_perc',
    'PO': 'pickoffs',
    'Name-additional': 'key_bbref',
}

# 'PO' is only a pickoff count in the pitcher file.  Renaming it in the other
# position files was a mistake that had to be reverted afterwards, so those
# files keep their 'PO' header (presumably putouts there -- confirm against
# the data source).
PITCHER_FILE = 'defense_p.csv'
PITCHER_ONLY_COLUMNS = frozenset({'PO'})

# Directory with defense files
DATA_DIR = 'data-input/2005 Live Cardset/'

# Defense files to process
DEFENSE_FILES = [
    'defense_c.csv',
    'defense_1b.csv',
    'defense_2b.csv',
    'defense_3b.csv',
    'defense_ss.csv',
    'defense_lf.csv',
    'defense_cf.csv',
    'defense_rf.csv',
    'defense_of.csv',
    'defense_p.csv',
]


def columns_to_rename(filename, columns):
    """Return the ``{old: new}`` renames that apply to one defense file.

    Args:
        filename: Base name of the CSV file (e.g. ``'defense_c.csv'``).
        columns: Iterable of the header names present in that file.

    Returns:
        A dict, in ``COLUMN_MAPPING`` order, restricted to headers that are
        actually present.  Headers in ``PITCHER_ONLY_COLUMNS`` are included
        only when ``filename`` is ``PITCHER_FILE``.
    """
    present = set(columns)
    is_pitcher_file = filename == PITCHER_FILE
    return {
        old: new
        for old, new in COLUMN_MAPPING.items()
        if old in present
        and (is_pitcher_file or old not in PITCHER_ONLY_COLUMNS)
    }


def rename_file_columns(filepath):
    """Rename the headers of one defense CSV in place.

    Args:
        filepath: Path to the CSV file; its base name selects which renames
            apply (see ``columns_to_rename``).

    Returns:
        A list of ``"old -> new"`` strings, one per renamed column.  The file
        is rewritten only when this list is non-empty.
    """
    # Read every cell as a verbatim string: this script only touches headers,
    # so type inference must not reformat the data (e.g. '.990' -> 0.99, or
    # blank cells turning integer columns into floats).
    df = pd.read_csv(filepath, dtype=str, keep_default_na=False)

    mapping = columns_to_rename(os.path.basename(filepath), df.columns)
    if mapping:
        df.rename(columns=mapping).to_csv(filepath, index=False)

    return [f"{old} -> {new}" for old, new in mapping.items()]


def main():
    """Rename headers in every file of ``DEFENSE_FILES`` and report progress."""
    print("Renaming defense CSV columns...")
    print("Column mappings:")
    for old, new in COLUMN_MAPPING.items():
        print(f" {old} -> {new}")
    pitcher_only = ", ".join(sorted(PITCHER_ONLY_COLUMNS))
    print(f" ({pitcher_only} is renamed only in {PITCHER_FILE})")
    print()

    for filename in DEFENSE_FILES:
        filepath = os.path.join(DATA_DIR, filename)

        if not os.path.exists(filepath):
            print(f"⚠ Skipping {filename} (not found)")
            continue

        renamed = rename_file_columns(filepath)

        print(f"✓ {filename}: Renamed {len(renamed)} columns")
        for entry in renamed:
            print(f" {entry}")

    print("\n✓ All defense files processed successfully!")


if __name__ == "__main__":
    main()