Added one-time utility scripts used to prepare 2005 defense CSV files for compatibility with retrosheet_data.py.

Scripts:

- rename_defense_columns.py: Renamed initial batch of defense columns
  - RF/9 → range_factor_per_nine
  - RF/G → range_factor_per_game
  - DP → DP_def, E → E_def, Ch → chances, Inn → Inn_def
  - CS% → caught_stealing_perc, PO → pickoffs
  - Name-additional → key_bbref
- rename_additional_defense_columns.py: Second batch of column renames
  - Fld% → fielding_perc
  - Rtot → tz_runs_total, Rtot/yr → tz_runs_total_per_season
  - Rtz → tz_runs_field, Rdp → tz_runs_infield
- undo_po_rename.py: Reverted the PO → pickoffs rename for position players
  - Kept 'pickoffs' for defense_p.csv (pitchers)
  - Changed back to 'PO' for all other positions (c, 1b, 2b, etc.)
- test_retrosheet_integration.py: Integration test for retrosheet_transformer
  - Validates batting and pitching stats loading
  - Tests date range filtering
  - Verifies player counts

These scripts have already been executed and the defense files are properly formatted. They are kept for historical reference and documentation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
57 lines
1.6 KiB
Python
57 lines
1.6 KiB
Python
"""
Test script to verify retrosheet_data.py works with the new transformer.

Runs its checks as soon as it is executed: loads batting data and then
pitching data for a fixed 2005 date range, prints a short summary of each,
and exits with status 1 on the first failure.
"""

import sys
import logging
from retrosheet_data import get_base_batting_df, get_base_pitching_df, RETRO_FILE_PATH, EVENTS_FILENAME

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)


def _run_load_test(loader, kind, group, file_path, start_date, end_date):
    """Call one loader and print a summary of what it returned.

    Args:
        loader: Callable invoked as ``loader(file_path, start_date, end_date)``.
            It must return a ``(plays, stats)`` pair where both support
            ``len()`` and ``stats`` also supports ``.head(n)``.
        kind: Word used in the "Sample ... stats" heading (e.g. "batting").
        group: Plural noun used in the "Qualified ..." line (e.g. "batters").
        file_path: Path handed to the loader unchanged.
        start_date: First date of the range, handed to the loader unchanged.
        end_date: Last date of the range, handed to the loader unchanged.

    Returns:
        The ``(plays, stats)`` pair produced by the loader.

    Exits the process with status 1 if loading or summarising raises.
    """
    try:
        plays, stats = loader(file_path, start_date, end_date)
        print("✓ Success!")
        print(f" - Total plays loaded: {len(plays)}")
        print(f" - Qualified {group}: {len(stats)}")
        print(f"\nSample {kind} stats:")
        print(stats.head(3))
    except Exception as e:
        # Top-level boundary of a test script: report the failure, keep the
        # traceback so the failing call can be located, then fail the run.
        print(f"✗ Failed: {e}")
        logging.exception("%s load failed", kind)
        sys.exit(1)
    return plays, stats


print("\n" + "=" * 60)
print("Testing Retrosheet Data Integration")
print("=" * 60 + "\n")

file_path = f'{RETRO_FILE_PATH}{EVENTS_FILENAME}'
print(f"Loading from: {file_path}")

# Test date range (full 2005 season)
start_date = 20050403  # Opening Day 2005
end_date = 20051003  # End of 2005 regular season

print("\nTest 1: Loading Batting Data")
print(f"Date range: {start_date} to {end_date}")

all_plays_batting, batting_stats = _run_load_test(
    get_base_batting_df, "batting", "batters", file_path, start_date, end_date
)

print("\n" + "-" * 60 + "\n")
print("Test 2: Loading Pitching Data")

all_plays_pitching, pitching_stats = _run_load_test(
    get_base_pitching_df, "pitching", "pitchers", file_path, start_date, end_date
)

print("\n" + "=" * 60)
print("All tests passed! ✓")
print("=" * 60 + "\n")