172 lines
7.4 KiB
Python
172 lines
7.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Demo script to test the automated data fetcher without requiring pybaseball installation
|
|
"""
|
|
|
|
import asyncio
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch
|
|
import tempfile
|
|
|
|
# Stand-in for the ``pybaseball`` package so the demo can run without it.
# ``demo_data_fetcher`` registers this object in ``sys.modules`` under the
# name 'pybaseball' and attaches the stat-returning functions to it.
mock_pb = Mock()
# Explicitly provide ``cache.enable`` as a callable no-op stand-in.
mock_pb.cache = Mock()
mock_pb.cache.enable = Mock()
|
|
|
|
# Sample data that the fetcher would normally get from pybaseball.
# Three pitcher rows, returned by the mocked ``pitching_stats`` and
# ``pitching_stats_bref`` functions.
sample_pitching_data = pd.DataFrame({
    'Name': ['Jacob deGrom', 'Gerrit Cole', 'Shane Bieber'],
    'Team': ['NYM', 'NYY', 'CLE'],
    'W': [15, 16, 13],
    'L': [4, 8, 7],
    'ERA': [2.38, 3.23, 3.28],
    'IP': [201.1, 200.1, 214.1],
    'SO': [255, 243, 259],
})
|
|
|
|
# Three batter rows, returned by the mocked ``batting_stats`` and
# ``batting_stats_bref`` functions. The 'IDfg' values are strings; the first
# two are the same IDs the demo feeds to the patched ``_get_active_players``.
sample_batting_data = pd.DataFrame({
    'Name': ['Ronald Acuña Jr.', 'Mookie Betts', 'Juan Soto'],
    'Team': ['ATL', 'LAD', 'WSN'],
    'G': [119, 142, 151],
    'PA': [556, 614, 654],
    'H': [148, 160, 145],
    'HR': [41, 35, 29],
    'SB': [73, 12, 9],
    'CS': [11, 3, 4],
    'SB%': [0.869, 0.8, 0.692],
    'GDP': [5, 15, 20],
    'R': [149, 122, 111],
    'BB': [78, 65, 145],
    'SO': [144, 111, 93],
    'IDfg': ['2203', '13611', '19251'],
})
|
|
|
|
# Four split rows (two handedness splits, two venue splits) used as the
# 'batting' entry returned by the patched ``_fetch_player_splits``.
sample_splits_data = pd.DataFrame({
    'Split': ['vs LHP', 'vs RHP', 'Home', 'Away'],
    'G': [40, 79, 60, 59],
    'PA': [150, 406, 278, 278],
    'H': [42, 106, 74, 74],
    'AVG': [0.295, 0.275, 0.280, 0.280],
    'OBP': [0.380, 0.350, 0.365, 0.365],
    'SLG': [0.520, 0.480, 0.500, 0.500],
})
|
|
|
|
def _print_data_summary(datasets):
    """Print how many datasets were produced and the size of each one.

    Args:
        datasets: Mapping of dataset name to the fetched object. Entries
            that expose ``__len__`` are reported with their record count;
            anything else is labelled "(mock data)".
    """
    print(f"📊 Generated {len(datasets)} data files:")
    for name, dataset in datasets.items():
        if hasattr(dataset, '__len__'):
            print(f" - {name}: {len(dataset)} records")
        else:
            print(f" - {name}: (mock data)")


def _print_csv_listing(output_dirs):
    """List the ``*.csv`` files (name and byte size) in each existing directory.

    Args:
        output_dirs: Iterable of ``pathlib.Path`` directories. Directories
            that do not exist are skipped silently.
    """
    for output_dir in output_dirs:
        if not output_dir.exists():
            continue
        print(f"\n📁 {output_dir.name}:")
        # Sorted so the listing is stable from run to run.
        for csv_file in sorted(output_dir.glob("*.csv")):
            size = csv_file.stat().st_size
            print(f" - {csv_file.name} ({size} bytes)")


async def demo_data_fetcher():
    """Demonstrate the data fetcher functionality.

    Runs the season and live-series fetchers from ``automated_data_fetcher``
    against the module-level sample DataFrames instead of real pybaseball
    calls, writes their CSV output into a temporary directory, and prints a
    report of what was produced and what still has to be downloaded by hand.

    Side effects: prints to stdout, attaches mocked functions to the
    module-level ``mock_pb``, and temporarily replaces the ``pybaseball``,
    ``creation_helpers`` and ``exceptions`` entries in ``sys.modules``.
    """
    # NOTE(review): the original indentation of this function was lost; the
    # nesting below is reconstructed from data dependencies (the fetchers need
    # the patched modules and the live temporary directory).
    print("🚀 Automated Data Fetcher Demo")
    print("=" * 50)

    # Mock pybaseball functions to return our sample data
    mock_pb.pitching_stats_bref = Mock(return_value=sample_pitching_data)
    mock_pb.batting_stats_bref = Mock(return_value=sample_batting_data)
    mock_pb.batting_stats = Mock(return_value=sample_batting_data)
    mock_pb.pitching_stats = Mock(return_value=sample_pitching_data)
    mock_pb.get_splits = Mock(return_value=sample_splits_data)

    # Patch the imports
    stand_in_modules = {
        'pybaseball': mock_pb,
        'creation_helpers': Mock(),
        'exceptions': Mock(),
    }
    with patch.dict('sys.modules', stand_in_modules):
        # Import after patching so the stand-in modules above are the ones
        # found in sys.modules while automated_data_fetcher is loaded.
        from automated_data_fetcher import DataFetcher, LiveSeriesDataFetcher

        # Mock the logger
        with patch('automated_data_fetcher.logger') as mock_logger:
            # Redirect log output to print
            mock_logger.info = print
            mock_logger.warning = print
            mock_logger.error = print

            # Create temporary directory for output
            with tempfile.TemporaryDirectory() as tmp_dir:
                print(f"📁 Using temporary directory: {tmp_dir}")

                # Test 1: Season Data Fetcher
                print("\n🔄 Testing Season Data Fetcher...")
                fetcher = DataFetcher(2023, "Season")
                fetcher.output_dir = Path(tmp_dir) / "season_test"

                # Mock the helper functions
                season_players = patch.object(
                    fetcher, '_get_active_players',
                    return_value=['2203', '13611'],
                )
                season_splits = patch.object(
                    fetcher, '_fetch_player_splits',
                    return_value={
                        'batting': sample_splits_data.copy(),
                        'pitching': pd.DataFrame(),
                    },
                )
                with season_players, season_splits:
                    # Fetch data
                    bref_data = await fetcher.fetch_baseball_reference_data()
                    fg_data = await fetcher.fetch_fangraphs_data()

                    # Combine and save
                    all_data = {**bref_data, **fg_data}
                    fetcher.save_data_to_csv(all_data)

                print(f"✅ Season data saved to: {fetcher.output_dir}")
                _print_data_summary(all_data)

                # Test 2: Live Series Data Fetcher
                print("\n🔄 Testing Live Series Data Fetcher...")
                live_fetcher = LiveSeriesDataFetcher(2023, 81)
                live_fetcher.output_dir = Path(tmp_dir) / "live_test"

                live_players = patch.object(
                    live_fetcher, '_get_active_players',
                    return_value=['2203'],
                )
                live_splits = patch.object(
                    live_fetcher, '_fetch_player_splits',
                    return_value={
                        # Smaller dataset for live
                        'batting': sample_splits_data.head(2),
                        'pitching': pd.DataFrame(),
                    },
                )
                with live_players, live_splits:
                    live_data = await live_fetcher.fetch_live_data()
                    live_fetcher.save_data_to_csv(live_data)

                print(f"✅ Live data saved to: {live_fetcher.output_dir}")
                _print_data_summary(live_data)
                print(f"📅 Date range: {live_fetcher.start_date} to {live_fetcher.end_date}")

                # Test 3: File outputs (must run while the temp dir exists)
                print("\n📄 Generated CSV Files:")
                _print_csv_listing([fetcher.output_dir, live_fetcher.output_dir])

                # Test 4: Show what still needs manual download
                print("\n⚠️ MANUAL DOWNLOAD STILL REQUIRED:")
                manual_files = [
                    "vlhp-basic.csv (FanGraphs vs LHP batting)",
                    "vlhp-rate.csv (FanGraphs vs LHP rate stats)",
                    "vrhp-basic.csv (FanGraphs vs RHP batting)",
                    "vrhp-rate.csv (FanGraphs vs RHP rate stats)",
                    "vlhh-basic.csv (FanGraphs vs LHH pitching)",
                    "vlhh-rate.csv (FanGraphs vs LHH rate stats)",
                    "vrhh-basic.csv (FanGraphs vs RHH pitching)",
                    "vrhh-rate.csv (FanGraphs vs RHH rate stats)",
                ]
                for file in manual_files:
                    print(f" - {file}")

    print("\n✨ Demo Complete!")
    print("=" * 50)
    print("The automated data fetcher successfully:")
    print("✅ Created output directories")
    print("✅ Fetched Baseball Reference data (mocked)")
    print("✅ Fetched FanGraphs basic data (mocked)")
    print("✅ Saved data to properly named CSV files")
    print("✅ Handled date ranges for live series")
    print("⚠️ FanGraphs split data still requires manual download")
|
|
|
|
# Script entry point: run the async demo to completion on a fresh event loop.
if __name__ == '__main__':
    asyncio.run(demo_data_fetcher())