Standardize formatting with black and apply ruff auto-fixes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
196 lines
7.7 KiB
Python
196 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Demo script to test the automated data fetcher without requiring pybaseball installation
|
|
"""
|
|
|
|
import asyncio
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch
|
|
import tempfile
|
|
|
|
# Mock pybaseball for demo.
# This object is later installed in ``sys.modules`` under the name
# "pybaseball", so the fetcher module can be imported without the real
# package being installed.
mock_pb = Mock()
# ``cache`` and ``cache.enable`` are set explicitly to show which parts of
# the package surface the demo stubs out; calling ``enable()`` is a no-op.
mock_pb.cache = Mock()
mock_pb.cache.enable = Mock()
|
|
|
|
# Create sample data that the fetcher would normally get from pybaseball.
# Three pitcher rows; this frame is what the mocked pitching-stat functions
# return during the demo.
sample_pitching_data = pd.DataFrame(
    {
        "Name": ["Jacob deGrom", "Gerrit Cole", "Shane Bieber"],
        "Team": ["NYM", "NYY", "CLE"],
        "W": [15, 16, 13],
        "L": [4, 8, 7],
        "ERA": [2.38, 3.23, 3.28],
        "IP": [201.1, 200.1, 214.1],
        "SO": [255, 243, 259],
    }
)
|
|
|
|
# Three batter rows; this frame is what the mocked batting-stat functions
# return during the demo. "IDfg" holds the player ids as strings.
sample_batting_data = pd.DataFrame(
    {
        "Name": ["Ronald Acuña Jr.", "Mookie Betts", "Juan Soto"],
        "Team": ["ATL", "LAD", "WSN"],
        "G": [119, 142, 151],
        "PA": [556, 614, 654],
        "H": [148, 160, 145],
        "HR": [41, 35, 29],
        "SB": [73, 12, 9],
        "CS": [11, 3, 4],
        "SB%": [0.869, 0.8, 0.692],
        "GDP": [5, 15, 20],
        "R": [149, 122, 111],
        "BB": [78, 65, 145],
        "SO": [144, 111, 93],
        "IDfg": ["2203", "13611", "19251"],
    }
)
|
|
|
|
# One row per split (handedness and home/away); used as the stubbed return
# value for the fetchers' per-player splits lookup in the demo.
sample_splits_data = pd.DataFrame(
    {
        "Split": ["vs LHP", "vs RHP", "Home", "Away"],
        "G": [40, 79, 60, 59],
        "PA": [150, 406, 278, 278],
        "H": [42, 106, 74, 74],
        "AVG": [0.295, 0.275, 0.280, 0.280],
        "OBP": [0.380, 0.350, 0.365, 0.365],
        "SLG": [0.520, 0.480, 0.500, 0.500],
    }
)
|
|
|
|
|
|
# FanGraphs split exports that the demo lists as still needing manual download.
_MANUAL_DOWNLOAD_FILES = (
    "vlhp-basic.csv (FanGraphs vs LHP batting)",
    "vlhp-rate.csv (FanGraphs vs LHP rate stats)",
    "vrhp-basic.csv (FanGraphs vs RHP batting)",
    "vrhp-rate.csv (FanGraphs vs RHP rate stats)",
    "vlhh-basic.csv (FanGraphs vs LHH pitching)",
    "vlhh-rate.csv (FanGraphs vs LHH rate stats)",
    "vrhh-basic.csv (FanGraphs vs RHH pitching)",
    "vrhh-rate.csv (FanGraphs vs RHH rate stats)",
)


def _player_lookup_patches(target, player_ids, batting_splits):
    """Build the two patches that stub a fetcher's player-lookup helpers.

    Returns a ``(players_patch, splits_patch)`` tuple of un-entered
    ``patch.object`` context managers for ``target._get_active_players`` and
    ``target._fetch_player_splits``.
    """
    return (
        patch.object(target, "_get_active_players", return_value=player_ids),
        patch.object(
            target,
            "_fetch_player_splits",
            return_value={"batting": batting_splits, "pitching": pd.DataFrame()},
        ),
    )


def _print_dataset_summary(datasets):
    """Print one line per dataset: its record count, or a mock placeholder."""
    for name, df in datasets.items():
        # Values without ``__len__`` (e.g. a plain Mock) cannot be counted.
        if hasattr(df, "__len__"):
            print(f" - {name}: {len(df)} records")
        else:
            print(f" - {name}: (mock data)")


def _print_csv_listing(output_dirs):
    """Print the name and byte size of every CSV in each existing directory."""
    for output_dir in output_dirs:
        if not output_dir.exists():
            continue
        print(f"\n📁 {output_dir.name}:")
        # Sorted so the listing is deterministic; glob order is arbitrary.
        for csv_file in sorted(output_dir.glob("*.csv")):
            size = csv_file.stat().st_size
            print(f" - {csv_file.name} ({size} bytes)")


async def demo_data_fetcher():
    """Demonstrate the data fetcher functionality.

    Runs ``DataFetcher`` and ``LiveSeriesDataFetcher`` from the
    ``automated_data_fetcher`` module against mocked pybaseball functions that
    return the module-level sample frames, writes the results into a temporary
    directory, and prints a progress report. Nothing is returned.

    The ``automated_data_fetcher`` module must be importable; ``pybaseball``,
    ``creation_helpers`` and ``exceptions`` are replaced by mocks for the
    duration of the demo, so they do not need to be installed.
    """
    print("🚀 Automated Data Fetcher Demo")
    print("=" * 50)

    # Mock pybaseball functions to return our sample data
    mock_pb.pitching_stats_bref = Mock(return_value=sample_pitching_data)
    mock_pb.batting_stats_bref = Mock(return_value=sample_batting_data)
    mock_pb.batting_stats = Mock(return_value=sample_batting_data)
    mock_pb.pitching_stats = Mock(return_value=sample_pitching_data)
    mock_pb.get_splits = Mock(return_value=sample_splits_data)

    # Patch the imports
    stub_modules = {
        "pybaseball": mock_pb,
        "creation_helpers": Mock(),
        "exceptions": Mock(),
    }
    with patch.dict("sys.modules", stub_modules):
        # Import after patching so the fetcher module binds to the stubs.
        from automated_data_fetcher import DataFetcher, LiveSeriesDataFetcher

        # Mock the logger
        with patch("automated_data_fetcher.logger") as mock_logger:
            mock_logger.info = print  # Redirect log output to print
            mock_logger.warning = print
            mock_logger.error = print

            # Create temporary directory for output
            with tempfile.TemporaryDirectory() as tmp_dir:
                print(f"📁 Using temporary directory: {tmp_dir}")

                # Test 1: Season Data Fetcher
                print("\n🔄 Testing Season Data Fetcher...")
                fetcher = DataFetcher(2023, "Season")
                fetcher.output_dir = Path(tmp_dir) / "season_test"

                # Mock the helper functions
                players_patch, splits_patch = _player_lookup_patches(
                    fetcher, ["2203", "13611"], sample_splits_data.copy()
                )
                with players_patch, splits_patch:
                    # Fetch data
                    bref_data = await fetcher.fetch_baseball_reference_data()
                    fg_data = await fetcher.fetch_fangraphs_data()

                    # Combine and save
                    all_data = {**bref_data, **fg_data}
                    fetcher.save_data_to_csv(all_data)

                print(f"✅ Season data saved to: {fetcher.output_dir}")
                print(f"📊 Generated {len(all_data)} data files:")
                _print_dataset_summary(all_data)

                # Test 2: Live Series Data Fetcher
                print("\n🔄 Testing Live Series Data Fetcher...")
                live_fetcher = LiveSeriesDataFetcher(2023, 81)
                live_fetcher.output_dir = Path(tmp_dir) / "live_test"

                players_patch, splits_patch = _player_lookup_patches(
                    live_fetcher,
                    ["2203"],
                    sample_splits_data.head(2),  # Smaller dataset for live
                )
                with players_patch, splits_patch:
                    live_data = await live_fetcher.fetch_live_data()
                    live_fetcher.save_data_to_csv(live_data)

                print(f"✅ Live data saved to: {live_fetcher.output_dir}")
                print(f"📊 Generated {len(live_data)} data files:")
                _print_dataset_summary(live_data)
                print(
                    f"📅 Date range: {live_fetcher.start_date} to {live_fetcher.end_date}"
                )

                # Test 3: File outputs (must run before the temp dir is removed)
                print("\n📄 Generated CSV Files:")
                _print_csv_listing([fetcher.output_dir, live_fetcher.output_dir])

                # Test 4: Show what still needs manual download
                print("\n⚠️ MANUAL DOWNLOAD STILL REQUIRED:")
                for file in _MANUAL_DOWNLOAD_FILES:
                    print(f" - {file}")

                print("\n✨ Demo Complete!")
                print("=" * 50)
                print("The automated data fetcher successfully:")
                print("✅ Created output directories")
                print("✅ Fetched Baseball Reference data (mocked)")
                print("✅ Fetched FanGraphs basic data (mocked)")
                print("✅ Saved data to properly named CSV files")
                print("✅ Handled date ranges for live series")
                print("⚠️ FanGraphs split data still requires manual download")
|
|
|
|
|
|
# Run the async demo only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(demo_data_fetcher())
|