#!/usr/bin/env python3
"""
Demo script to test the automated data fetcher without requiring pybaseball installation
"""
import asyncio
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pandas as pd

# Mock pybaseball for demo
mock_pb = Mock()
mock_pb.cache = Mock()
mock_pb.cache.enable = Mock()

# Create sample data that the fetcher would normally get from pybaseball
sample_pitching_data = pd.DataFrame({
    'Name': ['Jacob deGrom', 'Gerrit Cole', 'Shane Bieber'],
    'Team': ['NYM', 'NYY', 'CLE'],
    'W': [15, 16, 13],
    'L': [4, 8, 7],
    'ERA': [2.38, 3.23, 3.28],
    'IP': [201.1, 200.1, 214.1],
    'SO': [255, 243, 259]
})

sample_batting_data = pd.DataFrame({
    'Name': ['Ronald Acuña Jr.', 'Mookie Betts', 'Juan Soto'],
    'Team': ['ATL', 'LAD', 'WSN'],
    'G': [119, 142, 151],
    'PA': [556, 614, 654],
    'H': [148, 160, 145],
    'HR': [41, 35, 29],
    'SB': [73, 12, 9],
    'CS': [11, 3, 4],
    'SB%': [0.869, 0.8, 0.692],
    'GDP': [5, 15, 20],
    'R': [149, 122, 111],
    'BB': [78, 65, 145],
    'SO': [144, 111, 93],
    'IDfg': ['2203', '13611', '19251']
})

sample_splits_data = pd.DataFrame({
    'Split': ['vs LHP', 'vs RHP', 'Home', 'Away'],
    'G': [40, 79, 60, 59],
    'PA': [150, 406, 278, 278],
    'H': [42, 106, 74, 74],
    'AVG': [.295, .275, .280, .280],
    'OBP': [.380, .350, .365, .365],
    'SLG': [.520, .480, .500, .500]
})


async def demo_data_fetcher():
    """Demonstrate the data fetcher functionality"""
    print("🚀 Automated Data Fetcher Demo")
    print("=" * 50)

    # Mock pybaseball functions to return our sample data
    mock_pb.pitching_stats_bref = Mock(return_value=sample_pitching_data)
    mock_pb.batting_stats_bref = Mock(return_value=sample_batting_data)
    mock_pb.batting_stats = Mock(return_value=sample_batting_data)
    mock_pb.pitching_stats = Mock(return_value=sample_pitching_data)
    mock_pb.get_splits = Mock(return_value=sample_splits_data)

    # Patch the imports
    with patch.dict('sys.modules', {
        'pybaseball': mock_pb,
        'creation_helpers': Mock(),
        'exceptions': Mock()
    }):
        # Import after patching
        from automated_data_fetcher import DataFetcher, LiveSeriesDataFetcher

        # Mock the logger
        with patch('automated_data_fetcher.logger') as mock_logger:
            mock_logger.info = print  # Redirect log output to print
            mock_logger.warning = print
            mock_logger.error = print

            # Create temporary directory for output
            with tempfile.TemporaryDirectory() as tmp_dir:
                print(f"📁 Using temporary directory: {tmp_dir}")

                # Test 1: Season Data Fetcher
                print("\n🔄 Testing Season Data Fetcher...")
                fetcher = DataFetcher(2023, "Season")
                fetcher.output_dir = Path(tmp_dir) / "season_test"

                # Mock the helper functions
                with patch.object(fetcher, '_get_active_players',
                                  return_value=['2203', '13611']):
                    with patch.object(fetcher, '_fetch_player_splits', return_value={
                        'batting': sample_splits_data.copy(),
                        'pitching': pd.DataFrame()
                    }):
                        # Fetch data
                        bref_data = await fetcher.fetch_baseball_reference_data()
                        fg_data = await fetcher.fetch_fangraphs_data()

                        # Combine and save
                        all_data = {**bref_data, **fg_data}
                        fetcher.save_data_to_csv(all_data)

                        print(f"✅ Season data saved to: {fetcher.output_dir}")
                        print(f"📊 Generated {len(all_data)} data files:")
                        for name, df in all_data.items():
                            if hasattr(df, '__len__'):
                                print(f"   - {name}: {len(df)} records")
                            else:
                                print(f"   - {name}: (mock data)")

                # Test 2: Live Series Data Fetcher
                print("\n🔄 Testing Live Series Data Fetcher...")
                live_fetcher = LiveSeriesDataFetcher(2023, 81)
                live_fetcher.output_dir = Path(tmp_dir) / "live_test"

                with patch.object(live_fetcher, '_get_active_players',
                                  return_value=['2203']):
                    with patch.object(live_fetcher, '_fetch_player_splits', return_value={
                        'batting': sample_splits_data.head(2),  # Smaller dataset for live
                        'pitching': pd.DataFrame()
                    }):
                        live_data = await live_fetcher.fetch_live_data()
                        live_fetcher.save_data_to_csv(live_data)

                        print(f"✅ Live data saved to: {live_fetcher.output_dir}")
                        print(f"📊 Generated {len(live_data)} data files:")
                        for name, df in live_data.items():
                            if hasattr(df, '__len__'):
                                print(f"   - {name}: {len(df)} records")
                            else:
                                print(f"   - {name}: (mock data)")

                        print(f"📅 Date range: {live_fetcher.start_date} to {live_fetcher.end_date}")

                # Test 3: File outputs
                print("\n📄 Generated CSV Files:")
                for output_dir in [fetcher.output_dir, live_fetcher.output_dir]:
                    if output_dir.exists():
                        csv_files = list(output_dir.glob("*.csv"))
                        print(f"\n📁 {output_dir.name}:")
                        for csv_file in csv_files:
                            size = csv_file.stat().st_size
                            print(f"   - {csv_file.name} ({size} bytes)")

                # Test 4: Show what still needs manual download
                print("\n⚠️ MANUAL DOWNLOAD STILL REQUIRED:")
                manual_files = [
                    "vlhp-basic.csv (FanGraphs vs LHP batting)",
                    "vlhp-rate.csv (FanGraphs vs LHP rate stats)",
                    "vrhp-basic.csv (FanGraphs vs RHP batting)",
                    "vrhp-rate.csv (FanGraphs vs RHP rate stats)",
                    "vlhh-basic.csv (FanGraphs vs LHH pitching)",
                    "vlhh-rate.csv (FanGraphs vs LHH rate stats)",
                    "vrhh-basic.csv (FanGraphs vs RHH pitching)",
                    "vrhh-rate.csv (FanGraphs vs RHH rate stats)"
                ]
                for file in manual_files:
                    print(f"   - {file}")

    print("\n✨ Demo Complete!")
    print("=" * 50)
    print("The automated data fetcher successfully:")
    print("✅ Created output directories")
    print("✅ Fetched Baseball Reference data (mocked)")
    print("✅ Fetched FanGraphs basic data (mocked)")
    print("✅ Saved data to properly named CSV files")
    print("✅ Handled date ranges for live series")
    print("⚠️ FanGraphs split data still requires manual download")


if __name__ == '__main__':
    asyncio.run(demo_data_fetcher())