# paper-dynasty-card-creation/test_data_fetcher_demo.py

#!/usr/bin/env python3
"""
Demo script to test the automated data fetcher without requiring pybaseball installation
"""

import asyncio
import pandas as pd
from pathlib import Path
from unittest.mock import Mock, patch
import tempfile

# Stand-in object used in place of the pybaseball module for this demo.
# ``cache.enable`` is set up explicitly as its own Mock.
mock_pb = Mock(cache=Mock(enable=Mock()))

# Sample data standing in for what the fetcher would normally get from
# pybaseball. Each tuple is one pitcher; the column order is given below.
sample_pitching_data = pd.DataFrame(
    [
        ("Jacob deGrom", "NYM", 15, 4, 2.38, 201.1, 255),
        ("Gerrit Cole", "NYY", 16, 8, 3.23, 200.1, 243),
        ("Shane Bieber", "CLE", 13, 7, 3.28, 214.1, 259),
    ],
    columns=["Name", "Team", "W", "L", "ERA", "IP", "SO"],
)

# Sample batting table, one tuple per hitter. IDfg values are kept as strings.
sample_batting_data = pd.DataFrame(
    [
        ("Ronald Acuña Jr.", "ATL", 119, 556, 148, 41, 73, 11, 0.869, 5, 149, 78, 144, "2203"),
        ("Mookie Betts", "LAD", 142, 614, 160, 35, 12, 3, 0.8, 15, 122, 65, 111, "13611"),
        ("Juan Soto", "WSN", 151, 654, 145, 29, 9, 4, 0.692, 20, 111, 145, 93, "19251"),
    ],
    columns=[
        "Name",
        "Team",
        "G",
        "PA",
        "H",
        "HR",
        "SB",
        "CS",
        "SB%",
        "GDP",
        "R",
        "BB",
        "SO",
        "IDfg",
    ],
)

# Sample split table, one tuple per split category.
sample_splits_data = pd.DataFrame(
    [
        ("vs LHP", 40, 150, 42, 0.295, 0.380, 0.520),
        ("vs RHP", 79, 406, 106, 0.275, 0.350, 0.480),
        ("Home", 60, 278, 74, 0.280, 0.365, 0.500),
        ("Away", 59, 278, 74, 0.280, 0.365, 0.500),
    ],
    columns=["Split", "G", "PA", "H", "AVG", "OBP", "SLG"],
)


def _print_dataset_summary(datasets):
    """Print how many datasets were produced and the size of each one.

    Args:
        datasets: Mapping of dataset name to the fetched object. Values that
            define ``__len__`` are reported with their length; anything else
            is reported as "(mock data)".
    """
    print(f"📊 Generated {len(datasets)} data files:")
    for name, dataset in datasets.items():
        if hasattr(dataset, "__len__"):
            print(f"   - {name}: {len(dataset)} records")
        else:
            print(f"   - {name}: (mock data)")


def _print_generated_csvs(output_dirs):
    """Print the name and byte size of every ``*.csv`` in each existing directory.

    Args:
        output_dirs: Iterable of ``pathlib.Path`` directories to inspect.
            Directories that do not exist are skipped silently.
    """
    print("\n📄 Generated CSV Files:")
    for output_dir in output_dirs:
        if not output_dir.exists():
            continue
        print(f"\n📁 {output_dir.name}:")
        # Directory listing order depends on the filesystem; sort so the
        # report is the same from run to run.
        for csv_file in sorted(output_dir.glob("*.csv")):
            print(f"   - {csv_file.name} ({csv_file.stat().st_size} bytes)")


async def demo_data_fetcher():
    """Run the data fetcher demo end to end against mocked pybaseball data.

    Installs the module-level sample DataFrames as return values on
    ``mock_pb``, imports ``automated_data_fetcher`` while ``pybaseball``,
    ``creation_helpers`` and ``exceptions`` are replaced in ``sys.modules``,
    then exercises ``DataFetcher`` and ``LiveSeriesDataFetcher``. CSV output
    goes to a temporary directory that is removed when the demo finishes, and
    a report of what was produced is printed to stdout.
    """
    print("🚀 Automated Data Fetcher Demo")
    print("=" * 50)

    # Mock pybaseball functions to return our sample data
    mock_pb.pitching_stats_bref = Mock(return_value=sample_pitching_data)
    mock_pb.batting_stats_bref = Mock(return_value=sample_batting_data)
    mock_pb.batting_stats = Mock(return_value=sample_batting_data)
    mock_pb.pitching_stats = Mock(return_value=sample_pitching_data)
    mock_pb.get_splits = Mock(return_value=sample_splits_data)

    stubbed_modules = {
        "pybaseball": mock_pb,
        "creation_helpers": Mock(),
        "exceptions": Mock(),
    }
    with patch.dict("sys.modules", stubbed_modules):
        # Must be imported only after sys.modules is patched so the fetcher
        # module binds to the stand-ins instead of the real packages.
        from automated_data_fetcher import DataFetcher, LiveSeriesDataFetcher

        with patch("automated_data_fetcher.logger") as mock_logger:
            # Redirect log output to print so it shows up in the demo output
            mock_logger.info = print
            mock_logger.warning = print
            mock_logger.error = print

            with tempfile.TemporaryDirectory() as tmp_dir:
                print(f"📁 Using temporary directory: {tmp_dir}")

                # Test 1: Season Data Fetcher
                print("\n🔄 Testing Season Data Fetcher...")
                fetcher = DataFetcher(2023, "Season")
                fetcher.output_dir = Path(tmp_dir) / "season_test"

                season_splits = {
                    "batting": sample_splits_data.copy(),
                    "pitching": pd.DataFrame(),
                }
                # patch.object (rather than a hand-built Mock) is kept on
                # purpose: it picks an awaitable mock when the replaced
                # helper is an async function.
                with patch.object(
                    fetcher, "_get_active_players", return_value=["2203", "13611"]
                ), patch.object(
                    fetcher, "_fetch_player_splits", return_value=season_splits
                ):
                    bref_data = await fetcher.fetch_baseball_reference_data()
                    fg_data = await fetcher.fetch_fangraphs_data()

                    # Combine and save; FanGraphs entries win on key clashes
                    all_data = {**bref_data, **fg_data}
                    fetcher.save_data_to_csv(all_data)

                    print(f"✅ Season data saved to: {fetcher.output_dir}")
                    _print_dataset_summary(all_data)

                # Test 2: Live Series Data Fetcher
                print("\n🔄 Testing Live Series Data Fetcher...")
                live_fetcher = LiveSeriesDataFetcher(2023, 81)
                live_fetcher.output_dir = Path(tmp_dir) / "live_test"

                live_splits = {
                    "batting": sample_splits_data.head(2),  # Smaller dataset for live
                    "pitching": pd.DataFrame(),
                }
                with patch.object(
                    live_fetcher, "_get_active_players", return_value=["2203"]
                ), patch.object(
                    live_fetcher, "_fetch_player_splits", return_value=live_splits
                ):
                    live_data = await live_fetcher.fetch_live_data()
                    live_fetcher.save_data_to_csv(live_data)

                    print(f"✅ Live data saved to: {live_fetcher.output_dir}")
                    _print_dataset_summary(live_data)
                    print(
                        f"📅 Date range: {live_fetcher.start_date} to {live_fetcher.end_date}"
                    )

                # Test 3: File outputs
                _print_generated_csvs([fetcher.output_dir, live_fetcher.output_dir])

                # Test 4: Show what still needs manual download
                print("\n⚠️  MANUAL DOWNLOAD STILL REQUIRED:")
                manual_files = [
                    "vlhp-basic.csv (FanGraphs vs LHP batting)",
                    "vlhp-rate.csv (FanGraphs vs LHP rate stats)",
                    "vrhp-basic.csv (FanGraphs vs RHP batting)",
                    "vrhp-rate.csv (FanGraphs vs RHP rate stats)",
                    "vlhh-basic.csv (FanGraphs vs LHH pitching)",
                    "vlhh-rate.csv (FanGraphs vs LHH rate stats)",
                    "vrhh-basic.csv (FanGraphs vs RHH pitching)",
                    "vrhh-rate.csv (FanGraphs vs RHH rate stats)",
                ]
                for manual_file in manual_files:
                    print(f"   - {manual_file}")

                print("\n✨ Demo Complete!")
                print("=" * 50)
                print("The automated data fetcher successfully:")
                print("✅ Created output directories")
                print("✅ Fetched Baseball Reference data (mocked)")
                print("✅ Fetched FanGraphs basic data (mocked)")
                print("✅ Saved data to properly named CSV files")
                print("✅ Handled date ranges for live series")
                print("⚠️  FanGraphs split data still requires manual download")


if __name__ == "__main__":
    # Script entry point: build the demo coroutine and drive it to completion
    # on a fresh event loop.
    demo_coroutine = demo_data_fetcher()
    asyncio.run(demo_coroutine)