paper-dynasty-card-creation/tests/test_automated_data_fetcher.py
Cal Corum ee4dae0985 fix: add @pytest.mark.asyncio to async test methods (#21)
Closes #21

All 14 async test methods in tests/test_automated_data_fetcher.py were
missing @pytest.mark.asyncio. Without it, pytest collects them and
silently passes without executing the coroutine body, providing no
coverage.

Added explicit @pytest.mark.asyncio to each async def test_* method.
This makes the async intent unambiguous and is robust against any
future asyncio_mode configuration changes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-20 14:33:52 -05:00

513 lines
18 KiB
Python

import pytest
import pandas as pd
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, AsyncMock
# Build a stand-in for the pybaseball package so that importing the module
# under test never touches the real (network-backed, cached) library.
mock_pb = Mock()
mock_pb.cache = Mock()
mock_pb.cache.enable = Mock()  # module under test calls pb.cache.enable() at import time
mock_pb.batting_stats_bref = Mock()
mock_pb.pitching_stats_bref = Mock()
mock_pb.batting_stats = Mock()
mock_pb.pitching_stats = Mock()
mock_pb.batting_stats_range = Mock()
mock_pb.pitching_stats_range = Mock()
mock_pb.get_splits = Mock()

# Register the fakes in sys.modules *before* importing the module under test,
# so its top-level imports of pybaseball / creation_helpers / exceptions
# resolve to these mocks instead of the real packages.
with patch.dict(
    "sys.modules",
    {"pybaseball": mock_pb, "creation_helpers": Mock(), "exceptions": Mock()},
):
    from automated_data_fetcher import (
        DataFetcher,
        LiveSeriesDataFetcher,
        fetch_season_data,
        fetch_live_series_data,
    )
class TestDataFetcher:
    """Test cases for the DataFetcher class"""

    @pytest.fixture
    def fetcher(self):
        """Create a DataFetcher instance for testing"""
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Override output directory to use temp directory so tests never
            # write into the real project output location.
            fetcher = DataFetcher(2023, "Season")
            fetcher.output_dir = Path(tmp_dir) / "test_output"
            # yield (not return) keeps the TemporaryDirectory alive for the
            # duration of the test; it is cleaned up on fixture teardown.
            yield fetcher

    @pytest.fixture
    def sample_batting_data(self):
        """Sample batting data for testing"""
        # IDfg values double as the player-ID fallback source in
        # test_get_active_players_fallback below.
        return pd.DataFrame(
            {
                "Name": ["Player A", "Player B", "Player C"],
                "Team": ["NYY", "LAD", "BOS"],
                "G": [162, 140, 120],
                "PA": [650, 580, 450],
                "H": [180, 160, 120],
                "HR": [30, 25, 15],
                "RBI": [100, 85, 65],
                "SB": [20, 5, 8],
                "CS": [5, 2, 3],
                "SB%": [0.8, 0.714, 0.727],
                "GDP": [15, 12, 8],
                "R": [95, 80, 55],
                "BB": [65, 55, 40],
                "SO": [150, 120, 90],
                "IDfg": ["12345", "67890", "11111"],
            }
        )

    @pytest.fixture
    def sample_pitching_data(self):
        """Sample pitching data for testing"""
        return pd.DataFrame(
            {
                "Name": ["Pitcher A", "Pitcher B"],
                "Team": ["NYY", "LAD"],
                "W": [15, 12],
                "L": [8, 10],
                "ERA": [3.25, 4.15],
                "G": [32, 30],
                "GS": [32, 30],
                "IP": [200.1, 180.2],
                "H": [180, 190],
                "HR": [25, 30],
                "BB": [60, 70],
                "SO": [220, 180],
            }
        )

    @pytest.fixture
    def sample_splits_data(self):
        """Sample splits data for testing"""
        # Mixes handedness splits with home/away so the transform test can
        # verify that only the handedness rows survive filtering.
        return pd.DataFrame(
            {
                "Split": ["vs LHP", "vs RHP", "Home", "Away"],
                "G": [80, 82, 81, 81],
                "PA": [320, 330, 325, 325],
                "H": [85, 95, 90, 90],
                "AVG": [0.280, 0.295, 0.285, 0.285],
                "OBP": [0.350, 0.365, 0.360, 0.355],
                "SLG": [0.450, 0.480, 0.465, 0.465],
            }
        )

    def test_init(self, fetcher):
        """Test DataFetcher initialization"""
        assert fetcher.season == 2023
        assert fetcher.cardset_type == "Season"
        assert fetcher.cache_enabled == True
        # Note: fetcher.output_dir is overridden in the fixture to use temp directory

    def test_ensure_output_dir(self, fetcher):
        """Test output directory creation"""
        # Directory is created lazily, not in __init__.
        assert not fetcher.output_dir.exists()
        fetcher.ensure_output_dir()
        assert fetcher.output_dir.exists()

    def test_get_csv_filename(self, fetcher):
        """Test CSV filename mapping"""
        assert fetcher._get_csv_filename("pitching") == "pitching.csv"
        assert fetcher._get_csv_filename("running") == "running.csv"
        assert fetcher._get_csv_filename("batting_basic") == "batter-stats.csv"
        assert fetcher._get_csv_filename("pitching_basic") == "pitcher-stats.csv"
        # Unmapped types fall back to "<type>.csv".
        assert fetcher._get_csv_filename("unknown_type") == "unknown_type.csv"

    def test_transform_for_card_creation_batting_splits(
        self, fetcher, sample_splits_data
    ):
        """Test batting splits transformation"""
        result = fetcher._transform_for_card_creation(
            sample_splits_data, "batting_splits"
        )
        # Should filter to only handedness splits (Home/Away rows dropped).
        expected_splits = ["vs LHP", "vs RHP"]
        assert all(split in expected_splits for split in result["Split"].values)
        assert len(result) == 2

    def test_transform_for_card_creation_running(self, fetcher, sample_batting_data):
        """Test running stats transformation"""
        result = fetcher._transform_for_card_creation(sample_batting_data, "running")
        # Should include only running-related columns (subset check: every
        # surviving column must be one of the expected ones).
        expected_cols = ["Name", "SB", "CS", "SB%", "GDP"]
        assert all(col in expected_cols for col in result.columns)

    def test_save_data_to_csv(self, fetcher, sample_batting_data):
        """Test saving data to CSV"""
        fetcher.ensure_output_dir()
        data = {"batting_basic": sample_batting_data}
        fetcher.save_data_to_csv(data)
        # Check file was created under the mapped filename, not the raw key.
        expected_file = fetcher.output_dir / "batter-stats.csv"
        assert expected_file.exists()
        # Verify content round-trips with the same row count and columns.
        saved_data = pd.read_csv(expected_file)
        assert len(saved_data) == len(sample_batting_data)
        assert "Name" in saved_data.columns

    def test_save_data_to_csv_empty_dataframe(self, fetcher):
        """Test saving empty dataframe"""
        fetcher.ensure_output_dir()
        empty_data = {"empty_set": pd.DataFrame()}
        fetcher.save_data_to_csv(empty_data)
        # Should not create file for empty data
        expected_file = fetcher.output_dir / "empty_set.csv"
        assert not expected_file.exists()

    # NOTE: stacked @patch decorators inject mocks bottom-up, so the argument
    # order below is (pitching mock from the decorator closest to the def,
    # then batting mock). @pytest.mark.asyncio is required so pytest actually
    # awaits the coroutine instead of collecting it as a no-op.
    @patch("automated_data_fetcher.pb.batting_stats_bref")
    @patch("automated_data_fetcher.pb.pitching_stats_bref")
    @pytest.mark.asyncio
    async def test_fetch_baseball_reference_data(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching Baseball Reference data"""
        # Mock pybaseball functions
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data
        # Mock player ID and splits functions so no network lookups happen.
        with patch.object(
            fetcher, "_get_active_players", return_value=["12345", "67890"]
        ):
            with patch.object(
                fetcher,
                "_fetch_player_splits",
                return_value={"batting": pd.DataFrame(), "pitching": pd.DataFrame()},
            ):
                result = await fetcher.fetch_baseball_reference_data()
        # Verify data structure
        assert "pitching" in result
        assert "running" in result
        assert "batting_splits" in result
        assert "pitching_splits" in result
        # Verify data content: pitching rows come from the pitching fixture
        # (2 rows), running rows from the batting fixture (3 rows).
        assert len(result["pitching"]) == 2
        assert len(result["running"]) == 3

    @patch("automated_data_fetcher.pb.batting_stats")
    @patch("automated_data_fetcher.pb.pitching_stats")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching FanGraphs data"""
        # Mock pybaseball functions
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data
        result = await fetcher.fetch_fangraphs_data()
        # Verify data structure
        assert "batting_basic" in result
        assert "pitching_basic" in result
        # Verify function calls: with no dates, the season is passed as both
        # the start and end year.
        mock_batting.assert_called_once_with(2023, 2023)
        mock_pitching.assert_called_once_with(2023, 2023)

    @patch("automated_data_fetcher.pb.batting_stats_range")
    @patch("automated_data_fetcher.pb.pitching_stats_range")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data_with_dates(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching FanGraphs data with date range"""
        # Mock pybaseball functions — note the *_range variants are patched
        # here, since supplying dates switches the fetch to range queries.
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data
        start_date = "2023-03-01"
        end_date = "2023-09-01"
        result = await fetcher.fetch_fangraphs_data(start_date, end_date)
        # Verify function calls with date parameters
        mock_batting.assert_called_once_with(start_date, end_date)
        mock_pitching.assert_called_once_with(start_date, end_date)

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @pytest.mark.asyncio
    async def test_get_active_players_existing_function(self, mock_get_ids, fetcher):
        """Test getting player IDs using existing function"""
        mock_get_ids.return_value = ["12345", "67890", "11111"]
        result = await fetcher._get_active_players()
        assert result == ["12345", "67890", "11111"]
        mock_get_ids.assert_called_once_with(2023)

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_get_active_players_fallback(
        self, mock_batting, mock_get_ids, fetcher, sample_batting_data
    ):
        """Test getting player IDs with fallback to FanGraphs"""
        # Mock existing function to fail so the fallback path is exercised.
        mock_get_ids.side_effect = Exception("Function not available")
        mock_batting.return_value = sample_batting_data
        result = await fetcher._get_active_players()
        # Should fallback to FanGraphs data — IDs come from the fixture's
        # IDfg column.
        expected_ids = ["12345", "67890", "11111"]
        assert result == expected_ids

    @patch("automated_data_fetcher.pb.get_splits")
    @pytest.mark.asyncio
    async def test_fetch_player_splits(
        self, mock_get_splits, fetcher, sample_splits_data
    ):
        """Test fetching player splits"""
        # Mock get_splits to return sample data
        mock_get_splits.return_value = sample_splits_data
        player_ids = ["12345", "67890"]
        result = await fetcher._fetch_player_splits(player_ids)
        # Verify structure
        assert "batting" in result
        assert "pitching" in result
        # Verify splits were called for each player
        assert mock_get_splits.call_count == 4  # 2 players * 2 split types
class TestLiveSeriesDataFetcher:
    """Test cases for the LiveSeriesDataFetcher class"""

    @pytest.fixture
    def live_fetcher(self):
        """Create a LiveSeriesDataFetcher instance for testing"""
        with tempfile.TemporaryDirectory() as workdir:
            # 81 games = half of a 162-game season.
            instance = LiveSeriesDataFetcher(2023, 81)
            # Redirect output into the temp dir so nothing real is touched.
            instance.output_dir = Path(workdir) / "test_output"
            yield instance

    def test_init(self, live_fetcher):
        """Test LiveSeriesDataFetcher initialization"""
        assert live_fetcher.season == 2023
        assert live_fetcher.cardset_type == "Live"
        assert live_fetcher.games_played == 81
        assert live_fetcher.start_date == "2023-03-01"

    def test_calculate_end_date(self, live_fetcher):
        """Test end date calculation"""
        # A half-season (81 games) should land on a plausible mid-season date.
        half_season_end = live_fetcher._calculate_end_date(81)
        # Must be a YYYY-MM-DD string within the season year...
        assert len(half_season_end) == 10
        assert half_season_end.startswith("2023")
        # ...falling after the season start date.
        assert half_season_end > "2023-03-01"
        # A full 162-game schedule must end later than a half schedule.
        full_season_end = live_fetcher._calculate_end_date(162)
        assert full_season_end > half_season_end

    @patch.object(DataFetcher, "fetch_baseball_reference_data")
    @patch.object(DataFetcher, "fetch_fangraphs_data")
    @pytest.mark.asyncio
    async def test_fetch_live_data(self, fangraphs_mock, bref_mock, live_fetcher):
        """Test fetching live series data"""
        # Stub each upstream source with a distinct payload.
        bref_mock.return_value = {
            "pitching": pd.DataFrame(),
            "running": pd.DataFrame(),
        }
        fangraphs_mock.return_value = {"batting_basic": pd.DataFrame()}

        combined = await live_fetcher.fetch_live_data()

        # Both sources consulted once; FanGraphs receives the date window.
        bref_mock.assert_called_once()
        fangraphs_mock.assert_called_once_with(
            live_fetcher.start_date, live_fetcher.end_date
        )
        # The merged result carries every key from both sources.
        for key in ("pitching", "running", "batting_basic"):
            assert key in combined
class TestUtilityFunctions:
    """Test cases for utility functions"""

    @patch("automated_data_fetcher.DataFetcher")
    @pytest.mark.asyncio
    async def test_fetch_season_data(self, fetcher_cls):
        """Test fetch_season_data function"""
        # Assemble a stub fetcher whose async fetches resolve immediately.
        stub = Mock()
        stub.fetch_baseball_reference_data = AsyncMock(
            return_value={"pitching": pd.DataFrame()}
        )
        stub.fetch_fangraphs_data = AsyncMock(
            return_value={"batting_basic": pd.DataFrame()}
        )
        stub.save_data_to_csv = Mock()
        stub.output_dir = Path("test/output")
        fetcher_cls.return_value = stub

        # Capture stdout so the completion banner can be inspected.
        with patch("builtins.print") as print_spy:
            await fetch_season_data(2023)

        # The fetcher was constructed for the season and fully driven.
        fetcher_cls.assert_called_once_with(2023, "Season")
        stub.fetch_baseball_reference_data.assert_called_once()
        stub.fetch_fangraphs_data.assert_called_once()
        stub.save_data_to_csv.assert_called_once()
        # The user-facing completion message must have been printed.
        printed = [c[0][0] for c in print_spy.call_args_list]
        assert any("AUTOMATED DOWNLOAD COMPLETE" in line for line in printed)

    @patch("automated_data_fetcher.LiveSeriesDataFetcher")
    @pytest.mark.asyncio
    async def test_fetch_live_series_data(self, fetcher_cls):
        """Test fetch_live_series_data function"""
        stub = Mock()
        stub.fetch_live_data = AsyncMock(
            return_value={"live_data": pd.DataFrame()}
        )
        stub.save_data_to_csv = Mock()
        fetcher_cls.return_value = stub

        await fetch_live_series_data(2023, 81)

        # Constructed with (season, games_played) and driven end to end.
        fetcher_cls.assert_called_once_with(2023, 81)
        stub.fetch_live_data.assert_called_once()
        stub.save_data_to_csv.assert_called_once()
class TestErrorHandling:
    """Test error handling scenarios"""

    @pytest.fixture
    def fetcher(self):
        """Create a DataFetcher instance for error testing"""
        return DataFetcher(2023, "Season")

    @patch("automated_data_fetcher.pb.pitching_stats_bref")
    @pytest.mark.asyncio
    async def test_fetch_baseball_reference_data_error(self, pitching_mock, fetcher):
        """Test error handling in Baseball Reference data fetch"""
        # A pybaseball failure should surface as a wrapped, descriptive error.
        pitching_mock.side_effect = Exception("Network error")
        with pytest.raises(Exception, match="Error fetching Baseball Reference data"):
            await fetcher.fetch_baseball_reference_data()

    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data_error(self, batting_mock, fetcher):
        """Test error handling in FanGraphs data fetch"""
        batting_mock.side_effect = Exception("API error")
        with pytest.raises(Exception, match="Error fetching FanGraphs data"):
            await fetcher.fetch_fangraphs_data()

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_get_active_players_complete_failure(
        self, batting_mock, ids_mock, fetcher
    ):
        """Test complete failure in getting player IDs"""
        # Break both the primary lookup and the FanGraphs fallback.
        ids_mock.side_effect = Exception("Function error")
        batting_mock.side_effect = Exception("API error")
        result = await fetcher._get_active_players()
        # When every source fails, the method degrades to an empty list.
        assert result == []

    @patch("automated_data_fetcher.pb.get_splits")
    @pytest.mark.asyncio
    async def test_fetch_player_splits_individual_errors(self, splits_mock, fetcher):
        """Test handling individual player split fetch errors"""

        def fake_splits(player_id, **kwargs):
            # Fail for exactly one player; succeed for everyone else.
            if player_id == "bad_player":
                raise Exception("Player not found")
            return pd.DataFrame({"Split": ["vs LHP"], "AVG": [0.250]})

        splits_mock.side_effect = fake_splits
        roster = ["good_player", "bad_player", "another_good_player"]
        outcome = await fetcher._fetch_player_splits(roster)
        # Errors are handled gracefully; successful players still produce data.
        assert "batting" in outcome
        assert "pitching" in outcome
        # Every player is still attempted for both split types.
        assert splits_mock.call_count == 6  # 3 players * 2 split types
# Integration test markers
@pytest.mark.integration
class TestIntegration:
    """Integration tests that require network access"""

    @pytest.mark.skip(reason="Requires network access and may be slow")
    @pytest.mark.asyncio
    async def test_real_data_fetch(self):
        """Test fetching real data from pybaseball (skip by default)"""
        # A completed season keeps the live API's payload stable.
        target = DataFetcher(2022, "Season")
        # This genuinely hits the pybaseball APIs; the assertions sit inside
        # the try so any failure (including missing keys) becomes a skip
        # rather than a hard error when networking misbehaves.
        try:
            payload = await target.fetch_fangraphs_data()
            assert "batting_basic" in payload
            assert "pitching_basic" in payload
        except Exception as e:
            pytest.skip(f"Network error during integration test: {e}")
if __name__ == "__main__":
    # Allow running this module directly: execute its own tests verbosely.
    cli_args = [__file__, "-v"]
    pytest.main(cli_args)