Closes #21 All 14 async test methods in tests/test_automated_data_fetcher.py were missing @pytest.mark.asyncio. Without it, pytest collects them and silently passes without executing the coroutine body, providing no coverage. Added explicit @pytest.mark.asyncio to each async def test_* method. This makes the async intent unambiguous and is robust against any future asyncio_mode configuration changes. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
513 lines
18 KiB
Python
513 lines
18 KiB
Python
import pytest
import pandas as pd
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch, AsyncMock

# Create a proper mock for pybaseball so the module under test can be imported
# without the real (network-dependent) pybaseball package installed.
# Each attribute mirrors a pybaseball API the fetcher module touches at import
# time or during tests.
mock_pb = Mock()
mock_pb.cache = Mock()
mock_pb.cache.enable = Mock()
mock_pb.batting_stats_bref = Mock()
mock_pb.pitching_stats_bref = Mock()
mock_pb.batting_stats = Mock()
mock_pb.pitching_stats = Mock()
mock_pb.batting_stats_range = Mock()
mock_pb.pitching_stats_range = Mock()
mock_pb.get_splits = Mock()

# Mock the modules before importing.  The patch.dict only needs to span the
# import statement itself: automated_data_fetcher binds its own references to
# mock_pb during import and keeps them after sys.modules is restored.
with patch.dict(
    "sys.modules",
    {"pybaseball": mock_pb, "creation_helpers": Mock(), "exceptions": Mock()},
):
    from automated_data_fetcher import (
        DataFetcher,
        LiveSeriesDataFetcher,
        fetch_season_data,
        fetch_live_series_data,
    )
|
|
|
|
|
|
class TestDataFetcher:
    """Test cases for the DataFetcher class"""

    @pytest.fixture
    def fetcher(self):
        """Create a DataFetcher instance for testing.

        Yields (rather than returns) so the TemporaryDirectory stays alive
        for the duration of each test and is cleaned up afterwards.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Override output directory to use temp directory
            fetcher = DataFetcher(2023, "Season")
            fetcher.output_dir = Path(tmp_dir) / "test_output"
            yield fetcher

    @pytest.fixture
    def sample_batting_data(self):
        """Sample batting data for testing"""
        return pd.DataFrame(
            {
                "Name": ["Player A", "Player B", "Player C"],
                "Team": ["NYY", "LAD", "BOS"],
                "G": [162, 140, 120],
                "PA": [650, 580, 450],
                "H": [180, 160, 120],
                "HR": [30, 25, 15],
                "RBI": [100, 85, 65],
                "SB": [20, 5, 8],
                "CS": [5, 2, 3],
                "SB%": [0.8, 0.714, 0.727],
                "GDP": [15, 12, 8],
                "R": [95, 80, 55],
                "BB": [65, 55, 40],
                "SO": [150, 120, 90],
                "IDfg": ["12345", "67890", "11111"],
            }
        )

    @pytest.fixture
    def sample_pitching_data(self):
        """Sample pitching data for testing"""
        return pd.DataFrame(
            {
                "Name": ["Pitcher A", "Pitcher B"],
                "Team": ["NYY", "LAD"],
                "W": [15, 12],
                "L": [8, 10],
                "ERA": [3.25, 4.15],
                "G": [32, 30],
                "GS": [32, 30],
                "IP": [200.1, 180.2],
                "H": [180, 190],
                "HR": [25, 30],
                "BB": [60, 70],
                "SO": [220, 180],
            }
        )

    @pytest.fixture
    def sample_splits_data(self):
        """Sample splits data for testing"""
        return pd.DataFrame(
            {
                "Split": ["vs LHP", "vs RHP", "Home", "Away"],
                "G": [80, 82, 81, 81],
                "PA": [320, 330, 325, 325],
                "H": [85, 95, 90, 90],
                "AVG": [0.280, 0.295, 0.285, 0.285],
                "OBP": [0.350, 0.365, 0.360, 0.355],
                "SLG": [0.450, 0.480, 0.465, 0.465],
            }
        )

    def test_init(self, fetcher):
        """Test DataFetcher initialization"""
        assert fetcher.season == 2023
        assert fetcher.cardset_type == "Season"
        # Identity check instead of "== True" (PEP 8 / flake8 E712).
        assert fetcher.cache_enabled is True
        # Note: fetcher.output_dir is overridden in the fixture to use temp directory

    def test_ensure_output_dir(self, fetcher):
        """Test output directory creation"""
        assert not fetcher.output_dir.exists()
        fetcher.ensure_output_dir()
        assert fetcher.output_dir.exists()

    def test_get_csv_filename(self, fetcher):
        """Test CSV filename mapping"""
        assert fetcher._get_csv_filename("pitching") == "pitching.csv"
        assert fetcher._get_csv_filename("running") == "running.csv"
        assert fetcher._get_csv_filename("batting_basic") == "batter-stats.csv"
        assert fetcher._get_csv_filename("pitching_basic") == "pitcher-stats.csv"
        # Unrecognized data types fall back to "<type>.csv".
        assert fetcher._get_csv_filename("unknown_type") == "unknown_type.csv"

    def test_transform_for_card_creation_batting_splits(
        self, fetcher, sample_splits_data
    ):
        """Test batting splits transformation"""
        result = fetcher._transform_for_card_creation(
            sample_splits_data, "batting_splits"
        )

        # Should filter to only handedness splits
        expected_splits = ["vs LHP", "vs RHP"]
        assert all(split in expected_splits for split in result["Split"].values)
        assert len(result) == 2

    def test_transform_for_card_creation_running(self, fetcher, sample_batting_data):
        """Test running stats transformation"""
        result = fetcher._transform_for_card_creation(sample_batting_data, "running")

        # Should include only running-related columns (subset check — the
        # transform may legitimately drop a column that is absent upstream).
        expected_cols = ["Name", "SB", "CS", "SB%", "GDP"]
        assert set(result.columns) <= set(expected_cols)

    def test_save_data_to_csv(self, fetcher, sample_batting_data):
        """Test saving data to CSV"""
        fetcher.ensure_output_dir()

        data = {"batting_basic": sample_batting_data}
        fetcher.save_data_to_csv(data)

        # Check file was created under the mapped CSV filename
        expected_file = fetcher.output_dir / "batter-stats.csv"
        assert expected_file.exists()

        # Verify content round-trips
        saved_data = pd.read_csv(expected_file)
        assert len(saved_data) == len(sample_batting_data)
        assert "Name" in saved_data.columns

    def test_save_data_to_csv_empty_dataframe(self, fetcher):
        """Test saving empty dataframe"""
        fetcher.ensure_output_dir()

        empty_data = {"empty_set": pd.DataFrame()}
        fetcher.save_data_to_csv(empty_data)

        # Should not create file for empty data
        expected_file = fetcher.output_dir / "empty_set.csv"
        assert not expected_file.exists()

    @patch("automated_data_fetcher.pb.batting_stats_bref")
    @patch("automated_data_fetcher.pb.pitching_stats_bref")
    @pytest.mark.asyncio
    async def test_fetch_baseball_reference_data(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching Baseball Reference data.

        NOTE: @patch decorators apply bottom-up, so the innermost patch
        (pitching_stats_bref) becomes the first mock argument.
        """
        # Mock pybaseball functions
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data

        # Mock player ID and splits functions so no per-player lookups happen
        with patch.object(
            fetcher, "_get_active_players", return_value=["12345", "67890"]
        ):
            with patch.object(
                fetcher,
                "_fetch_player_splits",
                return_value={"batting": pd.DataFrame(), "pitching": pd.DataFrame()},
            ):
                result = await fetcher.fetch_baseball_reference_data()

        # Verify data structure
        assert "pitching" in result
        assert "running" in result
        assert "batting_splits" in result
        assert "pitching_splits" in result

        # Verify data content (2 sample pitchers, 3 sample batters)
        assert len(result["pitching"]) == 2
        assert len(result["running"]) == 3

    @patch("automated_data_fetcher.pb.batting_stats")
    @patch("automated_data_fetcher.pb.pitching_stats")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching FanGraphs data"""
        # Mock pybaseball functions
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data

        result = await fetcher.fetch_fangraphs_data()

        # Verify data structure
        assert "batting_basic" in result
        assert "pitching_basic" in result

        # Verify function calls: full-season fetch uses (season, season)
        mock_batting.assert_called_once_with(2023, 2023)
        mock_pitching.assert_called_once_with(2023, 2023)

    @patch("automated_data_fetcher.pb.batting_stats_range")
    @patch("automated_data_fetcher.pb.pitching_stats_range")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data_with_dates(
        self,
        mock_pitching,
        mock_batting,
        fetcher,
        sample_batting_data,
        sample_pitching_data,
    ):
        """Test fetching FanGraphs data with date range"""
        # Mock pybaseball functions
        mock_batting.return_value = sample_batting_data
        mock_pitching.return_value = sample_pitching_data

        start_date = "2023-03-01"
        end_date = "2023-09-01"
        # Return value intentionally unused — this test only checks routing
        # to the *_stats_range functions with the date window.
        await fetcher.fetch_fangraphs_data(start_date, end_date)

        # Verify function calls with date parameters
        mock_batting.assert_called_once_with(start_date, end_date)
        mock_pitching.assert_called_once_with(start_date, end_date)

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @pytest.mark.asyncio
    async def test_get_active_players_existing_function(self, mock_get_ids, fetcher):
        """Test getting player IDs using existing function"""
        mock_get_ids.return_value = ["12345", "67890", "11111"]

        result = await fetcher._get_active_players()

        assert result == ["12345", "67890", "11111"]
        mock_get_ids.assert_called_once_with(2023)

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_get_active_players_fallback(
        self, mock_batting, mock_get_ids, fetcher, sample_batting_data
    ):
        """Test getting player IDs with fallback to FanGraphs"""
        # Mock existing function to fail so the fallback path runs
        mock_get_ids.side_effect = Exception("Function not available")
        mock_batting.return_value = sample_batting_data

        result = await fetcher._get_active_players()

        # Should fall back to FanGraphs data (IDfg column of the sample)
        expected_ids = ["12345", "67890", "11111"]
        assert result == expected_ids

    @patch("automated_data_fetcher.pb.get_splits")
    @pytest.mark.asyncio
    async def test_fetch_player_splits(
        self, mock_get_splits, fetcher, sample_splits_data
    ):
        """Test fetching player splits"""
        # Mock get_splits to return sample data for every lookup
        mock_get_splits.return_value = sample_splits_data

        player_ids = ["12345", "67890"]
        result = await fetcher._fetch_player_splits(player_ids)

        # Verify structure
        assert "batting" in result
        assert "pitching" in result

        # Verify splits were called for each player
        assert mock_get_splits.call_count == 4  # 2 players * 2 split types
|
|
|
|
|
|
class TestLiveSeriesDataFetcher:
    """Test cases for the LiveSeriesDataFetcher class"""

    @pytest.fixture
    def live_fetcher(self):
        """Yield a LiveSeriesDataFetcher that writes into a throwaway dir."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            live = LiveSeriesDataFetcher(2023, 81)  # Half season
            live.output_dir = Path(tmp_dir) / "test_output"
            yield live

    def test_init(self, live_fetcher):
        """Test LiveSeriesDataFetcher initialization"""
        assert live_fetcher.season == 2023
        assert live_fetcher.cardset_type == "Live"
        assert live_fetcher.games_played == 81
        assert live_fetcher.start_date == "2023-03-01"

    def test_calculate_end_date(self, live_fetcher):
        """Test end date calculation"""
        # 81 games should be roughly half a season
        half_season_end = live_fetcher._calculate_end_date(81)

        # A valid YYYY-MM-DD string in the right season...
        assert len(half_season_end) == 10
        assert half_season_end.startswith("2023")
        # ...falling after the start date (ISO dates order lexicographically)
        assert half_season_end > "2023-03-01"

        # A full 162-game season must end later than a half season
        full_season_end = live_fetcher._calculate_end_date(162)
        assert full_season_end > half_season_end

    @patch.object(DataFetcher, "fetch_baseball_reference_data")
    @patch.object(DataFetcher, "fetch_fangraphs_data")
    @pytest.mark.asyncio
    async def test_fetch_live_data(self, mock_fg_data, mock_bref_data, live_fetcher):
        """Test fetching live series data"""
        # Stub out both upstream fetches
        mock_bref_data.return_value = {
            "pitching": pd.DataFrame(),
            "running": pd.DataFrame(),
        }
        mock_fg_data.return_value = {"batting_basic": pd.DataFrame()}

        result = await live_fetcher.fetch_live_data()

        # Both data sources were consulted, FanGraphs with the date window
        mock_bref_data.assert_called_once()
        mock_fg_data.assert_called_once_with(
            live_fetcher.start_date, live_fetcher.end_date
        )

        # The combined result carries keys from both sources
        for key in ("pitching", "running", "batting_basic"):
            assert key in result
|
|
|
|
|
|
class TestUtilityFunctions:
    """Test cases for utility functions"""

    @patch("automated_data_fetcher.DataFetcher")
    @pytest.mark.asyncio
    async def test_fetch_season_data(self, mock_fetcher_class):
        """Test fetch_season_data function"""
        # Create mock fetcher instance; async methods need AsyncMock so the
        # awaited calls resolve to the stub dictionaries.
        mock_fetcher = Mock()
        mock_fetcher.fetch_baseball_reference_data = AsyncMock(
            return_value={"pitching": pd.DataFrame()}
        )
        mock_fetcher.fetch_fangraphs_data = AsyncMock(
            return_value={"batting_basic": pd.DataFrame()}
        )
        mock_fetcher.save_data_to_csv = Mock()
        mock_fetcher.output_dir = Path("test/output")
        mock_fetcher_class.return_value = mock_fetcher

        # Capture print output
        with patch("builtins.print") as mock_print:
            await fetch_season_data(2023)

        # Verify fetcher was created and methods called
        mock_fetcher_class.assert_called_once_with(2023, "Season")
        mock_fetcher.fetch_baseball_reference_data.assert_called_once()
        mock_fetcher.fetch_fangraphs_data.assert_called_once()
        mock_fetcher.save_data_to_csv.assert_called_once()

        # Verify print output includes completion message.  Skip calls with
        # no positional args (a bare print() for a blank line would otherwise
        # raise IndexError) and stringify in case non-str objects are printed.
        print_calls = [str(c.args[0]) for c in mock_print.call_args_list if c.args]
        assert any("AUTOMATED DOWNLOAD COMPLETE" in call for call in print_calls)

    @patch("automated_data_fetcher.LiveSeriesDataFetcher")
    @pytest.mark.asyncio
    async def test_fetch_live_series_data(self, mock_fetcher_class):
        """Test fetch_live_series_data function"""
        # Create mock fetcher instance
        mock_fetcher = Mock()
        mock_fetcher.fetch_live_data = AsyncMock(
            return_value={"live_data": pd.DataFrame()}
        )
        mock_fetcher.save_data_to_csv = Mock()
        mock_fetcher_class.return_value = mock_fetcher

        await fetch_live_series_data(2023, 81)

        # Verify fetcher was created and methods called
        mock_fetcher_class.assert_called_once_with(2023, 81)
        mock_fetcher.fetch_live_data.assert_called_once()
        mock_fetcher.save_data_to_csv.assert_called_once()
|
|
|
|
|
|
class TestErrorHandling:
    """Test error handling scenarios"""

    @pytest.fixture
    def fetcher(self):
        """A plain DataFetcher — these tests never write to output_dir."""
        return DataFetcher(2023, "Season")

    @patch("automated_data_fetcher.pb.pitching_stats_bref")
    @pytest.mark.asyncio
    async def test_fetch_baseball_reference_data_error(self, mock_pitching, fetcher):
        """Test error handling in Baseball Reference data fetch"""
        # Simulate a network failure inside pybaseball
        mock_pitching.side_effect = Exception("Network error")

        with pytest.raises(Exception, match="Error fetching Baseball Reference data"):
            await fetcher.fetch_baseball_reference_data()

    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_fetch_fangraphs_data_error(self, mock_batting, fetcher):
        """Test error handling in FanGraphs data fetch"""
        # Simulate an API failure inside pybaseball
        mock_batting.side_effect = Exception("API error")

        with pytest.raises(Exception, match="Error fetching FanGraphs data"):
            await fetcher.fetch_fangraphs_data()

    @patch("automated_data_fetcher.get_all_pybaseball_ids")
    @patch("automated_data_fetcher.pb.batting_stats")
    @pytest.mark.asyncio
    async def test_get_active_players_complete_failure(
        self, mock_batting, mock_get_ids, fetcher
    ):
        """Test complete failure in getting player IDs"""
        # Both the primary lookup and the FanGraphs fallback blow up
        mock_get_ids.side_effect = Exception("Function error")
        mock_batting.side_effect = Exception("API error")

        # When every method fails the result degrades to an empty list
        assert await fetcher._get_active_players() == []

    @patch("automated_data_fetcher.pb.get_splits")
    @pytest.mark.asyncio
    async def test_fetch_player_splits_individual_errors(
        self, mock_get_splits, fetcher
    ):
        """Test handling individual player split fetch errors"""

        def fail_for_bad_player(player_id, **kwargs):
            # One player's lookup fails; everyone else succeeds
            if player_id == "bad_player":
                raise Exception("Player not found")
            return pd.DataFrame({"Split": ["vs LHP"], "AVG": [0.250]})

        mock_get_splits.side_effect = fail_for_bad_player

        roster = ["good_player", "bad_player", "another_good_player"]
        result = await fetcher._fetch_player_splits(roster)

        # Errors are absorbed gracefully; both split tables are still present
        assert "batting" in result
        assert "pitching" in result

        # Every player was attempted despite the mid-roster failure
        assert mock_get_splits.call_count == 6  # 3 players * 2 split types
|
|
|
|
|
|
# Integration test markers
@pytest.mark.integration
class TestIntegration:
    """Integration tests that require network access"""

    @pytest.mark.skip(reason="Requires network access and may be slow")
    @pytest.mark.asyncio
    async def test_real_data_fetch(self):
        """Test fetching real data from pybaseball (skip by default)"""
        fetcher = DataFetcher(2022, "Season")  # Use a complete season

        # This would actually call pybaseball APIs.
        # Only run when specifically testing integration.
        try:
            fg_data = await fetcher.fetch_fangraphs_data()
        except Exception as e:
            pytest.skip(f"Network error during integration test: {e}")

        # Assertions deliberately live OUTSIDE the try block: previously they
        # were inside it, so an AssertionError was caught by `except
        # Exception` and converted into a skip — the test could never fail.
        assert "batting_basic" in fg_data
        assert "pitching_basic" in fg_data
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests directly and propagate pytest's exit status,
    # so shell callers / CI observe failures (the code was previously
    # discarded and the script always exited 0).
    raise SystemExit(pytest.main([__file__, "-v"]))
|