import time
from io import StringIO
from urllib.parse import urlencode

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Splits leaderboard page; every configuration is fetched from this URL with
# a different query string.
SPLITS_URL = "https://www.fangraphs.com/leaders/splits-leaderboards"

# The 8 leaderboard configurations to fetch. 'name' becomes the key in the
# returned dict (and the CSV file stem when run as a script); the remaining
# keys are passed through verbatim as query parameters.
SPLIT_CONFIGS = [
    {'name': 'Batting_vLHP_Standard', 'position': 'B', 'splitArr': '1', 'statgroup': '1'},
    {'name': 'Batting_vLHP_BattedBalls', 'position': 'B', 'splitArr': '1', 'statgroup': '3'},
    {'name': 'Batting_vRHP_Standard', 'position': 'B', 'splitArr': '2', 'statgroup': '1'},
    {'name': 'Batting_vRHP_BattedBalls', 'position': 'B', 'splitArr': '2', 'statgroup': '3'},
    {'name': 'Pitching_vLHH_Standard', 'position': 'P', 'splitArr': '5', 'statgroup': '1'},
    {'name': 'Pitching_vLHH_BattedBalls', 'position': 'P', 'splitArr': '5', 'statgroup': '3'},
    {'name': 'Pitching_vRHH_Standard', 'position': 'P', 'splitArr': '6', 'statgroup': '1'},
    {'name': 'Pitching_vRHH_BattedBalls', 'position': 'P', 'splitArr': '6', 'statgroup': '3'},
]


def get_fangraphs_splits_data(*, start_date='2025-03-01', end_date='2025-11-01'):
    """
    Extract all 8 chunks of baseball splits data from FanGraphs.

    A Chrome session is opened, each entry of ``SPLIT_CONFIGS`` is loaded in
    turn, and the largest HTML table on the rendered page is parsed into a
    DataFrame. The browser is always closed on exit, including on error.

    Args:
        start_date: Value sent as the ``startDate`` query parameter.
        end_date: Value sent as the ``endDate`` query parameter.

    Returns:
        A dict mapping each configuration's ``name`` to its DataFrame.
        Configurations whose page yielded no parseable table are reported
        on stdout and omitted from the dict.

    Raises:
        selenium.common.exceptions.TimeoutException: If no ``<table>``
            element appears within 10 seconds of loading a page. This
            aborts the run; results gathered so far are not returned.
    """
    # Query parameters shared by every configuration.
    base_params = {
        'autoPt': 'false',
        'splitTeams': 'false',
        'statType': 'player',
        'startDate': start_date,
        'endDate': end_date,
        'players': '',
        'filter': '',
        'groupBy': 'season',
        'sort': '22,1',
    }

    all_data = {}
    driver = webdriver.Chrome()
    try:
        # One waiter is enough: it only holds the driver and the timeout.
        wait = WebDriverWait(driver, 10)

        for config in SPLIT_CONFIGS:
            print(f"Fetching {config['name']}...")

            url_params = {
                **base_params,
                'position': config['position'],
                'splitArr': config['splitArr'],
                'statgroup': config['statgroup'],
            }
            # urlencode escapes anything unsafe in the values; ',' is kept
            # literal so the 'sort' value is sent exactly as written.
            url = f"{SPLITS_URL}?{urlencode(url_params, safe=',')}"

            driver.get(url)

            # Block until some <table> exists in the DOM, then give the page
            # a moment to fill in its rows.
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
            time.sleep(2)

            # read_html wants a file-like object (literal HTML strings are
            # deprecated) and raises ValueError, rather than returning an
            # empty list, when no table can be parsed.
            try:
                tables = pd.read_html(StringIO(driver.page_source))
            except ValueError:
                print(" ✗ No tables found")
                continue

            # The main data table is typically the largest one.
            df = max(tables, key=len)
            all_data[config['name']] = df
            print(f" ✓ Got {len(df)} rows")

        return all_data
    finally:
        driver.quit()


def main():
    """Fetch every split, write one CSV per split, and preview one of them."""
    data = get_fangraphs_splits_data()

    # Save to CSV files
    for name, df in data.items():
        df.to_csv(f"{name}.csv", index=False)
        print(f"Saved {name}.csv")

    # Or work with the dataframes directly.
    # Example: print first few rows of batting vs LHP. The key is absent when
    # that split produced no table, so look it up defensively.
    example = data.get('Batting_vLHP_Standard')
    if example is not None:
        print("\nBatting vs LHP (Standard) - First 5 rows:")
        print(example.head())


# Usage
if __name__ == "__main__":
    main()