"""Scrape the FanGraphs splits leaderboards with Selenium and save them as CSV."""

import time
from io import StringIO
from urllib.parse import urlencode

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

_SPLITS_URL = "https://www.fangraphs.com/leaders/splits-leaderboards"

# Seconds to wait for a <table> element to appear after navigation.
_TABLE_TIMEOUT_S = 10

# Fixed pause after the table element exists, giving the page time to fill
# in its rows before the HTML is captured.
_RENDER_PAUSE_S = 2

# Query parameters sent with every request.
_BASE_PARAMS = {
    "autoPt": "false",
    "splitTeams": "false",
    "statType": "player",
    "startDate": "2025-03-01",
    "endDate": "2025-11-01",
    "players": "",
    "filter": "",
    "groupBy": "season",
    "sort": "22,1",
}

# The eight leaderboard variants to fetch. "name" becomes the key in the
# returned dict (and the CSV file stem); the other three values are passed
# through to the URL as query parameters of the same name.
_SPLIT_CONFIGS = [
    {
        "name": "Batting_vLHP_Standard",
        "position": "B",
        "splitArr": "1",
        "statgroup": "1",
    },
    {
        "name": "Batting_vLHP_BattedBalls",
        "position": "B",
        "splitArr": "1",
        "statgroup": "3",
    },
    {
        "name": "Batting_vRHP_Standard",
        "position": "B",
        "splitArr": "2",
        "statgroup": "1",
    },
    {
        "name": "Batting_vRHP_BattedBalls",
        "position": "B",
        "splitArr": "2",
        "statgroup": "3",
    },
    {
        "name": "Pitching_vLHH_Standard",
        "position": "P",
        "splitArr": "5",
        "statgroup": "1",
    },
    {
        "name": "Pitching_vLHH_BattedBalls",
        "position": "P",
        "splitArr": "5",
        "statgroup": "3",
    },
    {
        "name": "Pitching_vRHH_Standard",
        "position": "P",
        "splitArr": "6",
        "statgroup": "1",
    },
    {
        "name": "Pitching_vRHH_BattedBalls",
        "position": "P",
        "splitArr": "6",
        "statgroup": "3",
    },
]


def _build_url(config):
    """Return the splits-leaderboard URL for one entry of ``_SPLIT_CONFIGS``."""
    params = {
        **_BASE_PARAMS,
        "position": config["position"],
        "splitArr": config["splitArr"],
        "statgroup": config["statgroup"],
    }
    # safe="," leaves the comma in the "sort" value unescaped, so the URL is
    # identical to a plain "k=v" join for the current values while any other
    # reserved character would still be percent-encoded.
    return f"{_SPLITS_URL}?{urlencode(params, safe=',')}"


def get_fangraphs_splits_data():
    """Extract all 8 chunks of baseball splits data from FanGraphs.

    Opens a Chrome session, loads each configured leaderboard page, waits for
    a ``<table>`` element, and parses the rendered HTML with pandas. The
    table with the most rows on each page is taken as the data table.

    Returns:
        dict mapping each config name (e.g. ``"Batting_vLHP_Standard"``) to
        the ``pandas.DataFrame`` parsed from its page. A config whose page
        yields no parseable table is reported on stdout and left out.

    Raises:
        selenium.common.exceptions.TimeoutException: if no ``<table>``
            element appears within the wait timeout for some page. The
            browser is still closed before the exception propagates.
    """
    driver = webdriver.Chrome()
    all_data = {}

    try:
        wait = WebDriverWait(driver, _TABLE_TIMEOUT_S)

        for config in _SPLIT_CONFIGS:
            name = config["name"]
            print(f"Fetching {name}...")

            driver.get(_build_url(config))

            # The table element can exist before its rows are populated,
            # hence the extra fixed pause after the explicit wait.
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
            time.sleep(_RENDER_PAUSE_S)

            # read_html expects a file-like object (literal HTML strings are
            # deprecated) and raises ValueError when it finds no tables.
            try:
                tables = pd.read_html(StringIO(driver.page_source))
            except ValueError:
                tables = []

            if not tables:
                print(" ✗ No tables found")
                continue

            # The main data table is typically the largest one.
            df = max(tables, key=len)
            all_data[name] = df
            print(f" ✓ Got {len(df)} rows")

        return all_data
    finally:
        driver.quit()


def main():
    """Fetch every split, write one CSV per split, and preview one table."""
    data = get_fangraphs_splits_data()

    # Save to CSV files
    for name, df in data.items():
        df.to_csv(f"{name}.csv", index=False)
        print(f"Saved {name}.csv")

    # Or work with the dataframes directly.
    # Example: print first few rows of batting vs LHP. The key is absent
    # when that page produced no table, so look it up defensively.
    preview = data.get("Batting_vLHP_Standard")
    if preview is not None:
        print("\nBatting vs LHP (Standard) - First 5 rows:")
        print(preview.head())


# Usage
if __name__ == "__main__":
    main()