"""Small helpers for pulling batting data from pybaseball and Baseball Reference."""

import asyncio
import sys
import time

import pandas as pd
import pybaseball as pb
import requests
from bs4 import BeautifulSoup

# Seconds to pause after every Baseball Reference request so that repeated
# calls do not hit the site back-to-back.
_REQUEST_DELAY_SECONDS = 3

# Upper bound on how long a single HTTP request may block; without it a
# stalled connection would hang the caller forever.
_REQUEST_TIMEOUT_SECONDS = 30


def pull_stats(start_date_string: str, end_date_string: str) -> pd.DataFrame:
    """Fetch batting stats for a date range via ``pybaseball``.

    Args:
        start_date_string: First day of the range, e.g. ``"1997-03-01"``.
        end_date_string: Last day of the range, e.g. ``"1997-04-30"``.

    Returns:
        The result of ``pybaseball.batting_stats_range``, returned unchanged.
    """
    print(f"Pulling stats from {start_date_string} to {end_date_string}")
    data = pb.batting_stats_range(start_date_string, end_date_string)
    print("Got it!")
    return data


def _rows_to_frame(tbody) -> pd.DataFrame:
    """Convert the ``<tr>`` rows of a parsed ``<tbody>`` into a DataFrame.

    Only ``<td>`` cells are read, so rows made up solely of ``<th>`` cells
    are skipped. Column names come from the ``data-stat`` attribute of the
    first row that has ``<td>`` cells; each row is indexed by the text of its
    first ``<td>`` cell. All values are kept as strings.
    """
    headers = []
    records = []
    for row in tbody.find_all("tr"):
        cells = row.find_all("td")
        if not cells:
            continue
        if not headers:
            headers = [cell["data-stat"] for cell in cells]
        records.append([cell.text for cell in cells])

    index = [record[0] for record in records]
    return pd.DataFrame(records, index=index, columns=headers)


def _scrape_table(page_url: str, table_id: str, parser: str) -> pd.DataFrame:
    """Download ``page_url`` and return the table with id ``table_id``.

    Args:
        page_url: Full URL of the page to fetch.
        table_id: ``id`` attribute of the ``<table>`` element to extract.
        parser: Parser name handed to ``BeautifulSoup``.

    Returns:
        A DataFrame built from the table body (see ``_rows_to_frame``).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page has no table with that id, or the table has
            no ``<tbody>``.
    """
    response = requests.get(page_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on error responses instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, parser)
    time.sleep(_REQUEST_DELAY_SECONDS)

    table = soup.find("table", {"id": table_id})
    if table is None:
        raise ValueError(
            f"No table with id {table_id!r} found at {page_url}; the page "
            "layout may have changed or the request may have been blocked."
        )
    tbody = table.find("tbody")
    if tbody is None:
        raise ValueError(f"Table {table_id!r} at {page_url} has no <tbody>.")

    return _rows_to_frame(tbody)


def batting_game_log_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape the ``batting_gamelogs`` table from a player's game-log page.

    Args:
        bbref_id: Baseball Reference player id, e.g. ``"jeterde01"``.
        season: Season year placed in the page URL.

    Returns:
        A DataFrame with one row per table row that has ``<td>`` cells,
        indexed by the text of each row's first ``<td>`` cell.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
        ValueError: If the expected table is not present on the page.
    """
    page_url = (
        "https://www.baseball-reference.com/players/gl.fcgi"
        f"?id={bbref_id}&t=b&year={season}"
    )
    return _scrape_table(page_url, "batting_gamelogs", "html.parser")


def batting_splits_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape the ``plato`` table from a player's batting-splits page.

    Args:
        bbref_id: Baseball Reference player id, e.g. ``"jeterde01"``.
        season: Season year placed in the page URL.

    Returns:
        A DataFrame with one row per table row that has ``<td>`` cells,
        indexed by the text of each row's first ``<td>`` cell.

    Raises:
        requests.RequestException: If the page cannot be downloaded.
        ValueError: If the expected table is not present on the page.
    """
    page_url = (
        "https://www.baseball-reference.com/players/split.fcgi"
        f"?id={bbref_id}&year={season}&t=b"
    )
    return _scrape_table(page_url, "plato", "lxml")


async def main(args):
    """Script entry point: fetch one splits table and report its row count.

    Args:
        args: Command-line arguments (currently unused).
    """
    # Other calls that can be swapped in here when experimenting by hand:
    #   pull_stats("1997-03-01", "1997-04-30")
    #   batting_game_log_table("jeterde01")
    print("Grabbing game logs...")
    # NOTE: this is a blocking call; nothing else runs on the loop meanwhile.
    player_frame = batting_splits_table("jeterde01")
    print(f"Received {len(player_frame)} records")


if __name__ == "__main__":
    asyncio.run(main(sys.argv[1:]))