"""Scratch script: pull batting stats via pybaseball and scrape Baseball Reference tables."""
import asyncio
import sys
import time

from bs4 import BeautifulSoup
import pandas as pd
import pybaseball as pb
import requests

from creation_helpers import get_pitching_peripherals

# Pause applied after every Baseball Reference request (presumably to throttle
# scraping -- the original code slept for the same 3 seconds).
_REQUEST_DELAY_SECONDS = 3
# Upper bound on how long a single HTTP request may block before giving up.
_REQUEST_TIMEOUT_SECONDS = 30


class TableNotFoundError(AttributeError):
    """Raised when a fetched page does not contain the expected stats table.

    Subclasses ``AttributeError`` because that is what the previously
    unguarded ``None.find(...)`` lookup raised, so existing
    ``except AttributeError`` handlers keep working.
    """


def pull_stats(start_date_string: str, end_date_string: str):
    """Fetch batting stats for a date range through pybaseball.

    Args:
        start_date_string: First date of the range, passed straight through to
            ``pybaseball.batting_stats_range`` (main's sample call uses
            ``'YYYY-MM-DD'``).
        end_date_string: Last date of the range, same format.

    Returns:
        Whatever ``pybaseball.batting_stats_range`` returns for the range.
    """
    print(f'Pulling stats from {start_date_string} to {end_date_string}')
    data = pb.batting_stats_range(start_date_string, end_date_string)
    print('Got it!')
    return data


def _scrape_stat_table(page_url: str, table_id: str, parser: str) -> pd.DataFrame:
    """Download ``page_url`` and convert the ``<table id=table_id>`` body to a frame.

    Only ``<td>`` cells are read; rows without any ``<td>`` are skipped.
    Column names come from the ``data-stat`` attribute of the first row that
    has ``<td>`` cells, and each row is indexed by the text of its first
    ``<td>``.

    Args:
        page_url: Full URL of the page to fetch.
        table_id: ``id`` attribute of the table to extract.
        parser: BeautifulSoup parser name (e.g. ``'html.parser'`` or ``'lxml'``).

    Returns:
        A DataFrame of cell text (all values are strings).

    Raises:
        TableNotFoundError: If the table or its ``<tbody>`` is missing.
        requests.RequestException: If the HTTP request fails or times out.
    """
    response = requests.get(page_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    soup = BeautifulSoup(response.text, parser)
    # Sleep before any validation so that callers looping over many players
    # still pause between requests even when a page turns out to be unusable.
    time.sleep(_REQUEST_DELAY_SECONDS)

    table = soup.find('table', {'id': table_id})
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        raise TableNotFoundError(
            f'No usable table with id {table_id!r} at {page_url} '
            f'(HTTP status {response.status_code})'
        )

    headers = []
    data = []
    index = []
    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            continue
        if not headers:
            # The first data row defines the column names for the whole frame.
            headers = [cell['data-stat'] for cell in cells]
        values = [cell.text for cell in cells]
        data.append(values)
        index.append(values[0])

    return pd.DataFrame(data, index=index, columns=headers)


def batting_game_log_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape a player's batting game log table from Baseball Reference.

    Args:
        bbref_id: Baseball Reference player id (e.g. ``'jeterde01'``).
        season: Season year placed in the request URL.

    Returns:
        A DataFrame built from the ``batting_gamelogs`` table; see
        ``_scrape_stat_table`` for the row/column layout.

    Raises:
        TableNotFoundError: If the page has no ``batting_gamelogs`` table.
        requests.RequestException: If the HTTP request fails or times out.
    """
    page_url = f'https://www.baseball-reference.com/players/gl.fcgi?id={bbref_id}&t=b&year={season}'
    return _scrape_stat_table(page_url, 'batting_gamelogs', 'html.parser')


def batting_splits_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape the ``plato`` table from a player's Baseball Reference splits page.

    Args:
        bbref_id: Baseball Reference player id (e.g. ``'jeterde01'``).
        season: Season year placed in the request URL.

    Returns:
        A DataFrame built from the ``plato`` table; see
        ``_scrape_stat_table`` for the row/column layout.

    Raises:
        TableNotFoundError: If the page has no ``plato`` table.
        requests.RequestException: If the HTTP request fails or times out.
    """
    page_url = f'https://www.baseball-reference.com/players/split.fcgi?id={bbref_id}&year={season}&t=b'
    return _scrape_stat_table(page_url, 'plato', 'lxml')


async def main(args):
    """Script entry point: fetch one player's splits table and report its size.

    Args:
        args: Command-line arguments (currently unused).
    """
    # Earlier experiments, kept for reference:
    # print('Fetching peripherals')
    # peripherals = get_pitching_peripherals(2021)
    # print('Got them!')
    # print(f'There are {len(peripherals)} records')

    # month_1 = pull_stats('1997-03-01', '1997-04-30')
    # print(f'Received {len(month_1)} records.')

    # print(f'Grabbing game logs...')
    # player_frame = batting_game_log_table('jeterde01')
    # print(f'Received {len(player_frame)} records')

    print('Grabbing splits...')
    player_frame = batting_splits_table('jeterde01')
    print(f'Received {len(player_frame)} records')


if __name__ == '__main__':
    asyncio.run(main(sys.argv[1:]))