paper-dynasty-card-creation/pybaseball_doodling.py
2024-10-19 01:05:23 -05:00

98 lines
2.9 KiB
Python

import asyncio
import requests
import sys
import time
from bs4 import BeautifulSoup
import pandas as pd
import pybaseball as pb
from creation_helpers import get_pitching_peripherals
def pull_stats(start_date_string: str, end_date_string: str):
    """Fetch batting stats for a date range through pybaseball.

    Both arguments are forwarded unchanged to ``pb.batting_stats_range``
    (presumably ``'YYYY-MM-DD'`` strings, e.g. ``'1997-03-01'`` -- confirm
    against the pybaseball documentation).

    Args:
        start_date_string: First date of the range.
        end_date_string: Last date of the range.

    Returns:
        Whatever ``pb.batting_stats_range`` returns for the requested range.
    """
    # Progress output brackets the (potentially slow) remote fetch.
    print(f'Pulling stats from {start_date_string} to {end_date_string}')
    range_stats = pb.batting_stats_range(start_date_string, end_date_string)
    print('Got it!')
    return range_stats
def batting_game_log_table(bbref_id: str, season: int = 1998, *, timeout: float = 30.0) -> pd.DataFrame:
    """Scrape a player's batting game log table from Baseball Reference.

    Downloads the game-log page for ``bbref_id`` / ``season``, locates the
    ``<table id="batting_gamelogs">`` element and converts its body rows into
    a DataFrame.

    Args:
        bbref_id: Baseball Reference player id, interpolated into the URL
            (e.g. ``'jeterde01'``).
        season: Season year, interpolated into the URL.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block forever.

    Returns:
        A DataFrame with one row per ``<tr>`` that contains ``<td>`` cells.
        Columns are named after the ``data-stat`` attribute of the cells in
        the first such row, the index is the text of each row's first
        ``<td>``, and every value is the raw cell text (a string).

    Raises:
        ValueError: If the page contains no ``batting_gamelogs`` table body
            (for example an error page or an unknown player/season).
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    page_url = f'https://www.baseball-reference.com/players/gl.fcgi?id={bbref_id}&t=b&year={season}'
    response = requests.get(page_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Fixed pause after every request -- presumably to stay under the site's
    # rate limits when this is called in a loop.
    time.sleep(3)

    table = soup.find('table', {'id': 'batting_gamelogs'})
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        # soup.find() returns None on a miss; fail with context instead of an
        # AttributeError on None.
        raise ValueError(
            f'No batting_gamelogs table found for {bbref_id} ({season}) at '
            f'{page_url} (HTTP {response.status_code})'
        )

    headers = []
    data = []
    indeces = []
    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data; skip them.
            continue
        if not headers:
            # Column names come from the first data row's data-stat attributes.
            headers = [cell['data-stat'] for cell in cells]
        row_data = [cell.text for cell in cells]
        data.append(row_data)
        indeces.append(row_data[0])

    return pd.DataFrame(data, index=indeces, columns=headers)
def batting_splits_table(bbref_id: str, season: int = 1998, *, timeout: float = 30.0) -> pd.DataFrame:
    """Scrape a player's batting splits table from Baseball Reference.

    Downloads the splits page for ``bbref_id`` / ``season``, locates the
    ``<table id="plato">`` element and converts its body rows into a
    DataFrame. The page is parsed with the ``lxml`` parser, so the ``lxml``
    package must be installed.

    Args:
        bbref_id: Baseball Reference player id, interpolated into the URL
            (e.g. ``'jeterde01'``).
        season: Season year, interpolated into the URL.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests.get`` can block forever.

    Returns:
        A DataFrame with one row per ``<tr>`` that contains ``<td>`` cells.
        Columns are named after the ``data-stat`` attribute of the cells in
        the first such row, the index is the text of each row's first
        ``<td>``, and every value is the raw cell text (a string).

    Raises:
        ValueError: If the page contains no ``plato`` table body (for
            example an error page or an unknown player/season).
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    page_url = f'https://www.baseball-reference.com/players/split.fcgi?id={bbref_id}&year={season}&t=b'
    response = requests.get(page_url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')
    # Fixed pause after every request -- presumably to stay under the site's
    # rate limits when this is called in a loop.
    time.sleep(3)

    table = soup.find('table', {'id': 'plato'})
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        # soup.find() returns None on a miss; fail with context instead of an
        # AttributeError on None.
        raise ValueError(
            f'No plato table found for {bbref_id} ({season}) at '
            f'{page_url} (HTTP {response.status_code})'
        )

    headers = []
    data = []
    indeces = []
    # find_all is the supported spelling; the camelCase findAll alias is deprecated.
    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data; skip them.
            continue
        if not headers:
            # Column names come from the first data row's data-stat attributes.
            headers = [cell['data-stat'] for cell in cells]
        row_data = [cell.text for cell in cells]
        data.append(row_data)
        indeces.append(row_data[0])

    return pd.DataFrame(data, index=indeces, columns=headers)
async def main(args):
    """Run the currently enabled scratch experiment.

    At present this fetches the batting splits table for Baseball Reference
    id ``'jeterde01'`` (using ``batting_splits_table``'s default season) and
    prints how many rows came back. Earlier experiments are kept below as
    commented-out snippets.

    Args:
        args: Command-line arguments passed by the entry point; currently
            unused.
    """
    # --- Disabled experiment: pitching peripherals ---
    # print('Fetching peripherals')
    # peripherals = get_pitching_peripherals(2021)
    # print('Got them!')
    # print(f'There are {len(peripherals)} records')

    # --- Disabled experiment: date-range batting stats ---
    # month_1 = pull_stats('1997-03-01', '1997-04-30')
    # print(f'Received {len(month_1)} records.')

    # --- Disabled experiment: batting game logs ---
    # print(f'Grabbing game logs...')
    # player_frame = batting_game_log_table('jeterde01')
    # print(f'Received {len(player_frame)} records')

    # Active experiment. The status line previously said "game logs" (copied
    # from the snippet above) even though splits are what is fetched.
    print('Grabbing batting splits...')
    player_frame = batting_splits_table('jeterde01')
    print(f'Received {len(player_frame)} records')
if __name__ == '__main__':
    # Script entry point: hand everything after the script name to the
    # async main() and drive it to completion on a fresh event loop.
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))