98 lines
2.9 KiB
Python
98 lines
2.9 KiB
Python
import asyncio
|
|
import requests
|
|
import sys
|
|
import time
|
|
|
|
from bs4 import BeautifulSoup
|
|
import pandas as pd
|
|
import pybaseball as pb
|
|
|
|
from creation_helpers import get_pitching_peripherals
|
|
|
|
|
|
def pull_stats(start_date_string: str, end_date_string: str):
    """Fetch batting stats for a date range and return them.

    Thin wrapper around ``pybaseball.batting_stats_range`` that prints a
    progress message before and after the call.

    Args:
        start_date_string: First date of the range, passed straight through
            to pybaseball (presumably ``YYYY-MM-DD`` -- confirm against the
            pybaseball documentation).
        end_date_string: Last date of the range, passed through unchanged.

    Returns:
        Whatever ``pb.batting_stats_range`` returns for the given range.
    """
    announcement = f'Pulling stats from {start_date_string} to {end_date_string}'
    print(announcement)

    batting_stats = pb.batting_stats_range(start_date_string, end_date_string)

    print('Got it!')
    return batting_stats
|
|
|
|
|
|
def _scrape_stat_table(page_url: str, table_id: str, parser: str) -> pd.DataFrame:
    """Download a page and convert one of its HTML tables into a DataFrame.

    Each ``<tr>`` that contains at least one ``<td>`` becomes a row. Column
    names are the ``data-stat`` attributes of the first such row's cells, and
    the frame is indexed by the text of each row's first ``<td>``.

    Args:
        page_url: Full URL of the page to download.
        table_id: ``id`` attribute of the ``<table>`` element to extract.
        parser: Parser name handed to BeautifulSoup (e.g. ``'html.parser'``).

    Returns:
        A DataFrame of cell text (all values are strings). It is empty when
        the table has no data rows.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page has no table with the requested id.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(page_url, timeout=30)
    # Fixed pause after every fetch, kept from the original implementation
    # (presumably to throttle requests to the site).
    time.sleep(3)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, parser)
    table = soup.find('table', {'id': table_id})
    if table is None:
        raise ValueError(f'No table with id "{table_id}" found at {page_url}')

    # Not every parser/page yields an explicit <tbody>; fall back to the
    # table itself. Rows made only of <th> cells are skipped below anyway.
    body = table.find('tbody')
    if body is None:
        body = table

    headers = []
    data = []
    indices = []

    for row in body.find_all('tr'):
        cells = row.find_all('td')
        if not cells:
            # Rows without <td> cells carry no data.
            continue

        if not headers:
            # The first data row defines the column names.
            headers = [cell['data-stat'] for cell in cells]

        row_data = [cell.text for cell in cells]
        data.append(row_data)
        indices.append(row_data[0])

    return pd.DataFrame(data, index=indices, columns=headers)


def batting_game_log_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape a player's batting game log from Baseball-Reference.

    Args:
        bbref_id: Player id used in the page URL (e.g. ``'jeterde01'``).
        season: Season year used in the page URL.

    Returns:
        DataFrame built from the ``batting_gamelogs`` table, indexed by the
        text of each row's first ``<td>`` cell.

    Raises:
        requests.HTTPError: If the page request fails.
        ValueError: If the ``batting_gamelogs`` table is not on the page.
    """
    page_url = f'https://www.baseball-reference.com/players/gl.fcgi?id={bbref_id}&t=b&year={season}'
    return _scrape_stat_table(page_url, 'batting_gamelogs', 'html.parser')


def batting_splits_table(bbref_id: str, season: int = 1998) -> pd.DataFrame:
    """Scrape a player's ``plato`` batting splits table from Baseball-Reference.

    Args:
        bbref_id: Player id used in the page URL (e.g. ``'jeterde01'``).
        season: Season year used in the page URL.

    Returns:
        DataFrame built from the ``plato`` table, indexed by the text of each
        row's first ``<td>`` cell.

    Raises:
        requests.HTTPError: If the page request fails.
        ValueError: If the ``plato`` table is not on the page.
    """
    page_url = f'https://www.baseball-reference.com/players/split.fcgi?id={bbref_id}&year={season}&t=b'
    return _scrape_stat_table(page_url, 'plato', 'lxml')
|
|
|
|
|
|
async def main(args):
    """Script entry point: fetch one player's batting splits and report the row count.

    Args:
        args: Command-line arguments supplied by the caller; currently unused.
    """
    # Earlier experiments, kept for reference:
    # print('Fetching peripherals')
    # peripherals = get_pitching_peripherals(2021)
    # print('Got them!')
    # print(f'There are {len(peripherals)} records')

    # month_1 = pull_stats('1997-03-01', '1997-04-30')
    # print(f'Received {len(month_1)} records.')

    # print(f'Grabbing game logs...')
    # player_frame = batting_game_log_table('jeterde01')
    # print(f'Received {len(player_frame)} records')

    # The message names the table actually being fetched (splits, not game logs).
    print('Grabbing batting splits...')
    player_frame = batting_splits_table('jeterde01')
    print(f'Received {len(player_frame)} records')
|
|
|
|
|
|
if __name__ == '__main__':
    # Forward everything after the script name to the async entry point.
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))
|