paper-dynasty-card-creation/pybaseball_doodling.py
Cal Corum 0a17745389 Run black and ruff across entire codebase
Standardize formatting with black and apply ruff auto-fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 14:24:33 -05:00

97 lines
2.8 KiB
Python

import asyncio
import requests
import sys
import time
from bs4 import BeautifulSoup
import pandas as pd
import pybaseball as pb
def pull_stats(start_date_string: str, end_date_string: str):
    """Fetch batting stats for a date range through pybaseball.

    Progress is reported on stdout before and after the fetch.

    Args:
        start_date_string: Start of the range, forwarded as the first
            argument of ``pb.batting_stats_range`` (e.g. ``"1997-03-01"``).
        end_date_string: End of the range, forwarded as the second argument
            (e.g. ``"1997-04-30"``).

    Returns:
        Whatever ``pb.batting_stats_range`` returns for the range, unchanged.
    """
    announcement = f"Pulling stats from {start_date_string} to {end_date_string}"
    print(announcement)

    stats = pb.batting_stats_range(start_date_string, end_date_string)

    print("Got it!")
    return stats
def batting_game_log_table(bbref_id: str, season: int = 1998):
    """Scrape a player's batting game log page into a DataFrame.

    Requests the Baseball-Reference ``gl.fcgi`` page for ``bbref_id`` and
    ``season``, locates the table whose id is ``batting_gamelogs`` and reads
    the text of every ``<td>`` in each of its body rows.

    Args:
        bbref_id: Player id placed in the ``id`` query parameter
            (e.g. ``"jeterde01"``).
        season: Year placed in the ``year`` query parameter.

    Returns:
        A ``pandas.DataFrame`` holding the raw cell text (strings, not parsed
        numbers). Column names come from the ``data-stat`` attribute of the
        cells in the first row that contains any ``<td>``; the index is the
        text of each row's first ``<td>``. Rows without ``<td>`` cells are
        skipped.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
        ValueError: If the page has no ``batting_gamelogs`` table body.
        KeyError: If a cell in the header-defining row lacks ``data-stat``.
    """
    page_url = f"https://www.baseball-reference.com/players/gl.fcgi?id={bbref_id}&t=b&year={season}"
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(page_url, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")
    # Fixed pause after every fetch (presumably to stay polite to the site's
    # rate limiting -- confirm before shortening).
    time.sleep(3)

    table = soup.find("table", {"id": "batting_gamelogs"})
    tbody = table.find("tbody") if table is not None else None
    if tbody is None:
        # Fail with context instead of an AttributeError on None further down.
        raise ValueError(
            f"No 'batting_gamelogs' table body found at {page_url} "
            f"(HTTP status {response.status_code})"
        )

    headers = []
    data = []
    indeces = []
    for row in tbody.find_all("tr"):
        cells = row.find_all("td")
        if not cells:
            continue
        if not headers:
            # The first row with data cells defines the column names.
            headers = [cell["data-stat"] for cell in cells]
        row_data = [cell.text for cell in cells]
        data.append(row_data)
        indeces.append(row_data[0])

    return pd.DataFrame(data, index=indeces, columns=headers)
def batting_splits_table(bbref_id: str, season: int = 1998):
    """Scrape a player's batting splits page into a DataFrame.

    Requests the Baseball-Reference ``split.fcgi`` page for ``bbref_id`` and
    ``season``, locates the table whose id is ``plato`` and reads the text of
    every ``<td>`` in each of its body rows.

    Args:
        bbref_id: Player id placed in the ``id`` query parameter
            (e.g. ``"jeterde01"``).
        season: Year placed in the ``year`` query parameter.

    Returns:
        A ``pandas.DataFrame`` holding the raw cell text (strings, not parsed
        numbers). Column names come from the ``data-stat`` attribute of the
        cells in the first row that contains any ``<td>``; the index is the
        text of each row's first ``<td>``. Rows without ``<td>`` cells are
        skipped.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
        ValueError: If the page has no ``plato`` table body.
        KeyError: If a cell in the header-defining row lacks ``data-stat``.
    """
    page_url = f"https://www.baseball-reference.com/players/split.fcgi?id={bbref_id}&year={season}&t=b"
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(page_url, timeout=30)
    soup = BeautifulSoup(response.text, "lxml")
    # Fixed pause after every fetch (presumably to stay polite to the site's
    # rate limiting -- confirm before shortening).
    time.sleep(3)

    table = soup.find("table", {"id": "plato"})
    tbody = table.find("tbody") if table is not None else None
    if tbody is None:
        # Fail with context instead of an AttributeError on None further down.
        raise ValueError(
            f"No 'plato' table body found at {page_url} "
            f"(HTTP status {response.status_code})"
        )

    headers = []
    data = []
    indeces = []
    # find_all is the supported spelling; findAll is a legacy alias.
    for row in tbody.find_all("tr"):
        cells = row.find_all("td")
        if not cells:
            continue
        if not headers:
            # The first row with data cells defines the column names.
            headers = [cell["data-stat"] for cell in cells]
        row_data = [cell.text for cell in cells]
        data.append(row_data)
        indeces.append(row_data[0])

    return pd.DataFrame(data, index=indeces, columns=headers)
async def main(args):
    """Run the current scratch experiment: fetch batting splits for one player.

    Calls ``batting_splits_table("jeterde01")`` with that function's default
    season and prints how many rows came back.

    NOTE: nothing in here is awaited; ``batting_splits_table`` is an ordinary
    synchronous call, so this coroutine blocks until the scrape finishes.

    Args:
        args: Command-line arguments (without the script name). Currently
            unused.
    """
    # Earlier experiments, kept for reference:
    # print('Fetching peripherals')
    # peripherals = get_pitching_peripherals(2021)
    # print('Got them!')
    # print(f'There are {len(peripherals)} records')

    # month_1 = pull_stats('1997-03-01', '1997-04-30')
    # print(f'Received {len(month_1)} records.')

    # print(f'Grabbing game logs...')
    # player_frame = batting_game_log_table('jeterde01')
    # print(f'Received {len(player_frame)} records')

    # The message previously said "game logs" although splits are fetched.
    print("Grabbing batting splits...")
    player_frame = batting_splits_table("jeterde01")
    print(f"Received {len(player_frame)} records")
if __name__ == "__main__":
    # Script entry point: hand everything after the script name to the async
    # main() and drive it to completion on a fresh event loop.
    asyncio.run(main(args=sys.argv[1:]))