paper-dynasty-card-creation/scripts/legacy/create_mlbplayers.py
Cal Corum 0a17745389 Run black and ruff across entire codebase
Standardize formatting with black and apply ruff auto-fixes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 14:24:33 -05:00

134 lines
4.7 KiB
Python

import asyncio
import datetime
import random
import sys
from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
from db_calls import db_get, db_patch, player_desc, db_post
from exceptions import logger
async def main(args):
    """Link players to ``mlbplayers`` records, creating the missing records.

    The work is done in two passes:

    1. Every player returned by ``db_get("players")`` whose ``mlbplayer``
       field is ``None`` is looked up in ``mlbplayers`` by bbref ID. A hit is
       patched onto the player; a miss is resolved through
       ``get_all_pybaseball_ids`` and queued for creation.
    2. The queued records are sent in one ``db_post`` call. If that call
       returns something other than ``None``, the queued players are matched
       again against the now-populated ``mlbplayers`` endpoint.

    Args:
        args: Raw command-line arguments, handed to ``get_args``. Two keys of
            the parsed result are read: ``post_players`` (player patches are
            sent unless this is present and not ``"true"``, compared
            case-insensitively) and ``max_run`` (maximum number of players
            handled in the first pass). Only the player patch is gated by
            ``post_players``; the ``mlbplayers`` post always runs.
    """
    arg_data = get_args(args)
    # Patching is on by default; only an explicit non-"true" value disables it.
    post_players = (
        "post_players" not in arg_data or arg_data["post_players"].lower() == "true"
    )
    max_run = int(arg_data["max_run"]) if "max_run" in arg_data else 9999999

    p_query = await db_get("players")
    if p_query["count"] == 0:
        print("Well fuck. No players were found.")
        return

    # bbref_id -> mlbplayer id, filled in as matches are found.
    player_cache = {}
    # Players that must be re-matched once the new mlbplayers have been posted.
    second_touch_players = []
    # bbref IDs already queued in new_mlbplayers; used only for membership tests.
    second_touch_bbref_ids = set()
    # Payload rows for the bulk mlbplayers post.
    new_mlbplayers = []

    async def set_mlbplayer(pd_player: dict):
        """Patch the cached mlbplayer id onto *pd_player* unless posting is off.

        Expects ``player_cache`` to already hold an entry for the player's
        bbref ID.
        """
        if post_players:
            await db_patch(
                "players",
                object_id=pd_player["player_id"],
                params=[("mlbplayer_id", player_cache[pd_player["bbref_id"]])],
            )
            logger.info("posting update to API")
        else:
            logger.info("not posting update to API")

    async def search_and_match(pd_player: dict, *, queue_if_missing: bool = True):
        """Match *pd_player* to an mlbplayer by bbref ID, or queue one for creation.

        Args:
            pd_player: Player record; ``bbref_id`` and ``player_id`` are read
                and ``mlbplayer_id`` is set on a successful match.
            queue_if_missing: When true (first pass), an unmatched player is
                resolved through ``get_all_pybaseball_ids`` and queued. When
                false (second pass), an unmatched player is only logged, so
                the queue is never modified while it is being iterated.
        """
        m_query = await db_get(
            "mlbplayers", params=[("key_bbref", pd_player["bbref_id"])]
        )
        if m_query["count"] > 0:
            this_mlb = m_query["players"][0]
            player_cache[pd_player["bbref_id"]] = this_mlb["id"]
            pd_player["mlbplayer_id"] = this_mlb["id"]
            logger.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[pd_player["bbref_id"]]}'
            )
            await set_mlbplayer(pd_player)
            return

        if not queue_if_missing:
            # Re-queueing here would append to the list the second pass is
            # iterating and keep that loop alive forever.
            logger.info(
                f'Still no mlbplayer for {player_desc(pd_player)} / player_id {pd_player["player_id"]} '
                f"after posting; skipping"
            )
            return

        all_ids = get_all_pybaseball_ids([pd_player["bbref_id"]], "bbref")
        if all_ids is None:
            logger.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}'
            )
            print(f"No data for {player_desc(pd_player)}")
            return

        if all_ids["key_bbref"] not in second_touch_bbref_ids:
            new_mlbplayers.append(
                {
                    "first_name": sanitize_name(all_ids["name_first"]).title(),
                    "last_name": sanitize_name(all_ids["name_last"]).title(),
                    "key_mlbam": int(all_ids["key_mlbam"]),
                    "key_fangraphs": int(all_ids["key_fangraphs"]),
                    "key_bbref": all_ids["key_bbref"],
                    "key_retro": all_ids["key_retro"],
                    "offense_col": random.randint(1, 3),
                }
            )
            second_touch_bbref_ids.add(all_ids["key_bbref"])
        else:
            print(
                f"Duplicate bbref ID: {all_ids['key_bbref']}; not adding to new_mlbplayers"
            )
        # Queued for the second pass whether or not a new record was added:
        # duplicates still need their mlbplayer_id set once the record exists.
        second_touch_players.append(pd_player)
        logger.info(f"Added {player_desc(pd_player)} to new_mlbplayers")

    start_time = datetime.datetime.now()
    for count, player in enumerate(p_query["players"], start=1):
        if count > max_run:
            print(f"Count ({count}) exceeded max_run ({max_run})")
            break
        if count % 10 == 0:
            print(f"Processing player #{count}...")
        if player["mlbplayer"] is None:
            if player["bbref_id"] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player)
        else:
            # NOTE(review): this branch runs when the player already has an
            # mlbplayer, yet the message says "not found" - confirm the wording.
            logger.info(
                f'{player_desc(player)} / player_id {player["player_id"]} / bbref_id {player["bbref_id"]} not found'
            )

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f"Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...")

    resp = await db_post(endpoint="mlbplayers", payload={"players": new_mlbplayers})
    if resp is not None:
        print("Starting second pass now...")
        for count, player in enumerate(second_touch_players, start=1):
            if count % 10 == 0:
                print(f"Processing player #{count}...")
            if player["bbref_id"] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player, queue_if_missing=False)

    p_run_time = datetime.datetime.now() - start_time_two
    print(
        f"Done second pass ({p_run_time}s), total run time: {datetime.datetime.now() - start_time}s"
    )
if __name__ == "__main__":
    # Script entry point: pass everything after the script name to main().
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))