Standardize formatting with black and apply ruff auto-fixes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
134 lines
4.7 KiB
Python
134 lines
4.7 KiB
Python
import asyncio
|
|
import datetime
|
|
import random
|
|
import sys
|
|
|
|
from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
|
|
from db_calls import db_get, db_patch, player_desc, db_post
|
|
from exceptions import logger
|
|
|
|
|
|
async def main(args) -> None:
    """Link players to mlbplayer records, creating missing mlbplayers first.

    First pass: every player from the ``players`` endpoint whose ``mlbplayer``
    field is ``None`` is matched by ``bbref_id`` against the ``mlbplayers``
    endpoint. Players with no match are looked up via
    ``get_all_pybaseball_ids`` and queued as new mlbplayer payloads.

    The queued payloads are then posted in one ``db_post`` call and, if that
    call returns something other than ``None``, a second pass retries the
    match for every queued player.

    Args:
        args: Raw command-line arguments, handed to ``get_args``. The parsed
            result is checked for two optional keys:
            ``post_players`` -- players are only patched when this is absent
            or equals "true" (case-insensitive);
            ``max_run`` -- maximum number of players handled in the first
            pass (defaults to 9999999).
    """
    arg_data = get_args(args)
    post_players = (
        "post_players" not in arg_data or arg_data["post_players"].lower() == "true"
    )
    max_run = int(arg_data["max_run"]) if "max_run" in arg_data else 9999999

    p_query = await db_get("players")
    if p_query["count"] == 0:
        print("Well fuck. No players were found.")
        return

    player_cache = {
        # bbref_id: mlbplayer_id
    }

    second_touch_players = []  # players to retry once new mlbplayers are posted
    second_touch_bbref_ids = []  # bbref ids already queued in new_mlbplayers
    new_mlbplayers = []  # payloads for the bulk mlbplayers post

    async def set_mlbplayer(pd_player: dict):
        """Patch the player's mlbplayer_id from player_cache when posting is enabled.

        The caller must ensure ``pd_player["bbref_id"]`` is already a key of
        ``player_cache``.
        """
        if post_players:
            await db_patch(
                "players",
                object_id=pd_player["player_id"],
                params=[("mlbplayer_id", player_cache[pd_player["bbref_id"]])],
            )
            logger.info("posting update to API")
        else:
            logger.info("not posting update to API")

    async def search_and_match(pd_player: dict):
        """Match pd_player to an existing mlbplayer, or queue a new one.

        On a hit in the ``mlbplayers`` endpoint the id is cached and the player
        is patched. On a miss the player's ids are fetched with
        ``get_all_pybaseball_ids``; when that returns data the player is
        queued for the second pass.
        """
        m_query = await db_get(
            "mlbplayers", params=[("key_bbref", pd_player["bbref_id"])]
        )
        if m_query["count"] > 0:
            this_mlb = m_query["players"][0]
            player_cache[pd_player["bbref_id"]] = this_mlb["id"]
            pd_player["mlbplayer_id"] = this_mlb["id"]
            logger.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[pd_player["bbref_id"]]}'
            )
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([pd_player["bbref_id"]], "bbref")
        if all_ids is None:
            # Use the function's own argument here rather than the caller's
            # loop variable, so the helper does not depend on outer loop state.
            logger.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}'
            )
            print(f"No data for {player_desc(pd_player)}")
            return

        if all_ids["key_bbref"] not in second_touch_bbref_ids:
            new_mlbplayers.append(
                {
                    "first_name": sanitize_name(all_ids["name_first"]).title(),
                    "last_name": sanitize_name(all_ids["name_last"]).title(),
                    "key_mlbam": int(all_ids["key_mlbam"]),
                    "key_fangraphs": int(all_ids["key_fangraphs"]),
                    "key_bbref": all_ids["key_bbref"],
                    "key_retro": all_ids["key_retro"],
                    "offense_col": random.randint(1, 3),
                }
            )
            second_touch_bbref_ids.append(all_ids["key_bbref"])
        else:
            print(
                f"Duplicate bbref ID: {all_ids['key_bbref']}; not adding to new_mlbplayers"
            )
        # Duplicates are still queued so that every player sharing the bbref id
        # gets patched during the second pass.
        second_touch_players.append(pd_player)
        logger.info(f"Added {player_desc(pd_player)} to new_mlbplayers")

    start_time = datetime.datetime.now()
    for count, player in enumerate(p_query["players"], start=1):
        if count > max_run:
            print(f"Count ({count}) exceeded max_run ({max_run})")
            break
        if count % 10 == 0:
            print(f"Processing player #{count}...")

        if player["mlbplayer"] is None:
            if player["bbref_id"] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player)
        else:
            # NOTE(review): this branch runs when the player already has an
            # mlbplayer, yet the message says "not found" -- confirm wording.
            logger.info(
                f'{player_desc(player)} / player_id {player["player_id"]} / bbref_id {player["bbref_id"]} not found'
            )

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f"Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...")

    # NOTE(review): this post is not gated by post_players -- confirm intended.
    resp = await db_post(endpoint="mlbplayers", payload={"players": new_mlbplayers})
    if resp is not None:
        print("Starting second pass now...")
        # Iterate over a snapshot: search_and_match appends to
        # second_touch_players whenever a lookup still misses, which would
        # otherwise keep extending the list being iterated.
        for count, player in enumerate(list(second_touch_players), start=1):
            if count % 10 == 0:
                print(f"Processing player #{count}...")
            if player["bbref_id"] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player)

    p_run_time = datetime.datetime.now() - start_time_two
    print(
        f"Done second pass ({p_run_time}s), total run time: {datetime.datetime.now() - start_time}s"
    )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: pass everything after the script name to main()
    # and drive the coroutine to completion.
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))
|