import asyncio
import datetime
import random
import sys

from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
from db_calls import db_get, db_patch, player_desc, db_post
from exceptions import logger


async def main(args) -> None:
    """Link every player record that has no ``mlbplayer`` to an mlbplayer row.

    The work is done in two passes:

    1. For each player without an ``mlbplayer``, look up an existing
       ``mlbplayers`` row by ``key_bbref``. On a hit the player is patched
       immediately; on a miss the IDs are fetched through
       ``get_all_pybaseball_ids`` and queued as a new mlbplayer.
    2. The queued mlbplayers are posted in a single request and, if that
       request returns something, the queued players are matched again.

    Args:
        args: Raw command-line arguments, handed to ``get_args``. Two keys of
            the parsed result are read here:
            ``post_players`` -- when present and not ``"true"``
            (case-insensitive), the ``players`` PATCH calls are skipped;
            ``max_run`` -- maximum number of players handled in pass one.
    """
    arg_data = get_args(args)
    post_players = (
        "post_players" not in arg_data
        or arg_data["post_players"].lower() == "true"
    )
    max_run = int(arg_data["max_run"]) if "max_run" in arg_data else 9999999

    p_query = await db_get("players")
    if p_query["count"] == 0:
        print("Well fuck. No players were found.")
        return

    player_cache = {}  # bbref_id -> mlbplayer_id
    second_touch_players = []  # players to retry once new mlbplayers exist
    second_touch_bbref_ids = set()  # bbref IDs already queued for creation
    new_mlbplayers = []  # payload rows for the bulk mlbplayers POST

    async def set_mlbplayer(pd_player: dict):
        """Patch ``pd_player`` with its cached mlbplayer_id (if posting is on).

        Reads ``player_cache[pd_player["bbref_id"]]``, so the bbref ID must
        already be cached when this is called.
        """
        if post_players:
            await db_patch(
                "players",
                object_id=pd_player["player_id"],
                params=[("mlbplayer_id", player_cache[pd_player["bbref_id"]])],
            )
            logger.info("posting update to API")
        else:
            logger.info("not posting update to API")

    async def search_and_match(pd_player: dict):
        """Match ``pd_player`` to an mlbplayer row, or queue one for creation."""
        m_query = await db_get(
            "mlbplayers", params=[("key_bbref", pd_player["bbref_id"])]
        )
        if m_query["count"] > 0:
            this_mlb = m_query["players"][0]
            player_cache[pd_player["bbref_id"]] = this_mlb["id"]
            pd_player["mlbplayer_id"] = this_mlb["id"]
            logger.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[pd_player["bbref_id"]]}'
            )
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([pd_player["bbref_id"]], "bbref")
        if all_ids is None:
            # Use the function's own argument here rather than the enclosing
            # loop variable, so the helper does not depend on its caller's
            # naming.
            logger.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}'
            )
            print(f"No data for {player_desc(pd_player)}")
            return

        if all_ids["key_bbref"] not in second_touch_bbref_ids:
            new_mlbplayers.append(
                {
                    "first_name": sanitize_name(all_ids["name_first"]).title(),
                    "last_name": sanitize_name(all_ids["name_last"]).title(),
                    "key_mlbam": int(all_ids["key_mlbam"]),
                    "key_fangraphs": int(all_ids["key_fangraphs"]),
                    "key_bbref": all_ids["key_bbref"],
                    "key_retro": all_ids["key_retro"],
                    "offense_col": random.randint(1, 3),
                }
            )
            second_touch_bbref_ids.add(all_ids["key_bbref"])
        else:
            print(
                f"Duplicate bbref ID: {all_ids['key_bbref']}; not adding to new_mlbplayers"
            )

        # Duplicates are still retried in the second pass so that every player
        # sharing the bbref ID receives the new mlbplayer_id.
        second_touch_players.append(pd_player)
        # NOTE(review): this message is also emitted on the duplicate path,
        # where nothing was added to new_mlbplayers -- confirm intent.
        logger.info(f"Added {player_desc(pd_player)} to new_mlbplayers")

    start_time = datetime.datetime.now()
    for count, x in enumerate(p_query["players"], start=1):
        if count > max_run:
            print(f"Count ({count}) exceeded max_run ({max_run})")
            break

        if count % 10 == 0:
            print(f"Processing player #{count}...")

        if x["mlbplayer"] is None:
            if x["bbref_id"] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)
        else:
            # NOTE(review): this branch runs when the player already has an
            # mlbplayer, yet the message says "not found" -- confirm wording.
            logger.info(
                f'{player_desc(x)} / player_id {x["player_id"]} / bbref_id {x["bbref_id"]} not found'
            )

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f"Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...")

    resp = await db_post(endpoint="mlbplayers", payload={"players": new_mlbplayers})
    if resp is not None:
        print("Starting second pass now...")
        # Iterate over a snapshot: search_and_match() appends to
        # second_touch_players when a lookup still misses, and growing the
        # list being iterated could keep this loop from ever terminating.
        for count, x in enumerate(list(second_touch_players), start=1):
            if count % 10 == 0:
                print(f"Processing player #{count}...")

            if x["bbref_id"] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)

    end_time = datetime.datetime.now()
    p_run_time = end_time - start_time_two
    print(
        f"Done second pass ({p_run_time}s), total run time: {end_time - start_time}s"
    )


if __name__ == "__main__":
    asyncio.run(main(sys.argv[1:]))