"""Backfill the ``mlbplayer_id`` link on ``players`` records.

For every player returned by the ``players`` endpoint that has no
``mlbplayer`` value, look up an ``mlbplayers`` record by bbref ID and patch
the player with that record's ID.  Players with no matching ``mlbplayers``
record are looked up through ``get_all_pybaseball_ids``; the resulting new
records are posted in a single batch and those players are then processed a
second time.
"""
import asyncio
import datetime
import random
import sys

from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
from db_calls import db_get, db_patch, player_desc, db_post
from exceptions import logger


async def main(args):
    """Run the two-pass mlbplayer matching job.

    Args:
        args: Raw command-line arguments, handed to ``get_args``.  The parsed
            result is read for two optional keys:

            * ``post_players`` -- patches are sent unless this is present and
              is something other than ``'true'`` (case-insensitive).
            * ``max_run`` -- maximum number of players handled in the first
              pass (defaults to 9999999).

            NOTE(review): the exact command-line syntax is defined by
            ``get_args`` and is not visible here.

    Returns:
        None.  Progress is reported through ``print`` and ``logger``.
    """
    arg_data = get_args(args)
    post_players = (
        'post_players' not in arg_data
        or arg_data['post_players'].lower() == 'true'
    )
    max_run = int(arg_data['max_run']) if 'max_run' in arg_data else 9999999

    p_query = await db_get('players')
    if p_query['count'] == 0:
        print('Well fuck. No players were found.')
        return

    player_cache = {
        # bbref_id: mlbplayer_id
    }
    second_touch_players = []    # players to revisit after the batch post
    second_touch_bbref_ids = []  # bbref IDs already queued in new_mlbplayers
    new_mlbplayers = []          # payload for the batch post to 'mlbplayers'

    async def set_mlbplayer(pd_player: dict):
        """Patch ``pd_player`` with the cached mlbplayer ID for its bbref ID.

        The caller must have populated ``player_cache`` for this bbref ID.
        Nothing is sent when ``post_players`` is false.
        """
        if post_players:
            await db_patch(
                'players',
                object_id=pd_player['player_id'],
                params=[('mlbplayer_id', player_cache[pd_player['bbref_id']])]
            )
            logger.info('posting update to API')
        else:
            logger.info('not posting update to API')

    async def search_and_match(pd_player: dict):
        """Match ``pd_player`` to an mlbplayer record, or queue a new one.

        If the ``mlbplayers`` endpoint already has a record for the player's
        bbref ID, cache its ID and patch the player.  Otherwise ask
        ``get_all_pybaseball_ids`` for the player's IDs; when it returns data,
        queue a new mlbplayer record (once per bbref ID) and mark the player
        for the second pass.
        """
        m_query = await db_get('mlbplayers', params=[('key_bbref', pd_player['bbref_id'])])
        if m_query['count'] > 0:
            this_mlb = m_query['players'][0]
            player_cache[pd_player['bbref_id']] = this_mlb['id']
            pd_player['mlbplayer_id'] = this_mlb['id']
            logger.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[pd_player["bbref_id"]]}')
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([pd_player['bbref_id']], 'bbref')
        if all_ids is None:
            # Use the function's own argument here rather than relying on the
            # caller's loop variable being visible through the closure.
            logger.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}')
            print(f'No data for {player_desc(pd_player)}')
            return

        if all_ids['key_bbref'] not in second_touch_bbref_ids:
            new_mlbplayers.append({
                'first_name': sanitize_name(all_ids['name_first']).title(),
                'last_name': sanitize_name(all_ids['name_last']).title(),
                'key_mlbam': int(all_ids['key_mlbam']),
                'key_fangraphs': int(all_ids['key_fangraphs']),
                'key_bbref': all_ids['key_bbref'],
                'key_retro': all_ids['key_retro'],
                'offense_col': random.randint(1, 3)
            })
            second_touch_bbref_ids.append(all_ids['key_bbref'])
        else:
            # Double quotes inside the f-string keep this valid on
            # interpreters older than Python 3.12.
            print(f'Duplicate bbref ID: {all_ids["key_bbref"]}; not adding to new_mlbplayers')

        # Every player with pybaseball data is revisited in the second pass,
        # including those whose bbref ID was already queued.
        second_touch_players.append(pd_player)
        logger.info(f'Added {player_desc(pd_player)} to new_mlbplayers')

    start_time = datetime.datetime.now()
    for count, player in enumerate(p_query['players'], start=1):
        if count > max_run:
            print(f'Count ({count}) exceeded max_run ({max_run})')
            break

        if count % 10 == 0:
            print(f'Processing player #{count}...')

        if player['mlbplayer'] is None:
            if player['bbref_id'] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player)
        else:
            # NOTE(review): this branch runs when the player already has an
            # 'mlbplayer' value, yet the message says "not found" -- confirm
            # the intended wording.
            logger.info(
                f'{player_desc(player)} / player_id {player["player_id"]} / bbref_id {player["bbref_id"]} not found')

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...')

    resp = await db_post(endpoint='mlbplayers', payload={'players': new_mlbplayers})
    if resp is not None:
        print('Starting second pass now...')
        # Iterate over a snapshot: search_and_match() appends to
        # second_touch_players when a record is still missing, and iterating
        # the live list would then never terminate.
        for count, player in enumerate(list(second_touch_players), start=1):
            if count % 10 == 0:
                print(f'Processing player #{count}...')

            if player['bbref_id'] in player_cache:
                await set_mlbplayer(player)
            else:
                await search_and_match(player)

    p_run_time = datetime.datetime.now() - start_time_two
    print(f'Done second pass ({p_run_time}s), total run time: {datetime.datetime.now() - start_time}s')


if __name__ == '__main__':
    asyncio.run(main(sys.argv[1:]))