116 lines
4.4 KiB
Python
116 lines
4.4 KiB
Python
import asyncio
|
|
import datetime
|
|
import random
|
|
import sys
|
|
|
|
from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
|
|
from db_calls import db_get, db_patch, player_desc, db_post
|
|
from exceptions import logger
|
|
|
|
|
|
async def main(args):
    """Attach an ``mlbplayer_id`` to every player record that lacks one.

    First pass: fetch all ``players`` and, for each one whose ``mlbplayer``
    is ``None``, look for an ``mlbplayers`` record with a matching
    ``key_bbref``.  On a hit the player is PATCHed with that id; on a miss
    the IDs returned by ``get_all_pybaseball_ids`` are queued as a new
    ``mlbplayers`` record and the player is remembered for a second pass.

    The queued records are then POSTed in a single request and, if that call
    returns something other than ``None``, the remembered players are run
    through the same matching logic again.

    Args:
        args: Raw command-line arguments, handed to ``get_args``.  Two keys
            of the parsed result are read here:
            ``post_players`` -- PATCH calls are made unless this is present
            and not equal to ``'true'`` (case-insensitive);
            ``max_run`` -- upper bound on the number of players handled in
            the first pass (default 9999999).

    Returns:
        None.
    """
    arg_data = get_args(args)
    # PATCHing stays enabled unless the flag is given with a non-'true' value.
    post_players = 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true'
    max_run = int(arg_data['max_run']) if 'max_run' in arg_data else 9999999

    p_query = await db_get('players')
    if p_query['count'] == 0:
        print('Well fuck. No players were found.')
        return

    player_cache = {
        # bbref_id: mlbplayer_id
    }

    second_touch_players = []
    # Only ever used for membership checks, so a set is sufficient.
    second_touch_bbref_ids = set()
    new_mlbplayers = []

    async def set_mlbplayer(pd_player: dict):
        """PATCH the cached mlbplayer id onto ``pd_player`` when posting is enabled.

        Expects ``player_cache`` to already hold an entry for the player's
        ``bbref_id``.
        """
        if post_players:
            await db_patch(
                'players',
                object_id=pd_player['player_id'],
                params=[('mlbplayer_id', player_cache[pd_player['bbref_id']])]
            )
            logger.info('posting update to API')
        else:
            logger.info('not posting update to API')

    async def search_and_match(pd_player: dict):
        """Match ``pd_player`` to an mlbplayers record, or queue a new record for it."""
        bbref_id = pd_player['bbref_id']
        m_query = await db_get('mlbplayers', params=[('key_bbref', bbref_id)])

        if m_query['count'] > 0:
            # A record already exists: cache its id and push it to the player.
            this_mlb = m_query['players'][0]
            player_cache[bbref_id] = this_mlb['id']
            pd_player['mlbplayer_id'] = this_mlb['id']
            logger.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[bbref_id]}')
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([bbref_id], 'bbref')
        if all_ids is None:
            # Use the function's own argument here rather than the caller's
            # loop variable, so the helper does not depend on how it is called.
            logger.info(f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}')
            print(f'No data for {player_desc(pd_player)}')
            return

        if all_ids['key_bbref'] not in second_touch_bbref_ids:
            new_mlbplayers.append({
                'first_name': sanitize_name(all_ids['name_first']).title(),
                'last_name': sanitize_name(all_ids['name_last']).title(),
                'key_mlbam': int(all_ids['key_mlbam']),
                'key_fangraphs': int(all_ids['key_fangraphs']),
                'key_bbref': all_ids['key_bbref'],
                'key_retro': all_ids['key_retro'],
                'offense_col': random.randint(1, 3)
            })
            second_touch_bbref_ids.add(all_ids['key_bbref'])
        else:
            # Double quotes inside the f-string keep this line valid on
            # interpreters older than Python 3.12.
            print(f'Duplicate bbref ID: {all_ids["key_bbref"]}; not adding to new_mlbplayers')

        # Queued for the second pass whether or not a new record was added.
        second_touch_players.append(pd_player)
        logger.info(f'Added {player_desc(pd_player)} to new_mlbplayers')

    start_time = datetime.datetime.now()
    for count, x in enumerate(p_query['players'], start=1):
        if count > max_run:
            print(f'Count ({count}) exceeded max_run ({max_run})')
            break
        if count % 10 == 0:
            print(f'Processing player #{count}...')

        if x['mlbplayer'] is None:
            if x['bbref_id'] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)

        else:
            # NOTE(review): this branch runs when the player already has an
            # mlbplayer, yet the message says "not found" -- confirm wording.
            logger.info(f'{player_desc(x)} / player_id {x["player_id"]} / bbref_id {x["bbref_id"]} not found')

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...')

    # NOTE(review): this POST is not gated by post_players -- confirm intended.
    resp = await db_post(endpoint='mlbplayers', payload={'players': new_mlbplayers})
    if resp is not None:
        print('Starting second pass now...')
        # Iterate over a snapshot: search_and_match() appends to
        # second_touch_players when a player is still unmatched, and growing
        # the list being iterated could keep this loop running forever.
        for count, x in enumerate(list(second_touch_players), start=1):
            if count % 10 == 0:
                print(f'Processing player #{count}...')
            if x['bbref_id'] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)

    finished = datetime.datetime.now()
    p_run_time = finished - start_time_two
    print(f'Done second pass ({p_run_time}s), total run time: {finished - start_time}s')
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: pass everything after the script name to main()
    # and drive the coroutine to completion.
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))