paper-dynasty-card-creation/scripts/legacy/create_mlbplayers.py
2024-10-17 09:28:02 -05:00

124 lines
4.7 KiB
Python

import asyncio
import datetime
import logging
import os
import random
import sys

from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
from db_calls import db_get, db_patch, player_desc, db_post
# Read the clock once so the year/month/day parts cannot straddle midnight.
_today = datetime.date.today()
# Deliberately not zero-padded (e.g. 2024-1-5) so existing log file names keep
# their format.
date = f'{_today.year}-{_today.month}-{_today.day}'
log_level = logging.INFO

# logging's file handler does not create parent directories; make sure logs/
# exists so the script does not crash at start-up on a fresh checkout.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    filename=f'logs/{date}.log',
    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
    level=log_level
)
async def main(args):
    """Link player records to mlbplayer records, creating missing mlbplayers.

    First pass: every player whose ``mlbplayer`` field is ``None`` is matched
    against the ``mlbplayers`` endpoint by bbref ID. Players without a match
    are looked up through ``get_all_pybaseball_ids`` and queued for creation.
    The queued mlbplayers are then posted in a single request and, if that
    request returns a response, the queued players are matched again in a
    second pass.

    Args:
        args: Command-line arguments without the script name; parsed by
            ``get_args``. Keys read from the parsed result:
            ``post_players`` -- players are patched unless this is present
            and not equal to 'true' (case-insensitive);
            ``max_run`` -- maximum number of players handled in the first
            pass (defaults to 9999999).
    """
    arg_data = get_args(args)
    post_players = 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true'
    max_run = int(arg_data['max_run']) if 'max_run' in arg_data else 9999999

    p_query = await db_get('players')
    if p_query['count'] == 0:
        print('Well fuck. No players were found.')
        return

    player_cache = {}  # bbref_id -> mlbplayer id
    second_touch_players = []  # players to re-match after the mlbplayers post
    second_touch_bbref_ids = set()  # bbref IDs already queued in new_mlbplayers
    new_mlbplayers = []  # payload rows for the mlbplayers post

    async def set_mlbplayer(pd_player: dict):
        """Patch the player's mlbplayer_id from player_cache (if posting is enabled)."""
        if post_players:
            await db_patch(
                'players',
                object_id=pd_player['player_id'],
                params=[('mlbplayer_id', player_cache[pd_player['bbref_id']])]
            )
            logging.info('posting update to API')
        else:
            logging.info('not posting update to API')

    async def search_and_match(pd_player: dict):
        """Match pd_player to an existing mlbplayer, or queue a new one for creation."""
        bbref_id = pd_player['bbref_id']
        m_query = await db_get('mlbplayers', params=[('key_bbref', bbref_id)])
        if m_query['count'] > 0:
            this_mlb = m_query['players'][0]
            player_cache[bbref_id] = this_mlb['id']
            pd_player['mlbplayer_id'] = this_mlb['id']
            logging.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[bbref_id]}')
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([bbref_id], 'bbref')
        if all_ids is None:
            # Report on the player passed in, not on the enclosing loop variable.
            logging.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}')
            print(f'No data for {player_desc(pd_player)}')
            return

        if all_ids['key_bbref'] not in second_touch_bbref_ids:
            new_mlbplayers.append({
                'first_name': sanitize_name(all_ids['name_first']).title(),
                'last_name': sanitize_name(all_ids['name_last']).title(),
                'key_mlbam': int(all_ids['key_mlbam']),
                'key_fangraphs': int(all_ids['key_fangraphs']),
                'key_bbref': all_ids['key_bbref'],
                'key_retro': all_ids['key_retro'],
                'offense_col': random.randint(1, 3)
            })
            second_touch_bbref_ids.add(all_ids['key_bbref'])
        else:
            # Double quotes inside the f-string keep this valid before Python 3.12.
            print(f'Duplicate bbref ID: {all_ids["key_bbref"]}; not adding to new_mlbplayers')
        # Queued either way: a duplicate still needs linking once the
        # mlbplayer has been created.
        second_touch_players.append(pd_player)
        logging.info(f'Added {player_desc(pd_player)} to new_mlbplayers')

    start_time = datetime.datetime.now()
    for count, x in enumerate(p_query['players'], start=1):
        if count > max_run:
            print(f'Count ({count}) exceeded max_run ({max_run})')
            break
        if count % 10 == 0:
            print(f'Processing player #{count}...')

        if x['mlbplayer'] is not None:
            # This branch is reached for players that are already linked.
            logging.info(
                f'{player_desc(x)} / player_id {x["player_id"]} / bbref_id {x["bbref_id"]} '
                f'already has an mlbplayer; skipping')
        elif x['bbref_id'] in player_cache:
            await set_mlbplayer(x)
        else:
            await search_and_match(x)

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...')

    resp = await db_post(endpoint='mlbplayers', payload={'players': new_mlbplayers})
    if resp is not None:
        print('Starting second pass now...')
        # Iterate over a snapshot: search_and_match() appends a player to
        # second_touch_players again when it still cannot be matched, which
        # would otherwise keep extending the list being iterated.
        for count, x in enumerate(list(second_touch_players), start=1):
            if count % 10 == 0:
                print(f'Processing player #{count}...')
            if x['bbref_id'] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)

    end_time = datetime.datetime.now()
    p_run_time = end_time - start_time_two
    print(f'Done second pass ({p_run_time}s), total run time: {end_time - start_time}s')
if __name__ == '__main__':
    # Script entry point: hand everything after the script name to main()
    # and drive the coroutine to completion.
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))