Create create_mlbplayers.py
This commit is contained in:
parent
4a70faafbc
commit
43c9fd0e32
124
create_mlbplayers.py
Normal file
124
create_mlbplayers.py
Normal file
@ -0,0 +1,124 @@
|
||||
import asyncio
import datetime
import logging
import os
import random
import sys

from creation_helpers import get_all_pybaseball_ids, get_args, sanitize_name
from db_calls import db_get, db_patch, player_desc, db_post
|
||||
|
||||
# Module-level logging setup: one log file per calendar day under logs/.
# Take a single snapshot of "now" so the year/month/day components cannot
# come from different days if the script starts right at midnight.
_today = datetime.datetime.now()
date = f'{_today.year}-{_today.month}-{_today.day}'
log_level = logging.INFO

# basicConfig opens the log file immediately and raises FileNotFoundError when
# the directory is missing, so make sure it exists first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    filename=f'logs/{date}.log',
    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
    level=log_level
)
|
||||
|
||||
|
||||
async def main(args):
    """Link each player record to an ``mlbplayers`` record, creating missing ones.

    The work is done in two passes:

    1. Every player returned by ``db_get('players')`` whose ``mlbplayer`` field
       is ``None`` is looked up in ``mlbplayers`` by its bbref id. A hit is
       patched onto the player right away; a miss is resolved through
       ``get_all_pybaseball_ids`` and queued as a new ``mlbplayers`` row.
    2. The queued rows are posted in a single ``db_post`` call and, if that
       call returns something other than ``None``, the queued players are
       matched again so they pick up the ids of the rows just created.

    Args:
        args: Raw command-line arguments, parsed by ``get_args``. Recognised
            keys in the parsed result:
            ``post_players`` - the player PATCH is sent unless this is present
            and not equal to ``'true'`` (case-insensitive);
            ``max_run`` - maximum number of players handled in the first pass.

    Returns:
        None. Progress is reported via ``print`` and the ``logging`` module.
    """
    arg_data = get_args(args)
    post_players = 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true'
    max_run = 9999999 if 'max_run' not in arg_data else int(arg_data['max_run'])

    p_query = await db_get('players')
    if p_query['count'] == 0:
        print('Well fuck. No players were found.')
        return

    # bbref_id -> mlbplayer id, filled in as matches are found
    player_cache = {}

    # Players that need a second lookup once the new mlbplayers rows exist
    second_touch_players = []
    # bbref ids already queued in new_mlbplayers; a set keeps the duplicate check O(1)
    second_touch_bbref_ids = set()
    # Payload rows for the bulk POST between the two passes
    new_mlbplayers = []

    async def set_mlbplayer(pd_player: dict):
        """PATCH the cached mlbplayer id onto ``pd_player`` unless posting is disabled."""
        if post_players:
            await db_patch(
                'players',
                object_id=pd_player['player_id'],
                params=[('mlbplayer_id', player_cache[pd_player['bbref_id']])]
            )
            logging.info(f'posting update to API')
        else:
            logging.info(f'not posting update to API')

    async def search_and_match(pd_player: dict):
        """Match ``pd_player`` to an existing mlbplayers row, or queue a new row for it."""
        m_query = await db_get('mlbplayers', params=[('key_bbref', pd_player['bbref_id'])])
        if m_query['count'] > 0:
            this_mlb = m_query['players'][0]
            player_cache[pd_player['bbref_id']] = this_mlb['id']
            pd_player['mlbplayer_id'] = this_mlb['id']
            logging.info(
                f'Set {player_desc(pd_player)} / player_id {pd_player["player_id"]} mlbplayer_id to cached '
                f'{player_cache[pd_player["bbref_id"]]}')
            await set_mlbplayer(pd_player)
            return

        all_ids = get_all_pybaseball_ids([pd_player['bbref_id']], 'bbref')
        if all_ids is None:
            # Report on the player passed in rather than relying on the caller's
            # loop variable happening to refer to the same record.
            logging.info(
                f'No return from pybaseball for {player_desc(pd_player)} / player_id {pd_player["player_id"]}')
            print(f'No data for {player_desc(pd_player)}')
            return

        if all_ids['key_bbref'] not in second_touch_bbref_ids:
            new_mlbplayers.append({
                'first_name': sanitize_name(all_ids['name_first']).title(),
                'last_name': sanitize_name(all_ids['name_last']).title(),
                'key_mlbam': int(all_ids['key_mlbam']),
                'key_fangraphs': int(all_ids['key_fangraphs']),
                'key_bbref': all_ids['key_bbref'],
                'key_retro': all_ids['key_retro'],
                'offense_col': random.randint(1, 3)
            })
            second_touch_bbref_ids.add(all_ids['key_bbref'])
        else:
            # Double quotes inside the f-string keep this valid before Python 3.12.
            print(f'Duplicate bbref ID: {all_ids["key_bbref"]}; not adding to new_mlbplayers')

        # Whether newly queued or a duplicate, the player still needs its
        # mlbplayer_id set once the queued rows have been posted.
        second_touch_players.append(pd_player)
        logging.info(f'Added {player_desc(pd_player)} to new_mlbplayers')

    start_time = datetime.datetime.now()
    for count, x in enumerate(p_query['players'], start=1):
        if count > max_run:
            print(f'Count ({count}) exceeded max_run ({max_run})')
            break
        if count % 10 == 0:
            print(f'Processing player #{count}...')

        if x['mlbplayer'] is None:
            if x['bbref_id'] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)
        else:
            # NOTE(review): this branch runs when the player already has an
            # mlbplayer, so "not found" looks misleading - confirm intended wording.
            logging.info(f'{player_desc(x)} / player_id {x["player_id"]} / bbref_id {x["bbref_id"]} not found')

    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Done first pass ({run_time}s), posting {len(new_mlbplayers)} now...')

    resp = await db_post(endpoint='mlbplayers', payload={'players': new_mlbplayers})
    if resp is not None:
        print(f'Starting second pass now...')
        for count, x in enumerate(second_touch_players, start=1):
            if count % 10 == 0:
                print(f'Processing player #{count}...')
            if x['bbref_id'] in player_cache:
                await set_mlbplayer(x)
            else:
                await search_and_match(x)

    p_run_time = datetime.datetime.now() - start_time_two
    print(f'Done second pass ({p_run_time}s), total run time: {datetime.datetime.now() - start_time}s')
|
||||
|
||||
|
||||
# Script entry point: hand the command-line arguments (without the program
# name) to the async main() and run it to completion.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    asyncio.run(main(cli_args))
|
||||
Loading…
Reference in New Issue
Block a user