386 lines
15 KiB
Python
386 lines
15 KiB
Python
import logging
|
|
import datetime
|
|
import urllib.parse
|
|
import pandas as pd
|
|
|
|
from creation_helpers import get_all_pybaseball_ids, sanitize_name, CLUB_LIST, FRANCHISE_LIST, pd_players_df, \
|
|
mlbteam_and_franchise
|
|
from db_calls import db_post, db_get, db_put, db_patch
|
|
from . import calcs_batter as cba
|
|
|
|
|
|
async def pd_battingcards_df(cardset_id: int):
    """Fetch the batting cards of one cardset as a DataFrame.

    Queries the ``battingcards`` endpoint (with ``short_output`` enabled) and
    renames the ``id`` and ``player`` columns to ``battingcard_id`` and
    ``player_id`` so the frame can be merged with player and ratings data.

    Args:
        cardset_id: ID of the cardset whose batting cards are requested.

    Returns:
        A DataFrame built from the ``cards`` list of the API response.

    Raises:
        ValueError: If the response reports a ``count`` of zero.
    """
    query_params = [('cardset_id', cardset_id), ('short_output', True)]
    response = await db_get('battingcards', params=query_params)
    if response['count'] == 0:
        raise ValueError('No batting cards returned from Paper Dynasty API')

    renamed_columns = {'id': 'battingcard_id', 'player': 'player_id'}
    cards = pd.DataFrame(response['cards'])
    return cards.rename(columns=renamed_columns)
|
|
|
|
|
|
async def pd_battingcardratings_df(cardset_id: int):
    """Fetch batting card ratings for a cardset and derive rarity from OPS.

    Ratings are pulled once per pitcher hand ('L' then 'R') and merged on
    ``battingcard`` so each row carries ``*_vL`` and ``*_vR`` columns. Two
    columns are then added:

    * ``total_OPS``: (OPS vR + OPS vL + min(OPS vL, OPS vR)) / 3, where each
      OPS is ``obp + slg`` for that split.
    * ``new_rarity_id``: the rarity tier implied by ``total_OPS``.

    Args:
        cardset_id: ID of the cardset whose ratings are requested.

    Returns:
        The merged ratings DataFrame with ``battingcard`` renamed to
        ``battingcard_id``.

    Raises:
        ValueError: If either split's response reports a ``count`` of zero.
    """
    def ratings_params(vs_hand):
        # team_id and ts are fixed request values in this module; their
        # meaning is defined by the API and is not visible here.
        return [
            ('cardset_id', cardset_id), ('vs_hand', vs_hand), ('short_output', True), ('team_id', 31),
            ('ts', 's37136685556r6135248705')]

    left_response = await db_get('battingcardratings', params=ratings_params('L'))
    right_response = await db_get('battingcardratings', params=ratings_params('R'))
    if left_response['count'] == 0 or right_response['count'] == 0:
        raise ValueError('No batting card ratings returned from Paper Dynasty API')

    against_left = pd.DataFrame(left_response['ratings'])
    against_right = pd.DataFrame(right_response['ratings'])
    ratings = pd.merge(against_left, against_right, on='battingcard', suffixes=('_vL', '_vR'))
    ratings = ratings.rename(columns={'battingcard': 'battingcard_id'})

    def total_ops(row):
        ops_left = row['obp_vL'] + row['slg_vL']
        ops_right = row['obp_vR'] + row['slg_vR']
        # The weaker split is counted twice, so lopsided hitters are penalised.
        return (ops_right + ops_left + min(ops_left, ops_right)) / 3

    ratings['total_OPS'] = ratings.apply(total_ops, axis=1)

    # (minimum total_OPS, rarity id), checked from the highest tier down.
    rarity_thresholds = ((1.2, 99), (1, 1), (.9, 2), (.8, 3), (.7, 4))

    def rarity_for(row):
        for minimum_ops, rarity_id in rarity_thresholds:
            if row['total_OPS'] >= minimum_ops:
                return rarity_id
        return 5

    ratings['new_rarity_id'] = ratings.apply(rarity_for, axis=1)

    return ratings
|
|
|
|
# return pd.DataFrame(bcr_query['ratings']).rename(columns={'battingcard': 'battingcard_id'})
|
|
|
|
|
|
def get_batting_stats(file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None):
    """Load and combine the batting split CSVs found under ``file_path``.

    Four files are read, each named ``<file_path><split>-<kind>.csv`` with
    split in (``vlhp``, ``vrhp``) and kind in (``basic``, ``rate``).
    ``file_path`` is used as a plain string prefix, so it must already end
    with a path separator when it names a directory. Rows need ``PA >= 20``
    in the vs-left files and ``PA >= 40`` in the vs-right files.

    Args:
        file_path: Prefix of the CSV files to read.
        start_date: Unused; date-based pulls are not implemented.
        end_date: Unused; date-based pulls are not implemented.

    Returns:
        One DataFrame inner-joined on ``playerId``. Split columns carry
        ``_vL`` / ``_vR`` suffixes, and rate-file columns that clash with
        basic-file columns additionally get ``_rate``.

    Raises:
        LookupError: If ``file_path`` is None.
    """
    if file_path is None:
        raise LookupError('Date-based stat pulls not implemented, yet. Please provide batting csv files.')

    def merged_splits(kind):
        against_left = pd.read_csv(f'{file_path}vlhp-{kind}.csv').query('PA >= 20')
        against_right = pd.read_csv(f'{file_path}vrhp-{kind}.csv').query('PA >= 40')
        return pd.merge(against_left, against_right, on="playerId", suffixes=('_vL', '_vR'))

    basic_stats = merged_splits('basic')
    rate_stats = merged_splits('rate')

    return pd.merge(basic_stats, rate_stats, on="playerId", suffixes=('', '_rate'))
|
|
|
|
|
|
def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame):
    """Attach player ID keys and Paper Dynasty player rows to batting stats.

    For every batting row the FanGraphs ``playerId`` is passed to
    ``get_all_pybaseball_ids``; the lookup results are left-joined to
    ``all_players`` on ``key_bbref`` == ``bbref_id`` and then joined back to
    the batting stats on ``key_fangraphs`` == ``playerId``.

    Args:
        all_batting: Batting stats containing a ``playerId`` column.
        all_players: Paper Dynasty players containing a ``bbref_id`` column.

    Returns:
        The combined DataFrame indexed by ``key_bbref`` (the column is kept).
    """
    print('Now pulling mlbam player IDs...')
    # presumably each lookup yields one row of key_* identifiers - verify
    # against get_all_pybaseball_ids
    id_lookup = all_batting.apply(
        lambda row: get_all_pybaseball_ids([row["playerId"]], 'fangraphs'), axis=1)

    matched = id_lookup.merge(all_players, how='left', left_on='key_bbref', right_on='bbref_id')
    # A value is unequal to itself only when it is NaN, so this drops rows
    # that have no mlbam key.
    matched = matched.query('key_mlbam == key_mlbam')
    matched = matched.set_index('key_bbref', drop=False)
    print('Matched mlbam to pd players.')

    combined = pd.merge(matched, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False)

    return combined.set_index('key_bbref', drop=False)
|
|
|
|
|
|
async def create_new_players(
        final_batting: pd.DataFrame, cardset: dict, card_base_url: str, release_dir: str, player_desc: str):
    """Create Paper Dynasty players for batting rows that have no player yet.

    Every row of ``final_batting`` whose ``player_id`` is null is posted to
    the ``players`` endpoint with placeholder values (cost 99999, rarity 99,
    position DH). The ``player_id`` and ``p_name`` returned by the API are
    written back into ``final_batting`` in place, keyed by the row index.

    Args:
        final_batting: Batting data whose row labels are used as bbref IDs.
        cardset: Cardset record; only ``cardset['id']`` is read.
        card_base_url: Base URL used to build each card image link.
        release_dir: Release directory appended to the image link.
        player_desc: Description stored on each new player.

    Returns:
        The number of players created.
    """
    pending_players = []

    def queue_player(row):
        first_name = sanitize_name(row["name_first"]).title()
        last_name = sanitize_name(row["name_last"]).title()
        fangraphs_id = int(float(row['key_fangraphs']))
        team_code = row['Tm_vL']
        image_url = (f'{card_base_url}/{row["player_id"]}/battingcard'
                     f'{urllib.parse.quote("?d=")}{release_dir}')
        pending_players.append({
            'p_name': f'{first_name} {last_name}',
            'cost': 99999,
            'image': image_url,
            'mlbclub': CLUB_LIST[team_code],
            'franchise': FRANCHISE_LIST[team_code],
            'cardset_id': cardset['id'],
            'set_num': fangraphs_id,
            'rarity_id': 99,
            'pos_1': 'DH',
            'description': f'{player_desc}',
            'bbref_id': row.name,
            'fangr_id': fangraphs_id,
            'strat_code': int(float(row['key_mlbam']))
        })

    unmatched_rows = final_batting[final_batting['player_id'].isnull()]
    unmatched_rows.apply(queue_player, axis=1)
    print(f'Creating {len(pending_players)} new players...')
    for payload in pending_players:
        created = await db_post('players', payload=payload)
        bbref_id = payload['bbref_id']
        final_batting.at[bbref_id, 'player_id'] = created['player_id']
        final_batting.at[bbref_id, 'p_name'] = created['p_name']

    print(f'Player IDs linked to batting stats.\n{len(final_batting)} players remain\n')

    return len(pending_players)
|
|
|
|
|
|
def get_offense_stats(final_batting: pd.DataFrame, input_path: str):
    """Join baserunning stats and batting hand onto the batting data.

    Reads ``<input_path>running.csv`` (``input_path`` is a plain string
    prefix), indexes it by ``Name-additional`` and derives ``bat_hand`` from
    the last character of ``Name``: ``*`` means 'L', ``#`` means 'S', and
    anything else means 'R'.

    A missing or blank ``Name`` also yields 'R' instead of raising. This
    matches the catch-all default and keeps one bad row, or an empty
    running.csv, from aborting the whole run.

    Args:
        final_batting: Batting data indexed by the same IDs found in the
            ``Name-additional`` column of running.csv.
        input_path: Prefix of the running.csv file to read.

    Returns:
        ``final_batting`` left-joined with the running data; rows without a
        running entry get NaN in the joined columns.
    """
    hand_by_marker = {'*': 'L', '#': 'S'}

    print('Reading baserunning stats...')
    run_data = pd.read_csv(f'{input_path}running.csv').set_index('Name-additional')

    # A plain comprehension avoids DataFrame.apply(axis=1), which returns a
    # DataFrame for an empty frame. name[-1:] is '' for a blank name, and the
    # isinstance check covers NaN cells, which pandas reads as floats.
    run_data['bat_hand'] = [
        hand_by_marker.get(name[-1:], 'R') if isinstance(name, str) else 'R'
        for name in run_data['Name']
    ]

    offense_stats = final_batting.join(run_data)
    print(f'Stats are tallied\n{len(offense_stats)} players remain\n\nCollecting defensive data from bbref...')

    return offense_stats
|
|
|
|
|
|
async def calculate_batting_cards(offense_stats: pd.DataFrame, cardset: dict, season_pct: float, to_post: bool):
    """Build one batting card per player and optionally upload them.

    Each row of ``offense_stats`` becomes a card dict holding the player's
    ID keys, names, stealing values from ``cba.stealing``, the hit-and-run
    value from ``cba.hit_and_run``, the running value from ``cba.running``
    and the batting hand.

    Args:
        offense_stats: Per-player stats indexed by bbref ID.
        cardset: Cardset record; only ``cardset['id']`` is read.
        season_pct: Passed through to ``cba.stealing``.
        to_post: When true, the cards are PUT to the ``battingcards``
            endpoint.

    Returns:
        ``offense_stats`` merged with the cardset's batting cards on
        ``player_id`` and re-indexed by ``key_bbref``.
    """
    batting_cards = []
    # Column order matches the positional arguments of cba.hit_and_run:
    # AB_vL, AB_vR, H_vL, H_vR, HR_vL, HR_vR, SO_vL, SO_vR.
    split_columns = [f'{stat}_{hand}' for stat in ('AB', 'H', 'HR', 'SO') for hand in ('vL', 'vR')]

    def build_card(row):
        steal_values = cba.stealing(
            chances=row['SBO'],
            sb2s=row['SB2'],
            cs2s=row['CS2'],
            sb3s=row['SB3'],
            cs3s=row['CS3'],
            season_pct=season_pct
        )
        batting_cards.append({
            "player_id": row['player_id'],
            "key_bbref": row.name,
            "key_fangraphs": int(float(row['key_fangraphs'])),
            "key_mlbam": row['key_mlbam'],
            "key_retro": row['key_retro'],
            "name_first": row["name_first"].title(),
            "name_last": row["name_last"].title(),
            "steal_low": steal_values[0],
            "steal_high": steal_values[1],
            "steal_auto": steal_values[2],
            "steal_jump": steal_values[3],
            "hit_and_run": cba.hit_and_run(*[row[column] for column in split_columns]),
            "running": cba.running(row['XBT%']),
            "hand": row['bat_hand']
        })

    print('Calculating batting cards...')
    offense_stats.apply(build_card, axis=1)
    print('Cards are complete.\n\nPosting cards now...')
    if to_post:
        resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...')

    # NOTE(review): the merge is kept outside the to_post branch so a dry run
    # still picks up battingcard IDs from cards already in the database -
    # confirm this matches the intended flow.
    stored_cards = await pd_battingcards_df(cardset['id'])
    offense_stats = pd.merge(offense_stats, stored_cards, on='player_id').set_index('key_bbref', drop=False)

    return offense_stats
|
|
|
|
|
|
async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool):
    """Compute batting card ratings for every player and optionally upload.

    ``cba.get_batter_ratings`` is called once per row of ``offense_stats``
    and everything it returns is gathered into one list.

    Args:
        offense_stats: Per-player stats, one row per batting card.
        to_post: When true, the ratings are PUT to the
            ``battingcardratings`` endpoint.

    Returns:
        The number of rating entries that were calculated.
    """
    collected_ratings = []

    def collect_ratings(row):
        logging.debug(f'Calculating card ratings for {row.name}')
        collected_ratings.extend(cba.get_batter_ratings(row))

    print('Calculating card ratings...')
    offense_stats.apply(collect_ratings, axis=1)
    print('Ratings are complete\n\nPosting ratings now...')
    if not to_post:
        return len(collected_ratings)

    resp = await db_put('battingcardratings', payload={'ratings': collected_ratings}, timeout=30)
    print(f'Response: {resp}\n\nPulling fresh PD player data...')

    return len(collected_ratings)
|
|
|
|
|
|
# Cost charged to a brand new card (placeholder cost 99999), per rarity id,
# before it is scaled by the card's total OPS.
_NEW_CARD_BASE_COSTS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2400}

# Average total OPS used for a rarity when no existing card anchors it.
_FALLBACK_AVERAGE_OPS = {1: 1.066, 2: 0.938, 3: 0.844, 4: 0.752, 5: 0.612}

# Price level of each rarity. The adjustment applied when an existing card
# changes rarity is the difference between the new and old level
# (e.g. 2 -> 1 adds 810 - 270 = 540, 5 -> 99 adds 2410 - 10 = 2400).
_RARITY_PRICE_LEVELS = {1: 810, 2: 270, 3: 90, 4: 30, 5: 10, 99: 2410}

# Lowest cost a card may end up with after being moved DOWN into a rarity.
_DOWNGRADE_COST_FLOORS = {1: 800, 2: 100, 3: 50, 4: 15, 5: 5}


def _rebalanced_cost(old_rarity, new_rarity, old_cost):
    """Return the cost of an existing card after its rarity changes.

    Upgrades add the price-level difference without any cap; downgrades
    subtract it but never go below the floor of the new rarity. 0 is returned
    when either rarity is unknown or the rarity is unchanged; the caller
    treats 0 as "leave the cost alone".
    """
    if old_rarity not in _RARITY_PRICE_LEVELS or new_rarity not in _RARITY_PRICE_LEVELS:
        return 0
    adjustment = _RARITY_PRICE_LEVELS[new_rarity] - _RARITY_PRICE_LEVELS[old_rarity]
    if adjustment > 0:
        return old_cost + adjustment
    if adjustment < 0:
        return max(old_cost + adjustment, _DOWNGRADE_COST_FLOORS[new_rarity])
    return 0


async def post_player_updates(
        cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool):
    """Work out and optionally send the pending updates for each player.

    Steps:
      1. Pull fresh pd_players, battingcards and battingcardratings (the
         ratings are fetched one hand at a time and carry _vL / _vR
         suffixes).
      2. Join battingcards with battingcardratings on battingcard_id, then
         join pd_players with that result on player_id.
      3. Look up pybaseball IDs by bbref_id and drop players without an
         mlbam key.
      4. For each player collect the parameters that need patching:
         description, club and franchise (live series only), image URL,
         cost and rarity.

    Total OPS is (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3 and determines
    new_rarity_id. Players whose cost is 99999 get
    <Rarity Base Cost> * Total OPS / <Rarity Avg OPS>; existing players whose
    rarity changed have their cost shifted by a fixed amount per transition.

    Args:
        cardset: Cardset record; only ``cardset['id']`` is read.
        card_base_url: Base URL used to build each card image link.
        release_dir: Release directory appended to the image link.
        player_desc: Description every player in the cardset should carry.
        is_liveseries: When true, club and franchise are refreshed through
            ``mlbteam_and_franchise``.
        to_post: When true, the updates are PATCHed to the ``players``
            endpoint one player at a time.

    Returns:
        The number of players that have at least one pending update.
    """
    # The merges below join on the player_id column, so the player frame is
    # used as returned; the index is only set on the merged result.
    p_data = await pd_players_df(cardset['id'])
    card_ratings = pd.merge(
        await pd_battingcards_df(cardset['id']),
        await pd_battingcardratings_df(cardset['id']),
        on='battingcard_id'
    )
    player_data = pd.merge(p_data, card_ratings, on='player_id').set_index('player_id', drop=False)
    del card_ratings

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   # NaN is the only value unequal to itself: drops players
                   # that have no mlbam key.
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { <player_id> : [ (param pairs) ] }

    # Average total OPS per rarity, measured only on cards whose rarity is
    # not about to change.
    rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
    average_ops = rarity_group['total_OPS'].mean().to_dict()
    for rarity_id, fallback_ops in _FALLBACK_AVERAGE_OPS.items():
        average_ops.setdefault(rarity_id, fallback_ops)
    # NOTE(review): rarity 99 has no fallback average, so pricing a brand new
    # rarity-99 card raises KeyError unless an existing rarity-99 card is in
    # the group above - confirm whether a default is wanted.

    def get_player_updates(df_data):
        params = []

        if df_data['description'] != player_desc:
            params = [('description', f'{player_desc}')]

        if is_liveseries:
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))

            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.append(('mlbclub', team_data['mlbclub']))
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.append(('franchise', team_data['franchise']))

        # The image link is refreshed for every player on every run.
        params.append(('image', f'{card_base_url}/{df_data["player_id"]}/battingcard'
                                f'{urllib.parse.quote("?d=")}{release_dir}'))

        new_rarity = df_data['new_rarity_id']
        if df_data['cost'] == 99999:
            # Brand new card: price it relative to the average of its tier.
            new_cost = round(_NEW_CARD_BASE_COSTS[new_rarity] * df_data['total_OPS'] / average_ops[new_rarity])
            params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        elif df_data['rarity'] != new_rarity:
            new_cost = _rebalanced_cost(df_data['rarity'], new_rarity, df_data['cost'])
            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        if params:
            player_updates.setdefault(df_data.player_id, []).extend(params)

    player_data.apply(get_player_updates, axis=1)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if to_post:
        for player_id, params in player_updates.items():
            await db_patch('players', object_id=player_id, params=params)

    return len(player_updates)