import asyncio
import copy
import csv
import datetime
import html5lib
import logging
import random
import requests
import urllib.parse

import calcs_batter as cba
import calcs_defense as cde
import calcs_pitcher as cpi
import pandas as pd
import pybaseball as pb
import pydantic
import sys

from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \
    get_pitching_stats, get_all_pybaseball_ids, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \
    get_args, mlbteam_and_franchise, CLUB_LIST, FRANCHISE_LIST
from db_calls import db_get, db_put, db_post, db_patch, DB_URL
from typing import Literal
from bs4 import BeautifulSoup

date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
log_level = logging.INFO
logging.basicConfig(
    filename=f'logs/{date}.log',
    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
    level=log_level
)
CARD_BASE_URL = f'{DB_URL}/v2/players'


def sanitize_name(start_name: str) -> str:
    return (start_name
            .replace("é", "e")
            .replace("á", "a")
            .replace(".", "")
            .replace("Á", "A")
            .replace("ñ", "n")
            .replace("ó", "o")
            .replace("í", "i")
            .replace("ú", "u")
            .replace("'", "")
            .replace('-', ' '))
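# Example (illustrative): sanitize_name("José Martínez-Cruz") returns "Jose Martinez Cruz";
# accents are flattened, periods and apostrophes are dropped, and hyphens become spaces.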


# class BattingStat(pydantic.BaseModel):
#     fg_id: int
#     vs_hand: Literal['L', 'R']
#     pa: int
#     hit: int
#     single: int
#     double: int
#     triple: int
#     homerun: int
#     rbi: int
#     bb: int
#     ibb: int
#     so: int
#     hbp: int
#     gidp: int
#     sb: int
#     cs: int
#     avg: float
#     hard_rate: float = None
#     med_rate: float = None
#     soft_rate: float = None
#     ifh_rate: float = None
#     hr_per_fb: float = None
#     ld_rate: float = None
#     iffb_rate: float = None
#     fb_rate: float = None
#     pull_rate: float = None
#     center_rate: float = None
#     oppo_rate: float = None


async def main(args):
    """
    params:
    cardset_name: str - to be searched in pd database
    games_played: int - from 1 to 162
    pull_fielding: bool - whether or not to pull fielding stats from bbref
    post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates
    post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitcher updates
    post_fielders: bool - whether or not to post card positions
    post_players: bool - whether or not to post player updates
    p_desc_prefix: str - shows as cardset on card image and prefixes player name in discord
    is_liveseries: str - whether or not to look up players' current MLB club from MLB statsapi
    """
    arg_data = get_args(args)

    # cardset_name = input(f'What is the name of this Cardset? ')
    cardset_name = arg_data['cardset_name']
    print(f'Searching for cardset: {cardset_name}')
    c_query = await db_get('cardsets', params=[('name', cardset_name)])

    if c_query['count'] == 0:
        print(f'I do not see a cardset named {cardset_name}')
        return
    cardset = c_query['cardsets'][0]
    del c_query

    if 'season' in arg_data:
        season = arg_data['season']
    else:
        season = int(cardset['name'][:4])

    game_count = int(arg_data['games_played'])
    if game_count < 1 or game_count > 162:
        print(f'Game count has to be between 1 and 162.')
        return
    season_pct = game_count / 162
    print(f'Cardset ID: {cardset["id"]} / Season: {season}\nGame count: {game_count} / Season %: {season_pct}\n')

    if 'player_description' in arg_data:
        player_description = arg_data['player_description']
    elif season_pct < 1:
        player_description = f'Live'
    else:
        player_description = f'{season}'

    start_time = datetime.datetime.now()
    release_directory = f'{start_time.year}-{start_time.month}-{start_time.day}'
    input_path = f'data-input/{cardset["name"]} Cardset/'

    print('Reading batting stats...')
    all_batting = get_batting_stats(file_path=input_path)
    print(f'Processed {len(all_batting.values)} batters\n')

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')

    def get_hand(df_data):
        if df_data['Name'][-1] == '*':
            return 'L'
        elif df_data['Name'][-1] == '#':
            return 'S'
        else:
            return 'R'
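    # Note: the 'Name' column in the bbref-style exports marks left-handed batters
    # with a trailing '*' and switch hitters with '#', which is what get_hand relies on.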

    print(f'Pulling PD player IDs...')
    pd_players = await pd_players_df(cardset['id'])
    # .set_index('bbref_id', drop=False)

    print(f'Now pulling mlbam player IDs...')
    ids_and_names = all_batting.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(pd_players, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))
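    # .query('key_mlbam == key_mlbam') keeps only rows where key_mlbam is not NaN
    # (NaN never compares equal to itself), i.e. players that resolved to an MLBAM ID.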
    print(f'Matched mlbam to pd players.')
    final_batting = pd.merge(
        player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)

    new_players = []

    def create_batters(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
                     f'{urllib.parse.quote("?d=")}{release_directory}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': FRANCHISE_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': int(float(df_data['key_fangraphs'])),
            'rarity_id': 99,
            'pos_1': 'DH',
            'description': f'{player_description}',
            'bbref_id': df_data.name,
            'fangr_id': int(float(df_data['key_fangraphs'])),
            'strat_code': int(float(df_data['key_mlbam']))
        })

    final_batting[final_batting['player_id'].isnull()].apply(create_batters, axis=1)
    print(f'Creating {len(new_players)} new players...')
    for x in new_players:
        this_player = await db_post('players', payload=x)
        final_batting.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_batting.at[x['bbref_id'], 'p_name'] = this_player['p_name']

    del ids_and_names, all_batting, pd_players
    print(f'Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n')

    print(f'Reading baserunning stats...')
    run_data = (pd.read_csv(f'{input_path}running.csv')
                .set_index('Name-additional'))
    run_data['bat_hand'] = run_data.apply(get_hand, axis=1)
    offense_stats = final_batting.join(run_data)
    del final_batting, run_data
    print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...')

    print(f'Pulling pitcher defense...')
    df_p = cde.get_bbref_fielding_df('p', season)
    if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true':
        print(f'Pulling catcher defense...')
        df_c = cde.get_bbref_fielding_df('c', season)
        print(f'Pulling first base defense...')
        df_1b = cde.get_bbref_fielding_df('1b', season)
        print(f'Pulling second base defense...')
        df_2b = cde.get_bbref_fielding_df('2b', season)
        print(f'Pulling third base defense...')
        df_3b = cde.get_bbref_fielding_df('3b', season)
        print(f'Pulling shortstop defense...')
        df_ss = cde.get_bbref_fielding_df('ss', season)
        print(f'Pulling left field defense...')
        df_lf = cde.get_bbref_fielding_df('lf', season)
        print(f'Pulling center field defense...')
        df_cf = cde.get_bbref_fielding_df('cf', season)
        print(f'Pulling right field defense...')
        df_rf = cde.get_bbref_fielding_df('rf', season)
        print(f'Pulling outfield defense...')
        df_of = cde.get_bbref_fielding_df('of', season)
        print(f'Positions data is retrieved')

    batting_cards = []

    def create_batting_card(df_data):
        s_data = cba.stealing(
            chances=df_data['SBO'],
            sb2s=df_data['SB2'],
            cs2s=df_data['CS2'],
            sb3s=df_data['SB3'],
            cs3s=df_data['CS3'],
            season_pct=season_pct
        )
        batting_cards.append({
            "player_id": df_data['player_id'],
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": df_data['key_mlbam'],
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "steal_low": s_data[0],
            "steal_high": s_data[1],
            "steal_auto": s_data[2],
            "steal_jump": s_data[3],
            "hit_and_run": cba.hit_and_run(
                df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'],
                df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR']
            ),
            "running": cba.running(df_data['XBT%']),
            "hand": df_data['bat_hand']
        })

    print(f'Calculating batting cards...')
    offense_stats.apply(create_batting_card, axis=1)
    print(f'Cards are complete.\n\nPosting cards now...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...')
    offense_stats = pd.merge(
        offense_stats, await pd_battingcards_df(cardset['id']), on='player_id').set_index('key_bbref', drop=False)

    position_payload = []
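    # create_positions (below) blends the Total Zone and BIS run estimates for a
    # position as (tz + bis + min(tz, bis)) / 3, so the lower of the two values is
    # counted twice. For example, tz=10 and bis=4 give (10 + 4 + 4) / 3 = 6.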

    def create_positions(df_data):
        no_data = True
        for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
            if df_data['key_bbref'] in pos_data[0].index:
                logging.debug(f'Running {pos_data[1]} stats for {player_data.at[df_data["key_bbref"], "p_name"]}')
                no_data = False
                average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) +
                                 int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) +
                                 min(
                                     int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']),
                                     int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 )) / 3

                position_payload.append({
                    "player_id": int(df_data['player_id']),
                    "position": pos_data[1].upper(),
                    "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']),
                    "range": cde.get_if_range(
                        pos_code=pos_data[1],
                        tz_runs=round(average_range),
                        r_dp=0,
                        season_pct=season_pct
                    ),
                    "error": cde.get_any_error(
                        pos_code=pos_data[1],
                        errors=int(pos_data[0].at[df_data["key_bbref"], 'E_def']),
                        chances=int(pos_data[0].at[df_data["key_bbref"], 'chances']),
                        season_pct=season_pct
                    )
                })

        of_arms = []
        of_payloads = []
        for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]:
            if df_data["key_bbref"] in pos_data[0].index:
                no_data = False
                average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) +
                                 int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) +
                                 min(
                                     int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']),
                                     int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 )) / 3
                of_payloads.append({
                    "player_id": int(df_data['player_id']),
                    "position": pos_data[1].upper(),
                    "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']),
                    "range": cde.get_of_range(
                        pos_code=pos_data[1],
                        tz_runs=round(average_range),
                        season_pct=season_pct
                    )
                })
                of_arms.append(int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_outfield']))

        if df_data["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            no_data = False
            error_rating = cde.get_any_error(
                pos_code=pos_data[1],
                errors=int(df_of.at[df_data["key_bbref"], 'E_def']),
                chances=int(df_of.at[df_data["key_bbref"], 'chances']),
                season_pct=season_pct
            )
            arm_rating = cde.arm_outfield(of_arms)
            for f in of_payloads:
                f['error'] = error_rating
                f['arm'] = arm_rating
                position_payload.append(f)

        if df_data["key_bbref"] in df_c.index:
            if df_c.at[df_data["key_bbref"], 'SB'] + df_c.at[df_data["key_bbref"], 'CS'] == 0:
                arm_rating = 3
            else:
                arm_rating = cde.arm_catcher(
                    cs_pct=df_c.at[df_data["key_bbref"], 'caught_stealing_perc'],
                    raa=int(df_c.at[df_data["key_bbref"], 'bis_runs_catcher_sb']),
                    season_pct=season_pct
                )
            no_data = False
            position_payload.append({
                "player_id": int(df_data['player_id']),
                "position": 'C',
                "innings": float(df_c.at[df_data["key_bbref"], 'Inn_def']),
                "range": cde.range_catcher(
                    rs_value=int(df_c.at[df_data["key_bbref"], 'tz_runs_catcher']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='c',
                    errors=int(df_c.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_c.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                ),
                "arm": arm_rating,
                "pb": cde.pb_catcher(
                    pb=int(df_c.at[df_data["key_bbref"], 'PB']),
                    innings=int(float(df_c.at[df_data["key_bbref"], 'Inn_def'])),
                    season_pct=season_pct
                ),
                "overthrow": cde.ot_catcher(
                    errors=int(df_c.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_c.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                )
            })

        if no_data:
            position_payload.append({
                "player_id": int(df_data['player_id']),
                "position": 'DH',
                "innings": df_data['PA_vL'] + df_data['PA_vR']
            })

    if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true':
        print(f'Calculating fielding lines now...')
        offense_stats.apply(create_positions, axis=1)
        print(f'Fielding is complete.\n\nPosting positions now...')
        if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
            resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30)
            print(f'Response: {resp}\n')

    batting_ratings = []

    def create_batting_card_ratings(df_data):
        logging.debug(f'Calculating card ratings for {df_data.name}')
        batting_ratings.extend(cba.get_batter_ratings(df_data))

    print(f'Calculating card ratings...')
    offense_stats.apply(create_batting_card_ratings, axis=1)
    print(f'Ratings are complete\n\nPosting ratings now...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling fresh PD player data...')

    """
    Pull fresh pd_players and set_index to player_id
    Pull fresh battingcards and set_index to player
    Pull fresh battingcardratings one hand at a time and join on battingcard (suffixes _vL and _vR)

    Join battingcards (left) with battingcardratings (right) as total_ratings on id (left) and battingcard (right)
    Join pd_players (left) with total_ratings (right) on indices
    Output: PD player list with batting card, ratings vL, and ratings vR

    Calculate Total OPS as (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3 and assign rarity_id
    For players with cost of 99999, set cost to <Rarity Base Cost> * Total OPS / <Rarity Avg OPS>
    """
    p_data = await pd_players_df(cardset['id'])
    p_data = p_data.set_index('player_id', drop=False)
    total_ratings = pd.merge(
        await pd_battingcards_df(cardset['id']),
        await pd_battingcardratings_df(cardset['id']),
        on='battingcard_id'
    )
    player_data = pd.merge(
        p_data,
        total_ratings,
        on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings, offense_stats

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { <player_id> : [ (param pairs) ] }
    rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
    average_ops = rarity_group['total_OPS'].mean().to_dict()
    if 1 not in average_ops:
        average_ops[1] = 1.066
    if 2 not in average_ops:
        average_ops[2] = 0.938
    if 3 not in average_ops:
        average_ops[3] = 0.844
    if 4 not in average_ops:
        average_ops[4] = 0.752
    if 5 not in average_ops:
        average_ops[5] = 0.612
    # cost_groups = rarity_group['cost'].mean()

    def get_player_updates(df_data):
        base_costs = {
            1: 810,
            2: 270,
            3: 90,
            4: 30,
            5: 10,
            99: 2400
        }
        params = []

        if df_data['description'] != player_description:
            params = [('description', f'{player_description}')]

        if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))

            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])

        # if release_directory not in df_data['image']:
        params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
                                 f'{urllib.parse.quote("?d=")}{release_directory}')])

        if df_data['cost'] == 99999:
            params.extend([
                ('cost',
                 round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS'] /
                       average_ops[df_data['new_rarity_id']])),
                ('rarity_id', df_data['new_rarity_id'])
            ])

        elif df_data['rarity'] != df_data['new_rarity_id']:
            old_rarity = df_data['rarity']
            new_rarity = df_data['new_rarity_id']
            old_cost = df_data['cost']
            new_cost = 0
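            # Each rarity transition below applies a fixed cost delta, floored at a
            # per-rarity minimum (e.g. dropping from rarity 1 to 3 subtracts 720 but
            # never goes below 50).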

            if old_rarity == 1:
                if new_rarity == 2:
                    new_cost = max(old_cost - 540, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 720, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 780, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 800, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 1600
            elif old_rarity == 2:
                if new_rarity == 1:
                    new_cost = old_cost + 540
                elif new_rarity == 3:
                    new_cost = max(old_cost - 180, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 240, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 260, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2140
            elif old_rarity == 3:
                if new_rarity == 1:
                    new_cost = old_cost + 720
                elif new_rarity == 2:
                    new_cost = old_cost + 180
                elif new_rarity == 4:
                    new_cost = max(old_cost - 60, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 80, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2320
            elif old_rarity == 4:
                if new_rarity == 1:
                    new_cost = old_cost + 780
                elif new_rarity == 2:
                    new_cost = old_cost + 240
                elif new_rarity == 3:
                    new_cost = old_cost + 60
                elif new_rarity == 5:
                    new_cost = max(old_cost - 20, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2380
            elif old_rarity == 5:
                if new_rarity == 1:
                    new_cost = old_cost + 800
                elif new_rarity == 2:
                    new_cost = old_cost + 260
                elif new_rarity == 3:
                    new_cost = old_cost + 80
                elif new_rarity == 4:
                    new_cost = old_cost + 20
                elif new_rarity == 99:
                    new_cost = old_cost + 2400
            elif old_rarity == 99:
                if new_rarity == 1:
                    new_cost = max(old_cost - 1600, 800)
                elif new_rarity == 2:
                    new_cost = max(old_cost - 2140, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 2320, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 2380, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 2400, 5)

            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        if len(params) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = params
            else:
                player_updates[df_data.player_id].extend(params)

    player_data.apply(get_player_updates, axis=1)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        for x in player_updates:
            await db_patch('players', object_id=x, params=player_updates[x])

    del player_updates
    print(f'Batter updates are complete')
    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Total batting cards: {len(batting_cards)}\nNew cardset batters: {len(new_players)}\n'
          f'Batter runtime: {round(run_time.total_seconds())} seconds\n')

    print('Reading pitching stats...')
    all_pitching = get_pitching_stats(file_path=input_path)
    print(f'Processed {len(all_pitching.values)} pitchers\n')

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')

    print(f'Now pulling mlbam player IDs...')
    ids_and_names = all_pitching.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(p_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))
    print(f'Matched mlbam to pd players.')

    step_pitching = pd.merge(
        player_data, all_pitching, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)
    final_pitching = step_pitching.join(df_p, rsuffix='_r')

    new_players = []

    def create_pitchers(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{CARD_BASE_URL}/{df_data["player_id"]}/'
                     f'pitchingcard{urllib.parse.quote("?d=")}{release_directory}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': FRANCHISE_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': int(float(df_data['key_fangraphs'])),
            'rarity_id': 99,
            'pos_1': 'P',
            'description': f'{player_description}',
            'bbref_id': df_data.name,
            'fangr_id': int(float(df_data['key_fangraphs'])),
            'strat_code': int(float(df_data['key_mlbam']))
        })

    final_pitching[final_pitching['player_id'].isnull()].apply(create_pitchers, axis=1)
    print(f'Creating {len(new_players)} new players...')
    for x in new_players:
        this_player = await db_post('players', payload=x)
        final_pitching.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_pitching.at[x['bbref_id'], 'p_name'] = this_player['p_name']
    del ids_and_names, all_pitching, p_data, step_pitching
    print(f'Player IDs linked to pitching stats.\n{len(final_pitching.values)} players remain\n')

    print(f'Reading pitching peripheral stats...')
    pit_data = (pd.read_csv(f'{input_path}pitching.csv')
                .drop_duplicates(subset=['Name-additional'], keep='first')
                .set_index('Name-additional'))
    pit_data['pitch_hand'] = pit_data.apply(get_hand, axis=1)
    pitching_stats = final_pitching.join(pit_data, lsuffix='_l')
    del final_pitching, pit_data
    print(f'Stats are tallied\n{len(pitching_stats.values)} players remain\n')

    pitching_cards = []

    def create_pitching_card(df_data):
        pow_data = cde.pow_ratings(float(df_data['Inn_def']), int(df_data['GS']), int(df_data['G']))
        pitching_cards.append({
            "player_id": int(float(df_data['player_id'])),
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": int(float(df_data['key_mlbam'])),
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "balk": cpi.balks(df_data['BK'], df_data['IP'], season_pct),
            "wild_pitch": cpi.wild_pitches(df_data['WP'], df_data['IP'], season_pct),
            "hold": cde.hold_pitcher(df_data['caught_stealing_perc'], int(df_data['pickoffs']), season_pct),
            "starter_rating": pow_data[0],
            "relief_rating": pow_data[1],
            "closer_rating": cpi.closer_rating(int(df_data['GF']), int(df_data['SV']), int(df_data['G'])),
            "hand": df_data['pitch_hand'],
            "batting": f'#1W{df_data["pitch_hand"]}-C'
        })

    print(f'Calculating pitching cards...')
    pitching_stats.apply(create_pitching_card, axis=1)
    print(f'Cards are complete.\n\nPosting cards now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('pitchingcards', payload={'cards': pitching_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching pitching card database IDs to player stats...')
    # final_pitching_stats = pd.merge(
    #     pitching_stats, await pd_pitchingcards_df(cardset['id']), on='player_id')
    # final_pitching_stats.set_index('key_bbref', drop=False, inplace=True)
    # final_pitching_stats = final_pitching_stats.astype({'player_id': int})

    pc_df = await pd_pitchingcards_df(cardset['id'])
    pitching_stats = pitching_stats.merge(pc_df, how='left', on='player_id').set_index('key_bbref', drop=False)

    pit_positions = []

    def create_pit_position(df_data):
        if df_data["key_bbref"] in df_p.index:
            logging.debug(f'Running P stats for {df_data["p_name"]}')
            pit_positions.append({
                "player_id": int(df_data['player_id']),
                "position": 'P',
                "innings": float(df_p.at[df_data["key_bbref"], 'Inn_def']),
                "range": cde.range_pitcher(
                    rs_value=int(df_p.at[df_data["key_bbref"], 'bis_runs_total']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='p',
                    errors=int(df_p.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_p.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                )
            })
        else:
            pit_positions.append({
                "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                "position": 'P',
                "innings": 1,
                "range": 5,
                "error": 51
            })

    print(f'Calculating pitcher fielding lines now...')
    pitching_stats.apply(create_pit_position, axis=1)
    print(f'Fielding is complete.\n\nPosting positions now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('cardpositions', payload={'positions': pit_positions}, timeout=30)
        print(f'Response: {resp}\n')

    pitching_ratings = []

    def create_pitching_card_ratings(df_data):
        logging.info(f'Calculating pitching card ratings for {df_data.name}')
        pitching_ratings.extend(cpi.get_pitcher_ratings(df_data))

    print(f'Calculating card ratings...')
    pitching_stats.apply(create_pitching_card_ratings, axis=1)
    print(f'Ratings are complete\n\nPosting ratings now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling all positions to set player positions...')

    print(f'Pitcher updates are complete')
    start_time_three = datetime.datetime.now()
    p_run_time = datetime.datetime.now() - start_time_two
    print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n'
          f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')
    print(f'Checking for player updates...')

    """
    Pull fresh pd_players and set_index to player_id
    Pull fresh pitchingcards and set_index to player
    Pull fresh pitchingcardratings one hand at a time and join on pitchingcard (suffixes _vL and _vR)

    Join pitchingcards (left) with pitchingcardratings (right) as total_ratings on id (left) and pitchingcard (right)
    Join pd_players (left) with total_ratings (right) on indices
    Output: PD player list with pitching card, ratings vL, and ratings vR

    Calculate Total OPS as (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3 and assign rarity_id
    For players with cost of 99999, set cost to <Rarity Base Cost> * Total OPS / <Rarity Avg OPS>
    """

    def new_rarity_id(df_data):
        if df_data['starter_rating'] > 3:
            if df_data['total_OPS'] <= 0.4:
                return 99
            elif df_data['total_OPS'] <= 0.475:
                return 1
            elif df_data['total_OPS'] <= 0.53:
                return 2
            elif df_data['total_OPS'] <= 0.6:
                return 3
            elif df_data['total_OPS'] <= 0.675:
                return 4
            else:
                return 5
        else:
            if df_data['total_OPS'] <= 0.325:
                return 99
            elif df_data['total_OPS'] <= 0.4:
                return 1
            elif df_data['total_OPS'] <= 0.475:
                return 2
            elif df_data['total_OPS'] <= 0.55:
                return 3
            elif df_data['total_OPS'] <= 0.625:
                return 4
            else:
                return 5
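    # Example: a starter (starter_rating > 3) with total_OPS 0.50 maps to rarity 2,
    # while a pitcher below that starter threshold with the same 0.50 maps to rarity 3;
    # non-starters are held to lower total_OPS cutoffs for each tier.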
    p_data = await pd_players_df(cardset['id'])
    p_data = p_data.set_index('player_id', drop=False)
    total_ratings = pd.merge(
        await pd_pitchingcards_df(cardset['id']),
        await pd_pitchingcardratings_df(cardset['id']),
        on='pitchingcard_id'
    )
    total_ratings['new_rarity_id'] = total_ratings.apply(new_rarity_id, axis=1)

    player_data = pd.merge(
        p_data,
        total_ratings,
        on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings, pitching_stats

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { <player_id> : [ (param pairs) ] }
    sp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating >= 4').groupby('rarity')
    sp_average_ops = sp_rarity_group['total_OPS'].mean().to_dict()
    rp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating < 4').groupby('rarity')
    rp_average_ops = rp_rarity_group['total_OPS'].mean().to_dict()
    # cost_groups = rarity_group['cost'].mean()
    if 99 not in sp_average_ops:
        sp_average_ops[99] = 0.388
    if 1 not in sp_average_ops:
        sp_average_ops[1] = 0.445
    if 2 not in sp_average_ops:
        sp_average_ops[2] = 0.504
    if 3 not in sp_average_ops:
        sp_average_ops[3] = 0.568
    if 4 not in sp_average_ops:
        sp_average_ops[4] = 0.634
    if 5 not in sp_average_ops:
        sp_average_ops[5] = 0.737

    if 99 not in rp_average_ops:
        rp_average_ops[99] = 0.282
    if 1 not in rp_average_ops:
        rp_average_ops[1] = 0.375
    if 2 not in rp_average_ops:
        rp_average_ops[2] = 0.442
    if 3 not in rp_average_ops:
        rp_average_ops[3] = 0.516
    if 4 not in rp_average_ops:
        rp_average_ops[4] = 0.591
    if 5 not in rp_average_ops:
        rp_average_ops[5] = 0.702
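    # These literals are fallback averages used when the cardset has no existing
    # pitchers at a given rarity to average over; assumed to be typical total_OPS
    # values for the starter (sp_) and reliever (rp_) groups.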

    def get_player_updates(df_data):
        base_costs = {
            1: 810,
            2: 270,
            3: 90,
            4: 30,
            5: 10,
            99: 2400
        }

        def avg_ops(rarity_id, starter_rating):
            if starter_rating >= 4:
                return sp_average_ops[rarity_id]
            else:
                return rp_average_ops[rarity_id]

        params = []

        if df_data['description'] != player_description:
            params = [('description', f'{player_description}')]

        if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))

            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])

        # if release_directory not in df_data['image']:
        params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard'
                                 f'{urllib.parse.quote("?d=")}{release_directory}')])

        if df_data['cost'] == 99999:
            params.extend([
                ('cost',
                 round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS'] /
                       avg_ops(df_data['new_rarity_id'], df_data['starter_rating']))),
                ('rarity_id', df_data['new_rarity_id'])
            ])

        elif df_data['rarity'] != df_data['new_rarity_id']:
            old_rarity = df_data['rarity']
            new_rarity = df_data['new_rarity_id']
            old_cost = df_data['cost']
            new_cost = 0

            if old_rarity == 1:
                if new_rarity == 2:
                    new_cost = max(old_cost - 540, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 720, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 780, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 800, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 1600
            elif old_rarity == 2:
                if new_rarity == 1:
                    new_cost = old_cost + 540
                elif new_rarity == 3:
                    new_cost = max(old_cost - 180, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 240, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 260, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2140
            elif old_rarity == 3:
                if new_rarity == 1:
                    new_cost = old_cost + 720
                elif new_rarity == 2:
                    new_cost = old_cost + 180
                elif new_rarity == 4:
                    new_cost = max(old_cost - 60, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 80, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2320
            elif old_rarity == 4:
                if new_rarity == 1:
                    new_cost = old_cost + 780
                elif new_rarity == 2:
                    new_cost = old_cost + 240
                elif new_rarity == 3:
                    new_cost = old_cost + 60
                elif new_rarity == 5:
                    new_cost = max(old_cost - 20, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2380
            elif old_rarity == 5:
                if new_rarity == 1:
                    new_cost = old_cost + 800
                elif new_rarity == 2:
                    new_cost = old_cost + 260
                elif new_rarity == 3:
                    new_cost = old_cost + 80
                elif new_rarity == 4:
                    new_cost = old_cost + 20
                elif new_rarity == 99:
                    new_cost = old_cost + 2400
            elif old_rarity == 99:
                if new_rarity == 1:
                    new_cost = max(old_cost - 1600, 800)
                elif new_rarity == 2:
                    new_cost = max(old_cost - 2140, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 2320, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 2380, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 2400, 5)

            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])

        if len(params) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = params
            else:
                player_updates[df_data.player_id].extend(params)

    player_data.apply(get_player_updates, axis=1)

    print(f'Running player position updates...')
    all_pos = await pd_positions_df(cardset['id'])

    def set_all_positions(df_data):
        pos_series = all_pos.query(f'player_id == {df_data["player_id"]}')['position']
        pos_updates = []
        count = 1
        for this_pos in pos_series:
            if this_pos == 'P':
                this_pitcher = player_data.loc[df_data['bbref_id']]
                if this_pitcher['starter_rating'] > 3:
                    pos_updates.append((f'pos_{count}', 'SP'))
                    count += 1
                    if this_pitcher['relief_rating'] > 1 or not pd.isna(this_pitcher['closer_rating']):
                        pos_updates.append((f'pos_{count}', 'RP'))
                        count += 1
                else:
                    pos_updates.append((f'pos_{count}', 'RP'))
                    count += 1

                if not pd.isna(this_pitcher['closer_rating']):
                    pos_updates.append((f'pos_{count}', 'CP'))
                    count += 1
            else:
                pos_updates.append((f'pos_{count}', this_pos))
                count += 1

        if count == 1:
            pos_updates.append(('pos_1', 'DH'))
            count += 1

        while count <= 9:
            pos_updates.append((f'pos_{count}', 'False'))
            count += 1

        if len(pos_updates) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = pos_updates
            else:
                player_updates[df_data.player_id].extend(pos_updates)
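    # Example: a pitcher whose only listed position is P, with starter_rating <= 3
    # and a non-null closer_rating, gets pos_1='RP' and pos_2='CP'; remaining slots
    # through pos_9 are filled with 'False'.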

    p_data.apply(set_all_positions, axis=1)
    # Get all positions from each player in p_data and send position updates
    # Consider combining all player updates into one master call to keep from updating each player twice
    # (once in batter/pitcher and then here)

    print(f'Sending {len(player_updates)} player updates to PD database...')
    if 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true':
        for x in player_updates:
            await db_patch('players', object_id=x, params=player_updates[x])
    print(f'Player updates are complete\n')

    p_run_time = datetime.datetime.now() - start_time_three
    print(f'Player update runtime: {round(p_run_time.total_seconds())} seconds')
    t_run_time = datetime.datetime.now() - start_time
    print(f'Total runtime: {round(t_run_time.total_seconds())} seconds')


if __name__ == '__main__':
    asyncio.run(main(sys.argv[1:]))