import asyncio
import copy
import csv
import datetime
import html5lib
import logging
import random
import requests
import urllib.parse

import calcs_batter as cba
import calcs_defense as cde
import calcs_pitcher as cpi
import pandas as pd
import pybaseball as pb
import pydantic
import sys
from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \
    get_pitching_stats, get_all_pybaseball_ids, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \
    get_args, mlbteam_and_franchise, CLUB_LIST
from db_calls import db_get, db_put, db_post, db_patch
from typing import Literal
from bs4 import BeautifulSoup

date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
log_level = logging.INFO
logging.basicConfig(
    filename=f'logs/{date}.log',
    format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
    level=log_level
)
CARD_BASE_URL = 'https://pddev.manticorum.com/api/v2/players'


def sanitize_name(start_name: str) -> str:
    return (start_name
            .replace("é", "e")
            .replace("á", "a")
            .replace(".", "")
            .replace("Á", "A")
            .replace("ñ", "n")
            .replace("ó", "o")
            .replace("í", "i")
            .replace("ú", "u")
            .replace("'", "")
            .replace('-', ' '))


# class BattingStat(pydantic.BaseModel):
#     fg_id: int
#     vs_hand: Literal['L', 'R']
#     pa: int
#     hit: int
#     single: int
#     double: int
#     triple: int
#     homerun: int
#     rbi: int
#     bb: int
#     ibb: int
#     so: int
#     hbp: int
#     gidp: int
#     sb: int
#     cs: int
#     avg: float
#     hard_rate: float = None
#     med_rate: float = None
#     soft_rate: float = None
#     ifh_rate: float = None
#     hr_per_fb: float = None
#     ld_rate: float = None
#     iffb_rate: float = None
#     fb_rate: float = None
#     pull_rate: float = None
#     center_rate: float = None
#     oppo_rate: float = None


async def main(args):
    """
    params:
        cardset_name: str - to be searched in pd database
        season: int - optional; defaults to the first four characters of the cardset name
        games_played: int - from 1 to 162
        pull_fielding: bool - whether or not to pull fielding stats from bbref
        post_batters: bool - whether or not to post batting cards, batting card ratings, and batter updates
        post_pitchers: bool - whether or not to post pitching cards, pitching card ratings, and pitcher updates
        post_fielders: bool - whether or not to post card positions
        post_players: bool - whether or not to post player updates
        player_description: str - shows as cardset on card image and prefixes player name in discord
        is_liveseries: str - whether or not to look up players' current MLB club from MLB statsapi
    """
    arg_data = get_args(args)
    # cardset_name = input(f'What is the name of this Cardset? ')
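    # Example invocation -- purely illustrative: the script name and the exact argument
    # format are whatever get_args in creation_helpers expects, and the cardset name
    # below is hypothetical.
    #   python <this_script>.py cardset_name="2024 Season" games_played=162 \
    #       pull_fielding=true post_batters=true post_pitchers=true post_players=true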
    cardset_name = arg_data['cardset_name']
    print(f'Searching for cardset: {cardset_name}')
    c_query = await db_get('cardsets', params=[('name', cardset_name)])
    if c_query['count'] == 0:
        print(f'I do not see a cardset named {cardset_name}')
        return
    cardset = c_query['cardsets'][0]
    del c_query

    if 'season' in arg_data:
        season = arg_data['season']
    else:
        season = int(cardset['name'][:4])
    game_count = int(arg_data['games_played'])
    if game_count < 1 or game_count > 162:
        print(f'Game count has to be between 1 and 162.')
        return
    season_pct = game_count / 162
    print(f'Cardset ID: {cardset["id"]} / Season: {season}\nGame count: {game_count} / Season %: {season_pct}\n')

    if 'player_description' in arg_data:
        player_description = arg_data['player_description']
    elif season_pct < 1:
        player_description = f'Live'
    else:
        player_description = f'{season}'

    start_time = datetime.datetime.now()
    release_directory = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
    input_path = f'data-input/{cardset["name"]} Cardset/'

    print('Reading batting stats...')
    all_batting = get_batting_stats(file_path=input_path)
    print(f'Processed {len(all_batting.values)} batters\n')

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')

    def get_hand(df_data):
        if df_data['Name'][-1] == '*':
            return 'L'
        elif df_data['Name'][-1] == '#':
            return 'S'
        else:
            return 'R'

    print(f'Pulling PD player IDs...')
    pd_players = await pd_players_df(cardset['id'])  # .set_index('bbref_id', drop=False)
    print(f'Now pulling mlbam player IDs...')
    ids_and_names = all_batting.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(pd_players, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))
    print(f'Matched mlbam to pd players.')
    final_batting = pd.merge(
        player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)

    new_players = []

    def create_batters(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
                     f'{urllib.parse.quote("?d=")}{release_directory}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': CLUB_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': int(float(df_data['key_fangraphs'])),
            'rarity_id': 99,
            'pos_1': 'DH',
            'description': f'{player_description}',
            'bbref_id': df_data.name,
            'fangr_id': int(float(df_data['key_fangraphs'])),
            'strat_code': int(float(df_data['key_mlbam']))
        })

    final_batting[final_batting['player_id'].isnull()].apply(create_batters, axis=1)
    print(f'Creating {len(new_players)} new players...')
    for x in new_players:
        this_player = await db_post('players', payload=x)
        final_batting.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_batting.at[x['bbref_id'], 'p_name'] = this_player['p_name']
    del ids_and_names, all_batting, pd_players
    print(f'Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n')

    print(f'Reading baserunning stats...')
    run_data = (pd.read_csv(f'{input_path}running.csv')
                .set_index('Name-additional'))
    run_data['bat_hand'] = run_data.apply(get_hand, axis=1)
    offense_stats = final_batting.join(run_data)
    del final_batting, run_data
    print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...')
    print(f'Pulling pitcher defense...')
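    # Pitcher defense (df_p) is pulled even when pull_fielding is false: it is joined to the
    # pitching stats and used for the pitcher fielding lines later in the run.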
    df_p = cde.get_bbref_fielding_df('p', season)
    if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true':
        print(f'Pulling catcher defense...')
        df_c = cde.get_bbref_fielding_df('c', season)
        print(f'Pulling first base defense...')
        df_1b = cde.get_bbref_fielding_df('1b', season)
        print(f'Pulling second base defense...')
        df_2b = cde.get_bbref_fielding_df('2b', season)
        print(f'Pulling third base defense...')
        df_3b = cde.get_bbref_fielding_df('3b', season)
        print(f'Pulling short stop defense...')
        df_ss = cde.get_bbref_fielding_df('ss', season)
        print(f'Pulling left field defense...')
        df_lf = cde.get_bbref_fielding_df('lf', season)
        print(f'Pulling center field defense...')
        df_cf = cde.get_bbref_fielding_df('cf', season)
        print(f'Pulling right field defense...')
        df_rf = cde.get_bbref_fielding_df('rf', season)
        print(f'Pulling outfield defense...')
        df_of = cde.get_bbref_fielding_df('of', season)
        print(f'Positions data is retrieved')

    batting_cards = []

    def create_batting_card(df_data):
        s_data = cba.stealing(
            chances=df_data['SBO'],
            sb2s=df_data['SB2'],
            cs2s=df_data['CS2'],
            sb3s=df_data['SB3'],
            cs3s=df_data['CS3'],
            season_pct=season_pct
        )
        batting_cards.append({
            "player_id": df_data['player_id'],
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": df_data['key_mlbam'],
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "steal_low": s_data[0],
            "steal_high": s_data[1],
            "steal_auto": s_data[2],
            "steal_jump": s_data[3],
            "hit_and_run": cba.hit_and_run(
                df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'],
                df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR']
            ),
            "running": cba.running(df_data['XBT%']),
            "hand": df_data['bat_hand']
        })

    print(f'Calculating batting cards...')
    offense_stats.apply(create_batting_card, axis=1)
    print(f'Cards are complete.\n\nPosting cards now...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...')
    offense_stats = pd.merge(
        offense_stats, await pd_battingcards_df(cardset['id']), on='player_id')

    position_payload = []

    def create_positions(df_data):
        no_data = True
        for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
            if df_data['key_bbref'] in pos_data[0].index:
                logging.debug(f'Running {pos_data[1]} stats for {player_data.at[df_data["key_bbref"], "p_name"]}')
                no_data = False
                average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total'])
                                 + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 + min(
                                     int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']),
                                     int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 )) / 3
                position_payload.append({
                    "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                    "position": pos_data[1].upper(),
                    "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']),
                    "range": cde.get_if_range(
                        pos_code=pos_data[1],
                        tz_runs=round(average_range),
                        r_dp=0,
                        season_pct=season_pct
                    ),
                    "error": cde.get_any_error(
                        pos_code=pos_data[1],
                        errors=int(pos_data[0].at[df_data["key_bbref"], 'E_def']),
                        chances=int(pos_data[0].at[df_data["key_bbref"], 'chances']),
                        season_pct=season_pct
                    )
                })
        of_arms = []
        of_payloads = []
        for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]:
            if df_data["key_bbref"] in pos_data[0].index:
                no_data = False
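                # Same TZ/BIS blend as the infield block above, leaning toward the lower
                # of the two runs-saved figures: (tz + bis + min(tz, bis)) / 3.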
                average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total'])
                                 + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 + min(
                                     int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']),
                                     int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total'])
                                 )) / 3
                of_payloads.append({
                    "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                    "position": pos_data[1].upper(),
                    "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']),
                    "range": cde.get_of_range(
                        pos_code=pos_data[1],
                        tz_runs=round(average_range),
                        season_pct=season_pct
                    )
                })
                of_arms.append(int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_outfield']))
        if df_data["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            no_data = False
            error_rating = cde.get_any_error(
                pos_code=pos_data[1],
                errors=int(df_of.at[df_data["key_bbref"], 'E_def']),
                chances=int(df_of.at[df_data["key_bbref"], 'chances']),
                season_pct=season_pct
            )
            arm_rating = cde.arm_outfield(of_arms)
            for f in of_payloads:
                f['error'] = error_rating
                f['arm'] = arm_rating
                position_payload.append(f)
        if df_data["key_bbref"] in df_c.index:
            if df_c.at[df_data["key_bbref"], 'SB'] + df_c.at[df_data["key_bbref"], 'CS'] == 0:
                arm_rating = 3
            else:
                arm_rating = cde.arm_catcher(
                    cs_pct=df_c.at[df_data["key_bbref"], 'caught_stealing_perc'],
                    raa=int(df_c.at[df_data["key_bbref"], 'bis_runs_catcher_sb']),
                    season_pct=season_pct
                )
            no_data = False
            position_payload.append({
                "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                "position": 'C',
                "innings": float(df_c.at[df_data["key_bbref"], 'Inn_def']),
                "range": cde.range_catcher(
                    rs_value=int(df_c.at[df_data["key_bbref"], 'tz_runs_catcher']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='c',
                    errors=int(df_c.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_c.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                ),
                "arm": arm_rating,
                "pb": cde.pb_catcher(
                    pb=int(df_c.at[df_data["key_bbref"], 'PB']),
                    innings=int(float(df_c.at[df_data["key_bbref"], 'Inn_def'])),
                    season_pct=season_pct
                ),
                "overthrow": cde.ot_catcher(
                    errors=int(df_c.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_c.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                )
            })
        if no_data:
            position_payload.append({
                "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                "position": 'DH',
                "innings": df_data['PA_vL'] + df_data['PA_vR']
            })

    if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true':
        print(f'Calculating fielding lines now...')
        offense_stats.apply(create_positions, axis=1)
        print(f'Fielding is complete.\n\nPosting positions now...')
        if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
            resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30)
            print(f'Response: {resp}\n')

    batting_ratings = []

    def create_batting_card_ratings(df_data):
        logging.debug(f'Calculating card ratings for {df_data.name}')
        batting_ratings.extend(cba.get_batter_ratings(df_data))

    print(f'Calculating card ratings...')
    offense_stats.apply(create_batting_card_ratings, axis=1)
    print(f'Ratings are complete\n\nPosting ratings now...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling fresh PD player data...')
    """
    Pull fresh pd_players and set_index to player_id
    Pull fresh battingcards and set_index to player
    Pull fresh battingcardratings one hand at a time and join on battingcard (suffixes _vL and _vR)
    Join battingcards (left) with battingcardratings (right) as total_ratings on id (left) and battingcard (right)
    Join pd_players (left) with total_ratings (right) on indices
    Output: PD player list with batting card, ratings vL, and ratings vR
    Calculate Total OPS as (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3 and assign rarity_id
    For players with cost of 99999, set cost to the rarity's base cost * Total OPS / the rarity's average OPS
    """
    p_data = await pd_players_df(cardset['id'])
    p_data = p_data.set_index('player_id', drop=False)
    total_ratings = pd.merge(
        await pd_battingcards_df(cardset['id']),
        await pd_battingcardratings_df(cardset['id']),
        on='battingcard_id'
    )
    player_data = pd.merge(
        p_data, total_ratings, on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings, offense_stats

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { player_id: [ (param pairs) ] }
    rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity')
    average_ops = rarity_group['total_OPS'].mean().to_dict()
    if 1 not in average_ops:
        average_ops[1] = 1.066
    if 2 not in average_ops:
        average_ops[2] = 0.938
    if 3 not in average_ops:
        average_ops[3] = 0.844
    if 4 not in average_ops:
        average_ops[4] = 0.752
    if 5 not in average_ops:
        average_ops[5] = 0.612
    # cost_groups = rarity_group['cost'].mean()
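    # Pricing sketch for get_player_updates below (numbers are illustrative):
    #   * a new card (cost sentinel 99999) is priced at base_costs[rarity] * total_OPS / average_ops[rarity],
    #     e.g. a rarity-2 batter with total_OPS 0.970 against the 0.938 default average:
    #     round(270 * 0.970 / 0.938) = 279
    #   * a card that changes rarity keeps its old cost plus/minus a fixed delta
    #     (540 between 1 and 2, 720 between 1 and 3, ..., 1600-2400 between any tier and 99),
    #     floored at the destination tier's minimum (800/100/50/15/5)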
    def get_player_updates(df_data):
        base_costs = {
            1: 810,
            2: 270,
            3: 90,
            4: 30,
            5: 10,
            99: 2400
        }
        params = []
        if df_data['description'] != player_description:
            params = [('description', f'{player_description}')]
        if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))
            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])
        # if release_directory not in df_data['image']:
        params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard'
                                 f'{urllib.parse.quote("?d=")}{release_directory}')])
        if df_data['cost'] == 99999:
            params.extend([
                ('cost', round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS']
                               / average_ops[df_data['new_rarity_id']])),
                ('rarity_id', df_data['new_rarity_id'])
            ])
        elif df_data['rarity'] != df_data['new_rarity_id']:
            old_rarity = df_data['rarity']
            new_rarity = df_data['new_rarity_id']
            old_cost = df_data['cost']
            new_cost = 0
            if old_rarity == 1:
                if new_rarity == 2:
                    new_cost = max(old_cost - 540, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 720, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 780, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 800, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 1600
            elif old_rarity == 2:
                if new_rarity == 1:
                    new_cost = old_cost + 540
                elif new_rarity == 3:
                    new_cost = max(old_cost - 180, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 240, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 260, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2140
            elif old_rarity == 3:
                if new_rarity == 1:
                    new_cost = old_cost + 720
                elif new_rarity == 2:
                    new_cost = old_cost + 180
                elif new_rarity == 4:
                    new_cost = max(old_cost - 60, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 80, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2320
            elif old_rarity == 4:
                if new_rarity == 1:
                    new_cost = old_cost + 780
                elif new_rarity == 2:
                    new_cost = old_cost + 240
                elif new_rarity == 3:
                    new_cost = old_cost + 60
                elif new_rarity == 5:
                    new_cost = max(old_cost - 20, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2380
            elif old_rarity == 5:
                if new_rarity == 1:
                    new_cost = old_cost + 800
                elif new_rarity == 2:
                    new_cost = old_cost + 260
                elif new_rarity == 3:
                    new_cost = old_cost + 80
                elif new_rarity == 4:
                    new_cost = old_cost + 20
                elif new_rarity == 99:
                    new_cost = old_cost + 2400
            elif old_rarity == 99:
                if new_rarity == 1:
                    new_cost = max(old_cost - 1600, 800)
                elif new_rarity == 2:
                    new_cost = max(old_cost - 2140, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 2320, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 2380, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 2400, 5)
            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])
        if len(params) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = params
            else:
                player_updates[df_data.player_id].extend(params)

    player_data.apply(get_player_updates, axis=1)
    print(f'Sending {len(player_updates)} player updates to PD database...')
    if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true':
        for x in player_updates:
            await db_patch('players', object_id=x, params=player_updates[x])
    del player_updates
    print(f'Batter updates are complete')
    start_time_two = datetime.datetime.now()
    run_time = start_time_two - start_time
    print(f'Total batting cards: {len(batting_cards)}\nNew cardset batters: {len(new_players)}\n'
          f'Batter runtime: {round(run_time.total_seconds())} seconds\n')

    print('Reading pitching stats...')
    all_pitching = get_pitching_stats(file_path=input_path)
    print(f'Processed {len(all_pitching.values)} pitchers\n')

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs')

    print(f'Now pulling mlbam player IDs...')
    ids_and_names = all_pitching.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(p_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))
    print(f'Matched mlbam to pd players.')
    step_pitching = pd.merge(
        player_data, all_pitching, left_on='key_fangraphs', right_on='playerId', sort=False
    ).set_index('key_bbref', drop=False)
    final_pitching = step_pitching.join(df_p, rsuffix='_r')

    new_players = []

    def create_pitchers(df_data):
        f_name = sanitize_name(df_data["name_first"]).title()
        l_name = sanitize_name(df_data["name_last"]).title()
        new_players.append({
            'p_name': f'{f_name} {l_name}',
            'cost': 99999,
            'image': f'{CARD_BASE_URL}/{df_data["player_id"]}/'
                     f'pitchingcard{urllib.parse.quote("?d=")}{release_directory}',
            'mlbclub': CLUB_LIST[df_data['Tm_vL']],
            'franchise': CLUB_LIST[df_data['Tm_vL']],
            'cardset_id': cardset['id'],
            'set_num': int(float(df_data['key_fangraphs'])),
            'rarity_id': 99,
            'pos_1': 'P',
            'description': f'{player_description}',
            'bbref_id': df_data.name,
            'fangr_id': int(float(df_data['key_fangraphs'])),
            'strat_code': int(float(df_data['key_mlbam']))
        })

    final_pitching[final_pitching['player_id'].isnull()].apply(create_pitchers, axis=1)
    print(f'Creating {len(new_players)} new players...')
    for x in new_players:
        this_player = await db_post('players', payload=x)
        final_pitching.at[x['bbref_id'], 'player_id'] = this_player['player_id']
        final_pitching.at[x['bbref_id'], 'p_name'] = this_player['p_name']
    del ids_and_names, all_pitching, p_data, step_pitching
    print(f'Player IDs linked to pitching stats.\n{len(final_pitching.values)} players remain\n')
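    # pitching.csv can hold more than one row per pitcher (for example, multi-team seasons);
    # only the first row per 'Name-additional' key is kept below.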
    print(f'Reading pitching peripheral stats...')
    pit_data = (pd.read_csv(f'{input_path}pitching.csv')
                .drop_duplicates(subset=['Name-additional'], keep='first')
                .set_index('Name-additional'))
    pit_data['pitch_hand'] = pit_data.apply(get_hand, axis=1)
    pitching_stats = final_pitching.join(pit_data, lsuffix='_l')
    del final_pitching, pit_data
    print(f'Stats are tallied\n{len(pitching_stats.values)} players remain\n')

    pitching_cards = []

    def create_pitching_card(df_data):
        pow_data = cde.pow_ratings(float(df_data['Inn_def']), int(df_data['GS']), int(df_data['G']))
        pitching_cards.append({
            "player_id": int(float(df_data['player_id'])),
            "key_bbref": df_data.name,
            "key_fangraphs": int(float(df_data['key_fangraphs'])),
            "key_mlbam": int(float(df_data['key_mlbam'])),
            "key_retro": df_data['key_retro'],
            "name_first": df_data["name_first"].title(),
            "name_last": df_data["name_last"].title(),
            "balk": cpi.balks(df_data['BK'], df_data['IP'], season_pct),
            "wild_pitch": cpi.wild_pitches(df_data['WP'], df_data['IP'], season_pct),
            "hold": cde.hold_pitcher(df_data['caught_stealing_perc'], int(df_data['pickoffs']), season_pct),
            "starter_rating": pow_data[0],
            "relief_rating": pow_data[1],
            "closer_rating": cpi.closer_rating(int(df_data['GF']), int(df_data['SV']), int(df_data['G'])),
            "hand": df_data['pitch_hand'],
            "batting": f'#1W{df_data["pitch_hand"]}-C'
        })

    print(f'Calculating pitching cards...')
    pitching_stats.apply(create_pitching_card, axis=1)
    print(f'Cards are complete.\n\nPosting cards now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('pitchingcards', payload={'cards': pitching_cards}, timeout=30)
        print(f'Response: {resp}\n\nMatching pitching card database IDs to player stats...')
    # final_pitching_stats = pd.merge(
    #     pitching_stats, await pd_pitchingcards_df(cardset['id']), on='player_id')
    # final_pitching_stats.set_index('key_bbref', drop=False, inplace=True)
    # final_pitching_stats = final_pitching_stats.astype({'player_id': int})
    pc_df = await pd_pitchingcards_df(cardset['id'])
    pitching_stats = pitching_stats.merge(pc_df, how='left', on='player_id').set_index('key_bbref', drop=False)

    pit_positions = []

    def create_pit_position(df_data):
        if df_data["key_bbref"] in df_p.index:
            logging.debug(f'Running P stats for {df_data["p_name"]}')
            pit_positions.append({
                "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                "position": 'P',
                "innings": float(df_p.at[df_data["key_bbref"], 'Inn_def']),
                "range": cde.range_pitcher(
                    rs_value=int(df_p.at[df_data["key_bbref"], 'bis_runs_total']),
                    season_pct=season_pct
                ),
                "error": cde.get_any_error(
                    pos_code='p',
                    errors=int(df_p.at[df_data["key_bbref"], 'E_def']),
                    chances=int(df_p.at[df_data["key_bbref"], 'chances']),
                    season_pct=season_pct
                )
            })
        else:
            pit_positions.append({
                "player_id": int(player_data.at[df_data["key_bbref"], 'player_id']),
                "position": 'P',
                "innings": 1,
                "range": 5,
                "error": 51
            })

    print(f'Calculating pitcher fielding lines now...')
    pitching_stats.apply(create_pit_position, axis=1)
    print(f'Fielding is complete.\n\nPosting positions now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('cardpositions', payload={'positions': pit_positions}, timeout=30)
        print(f'Response: {resp}\n')

    pitching_ratings = []

    def create_pitching_card_ratings(df_data):
        logging.info(f'Calculating pitching card ratings for {df_data.name}')
        pitching_ratings.extend(cpi.get_pitcher_ratings(df_data))

    print(f'Calculating card ratings...')
    pitching_stats.apply(create_pitching_card_ratings, axis=1)
    print(f'Ratings are complete\n\nPosting ratings now...')
    if 'post_pitchers' not in arg_data or arg_data['post_pitchers'].lower() == 'true':
        resp = await db_put('pitchingcardratings', payload={'ratings': pitching_ratings}, timeout=30)
        print(f'Response: {resp}\n\nPulling all positions to set player positions...')
    print(f'Pitcher updates are complete')
    start_time_three = datetime.datetime.now()
    p_run_time = datetime.datetime.now() - start_time_two
    print(f'Total pitching cards: {len(pitching_cards)}\nNew cardset pitchers: {len(new_players)}\n'
          f'Pitcher runtime: {round(p_run_time.total_seconds())} seconds\n')

    print(f'Checking for player updates...')
    """
    Pull fresh pd_players and set_index to player_id
    Pull fresh pitchingcards and set_index to player
    Pull fresh pitchingcardratings one hand at a time and join on pitchingcard (suffixes _vL and _vR)
    Join pitchingcards (left) with pitchingcardratings (right) as total_ratings on id (left) and pitchingcard (right)
    Join pd_players (left) with total_ratings (right) on indices
    Output: PD player list with pitching card, ratings vL, and ratings vR
    Calculate Total OPS as (OPSvL + OPSvR + min(OPSvL, OPSvR)) / 3 and assign rarity_id
    For players with cost of 99999, set cost to the rarity's base cost * Total OPS / the SP or RP average OPS
    """
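    # Rarity cutoffs for pitchers: total_OPS is OPS allowed, so lower is better. 99 (the most
    # expensive tier in base_costs) comes first, then 1 through 5; starters (starter_rating > 3)
    # are graded on slightly higher cutoffs than relievers.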
    def new_rarity_id(df_data):
        if df_data['starter_rating'] > 3:
            if df_data['total_OPS'] <= 0.4:
                return 99
            elif df_data['total_OPS'] <= 0.475:
                return 1
            elif df_data['total_OPS'] <= 0.53:
                return 2
            elif df_data['total_OPS'] <= 0.6:
                return 3
            elif df_data['total_OPS'] <= 0.675:
                return 4
            else:
                return 5
        else:
            if df_data['total_OPS'] <= 0.325:
                return 99
            elif df_data['total_OPS'] <= 0.4:
                return 1
            elif df_data['total_OPS'] <= 0.475:
                return 2
            elif df_data['total_OPS'] <= 0.55:
                return 3
            elif df_data['total_OPS'] <= 0.625:
                return 4
            else:
                return 5

    p_data = await pd_players_df(cardset['id'])
    p_data = p_data.set_index('player_id', drop=False)
    total_ratings = pd.merge(
        await pd_pitchingcards_df(cardset['id']),
        await pd_pitchingcardratings_df(cardset['id']),
        on='pitchingcard_id'
    )
    total_ratings['new_rarity_id'] = total_ratings.apply(new_rarity_id, axis=1)
    player_data = pd.merge(
        p_data, total_ratings, on='player_id'
    ).set_index('player_id', drop=False)
    del total_ratings, pitching_stats

    def get_pids(df_data):
        return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref')

    ids_and_names = player_data.apply(get_pids, axis=1)
    player_data = (ids_and_names
                   .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id')
                   .query('key_mlbam == key_mlbam')
                   .set_index('key_bbref', drop=False))

    player_updates = {}  # { player_id: [ (param pairs) ] }
    sp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating >= 4').groupby('rarity')
    sp_average_ops = sp_rarity_group['total_OPS'].mean().to_dict()
    rp_rarity_group = player_data.query('rarity == new_rarity_id and starter_rating < 4').groupby('rarity')
    rp_average_ops = rp_rarity_group['total_OPS'].mean().to_dict()
    # cost_groups = rarity_group['cost'].mean()
    if 99 not in sp_average_ops:
        sp_average_ops[99] = 0.388
    if 1 not in sp_average_ops:
        sp_average_ops[1] = 0.445
    if 2 not in sp_average_ops:
        sp_average_ops[2] = 0.504
    if 3 not in sp_average_ops:
        sp_average_ops[3] = 0.568
    if 4 not in sp_average_ops:
        sp_average_ops[4] = 0.634
    if 5 not in sp_average_ops:
        sp_average_ops[5] = 0.737
    if 99 not in rp_average_ops:
        rp_average_ops[99] = 0.282
    if 1 not in rp_average_ops:
        rp_average_ops[1] = 0.375
    if 2 not in rp_average_ops:
        rp_average_ops[2] = 0.442
    if 3 not in rp_average_ops:
        rp_average_ops[3] = 0.516
    if 4 not in rp_average_ops:
        rp_average_ops[4] = 0.591
    if 5 not in rp_average_ops:
        rp_average_ops[5] = 0.702

    def get_player_updates(df_data):
        base_costs = {
            1: 810,
            2: 270,
            3: 90,
            4: 30,
            5: 10,
            99: 2400
        }

        def avg_ops(rarity_id, starter_rating):
            if starter_rating >= 4:
                return sp_average_ops[rarity_id]
            else:
                return rp_average_ops[rarity_id]

        params = []
        if df_data['description'] != player_description:
            params = [('description', f'{player_description}')]
        if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true':
            team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam'])))
            if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None:
                params.extend([('mlbclub', team_data['mlbclub'])])
            if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None:
                params.extend([('franchise', team_data['franchise'])])
        # if release_directory not in df_data['image']:
        params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/pitchingcard'
                                 f'{urllib.parse.quote("?d=")}{release_directory}')])
        if df_data['cost'] == 99999:
            params.extend([
                ('cost', round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS']
                               / avg_ops(df_data['new_rarity_id'], df_data['starter_rating']))),
                ('rarity_id', df_data['new_rarity_id'])
            ])
        elif df_data['rarity'] != df_data['new_rarity_id']:
            old_rarity = df_data['rarity']
            new_rarity = df_data['new_rarity_id']
            old_cost = df_data['cost']
            new_cost = 0
            if old_rarity == 1:
                if new_rarity == 2:
                    new_cost = max(old_cost - 540, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 720, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 780, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 800, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 1600
            elif old_rarity == 2:
                if new_rarity == 1:
                    new_cost = old_cost + 540
                elif new_rarity == 3:
                    new_cost = max(old_cost - 180, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 240, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 260, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2140
            elif old_rarity == 3:
                if new_rarity == 1:
                    new_cost = old_cost + 720
                elif new_rarity == 2:
                    new_cost = old_cost + 180
                elif new_rarity == 4:
                    new_cost = max(old_cost - 60, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 80, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2320
            elif old_rarity == 4:
                if new_rarity == 1:
                    new_cost = old_cost + 780
                elif new_rarity == 2:
                    new_cost = old_cost + 240
                elif new_rarity == 3:
                    new_cost = old_cost + 60
                elif new_rarity == 5:
                    new_cost = max(old_cost - 20, 5)
                elif new_rarity == 99:
                    new_cost = old_cost + 2380
            elif old_rarity == 5:
                if new_rarity == 1:
                    new_cost = old_cost + 800
                elif new_rarity == 2:
                    new_cost = old_cost + 260
                elif new_rarity == 3:
                    new_cost = old_cost + 80
                elif new_rarity == 4:
                    new_cost = old_cost + 20
                elif new_rarity == 99:
                    new_cost = old_cost + 2400
            elif old_rarity == 99:
                if new_rarity == 1:
                    new_cost = max(old_cost - 1600, 800)
                elif new_rarity == 2:
                    new_cost = max(old_cost - 2140, 100)
                elif new_rarity == 3:
                    new_cost = max(old_cost - 2320, 50)
                elif new_rarity == 4:
                    new_cost = max(old_cost - 2380, 15)
                elif new_rarity == 5:
                    new_cost = max(old_cost - 2400, 5)
            if new_cost != 0:
                params.extend([('cost', new_cost), ('rarity_id', new_rarity)])
        if len(params) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = params
            else:
                player_updates[df_data.player_id].extend(params)

    player_data.apply(get_player_updates, axis=1)
    print(f'Running player position updates..')
    all_pos = await pd_positions_df(cardset['id'])
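    # set_all_positions turns each player's stored positions into pos_1..pos_9 updates:
    # a 'P' entry expands to SP and/or RP (plus CP when a closer_rating exists) based on the
    # pitching card, players with no positions default to DH, and any unused slots are
    # filled with the literal string 'False'.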
    def set_all_positions(df_data):
        pos_series = all_pos.query(f'player_id == {df_data["player_id"]}')['position']
        pos_updates = []
        count = 1
        for this_pos in pos_series:
            if this_pos == 'P':
                this_pitcher = player_data.loc[df_data['bbref_id']]
                if this_pitcher['starter_rating'] > 3:
                    pos_updates.append((f'pos_{count}', 'SP'))
                    count += 1
                    if this_pitcher['relief_rating'] > 1 or not pd.isna(this_pitcher['closer_rating']):
                        pos_updates.append((f'pos_{count}', 'RP'))
                        count += 1
                else:
                    pos_updates.append((f'pos_{count}', 'RP'))
                    count += 1
                if not pd.isna(this_pitcher['closer_rating']):
                    pos_updates.append((f'pos_{count}', 'CP'))
                    count += 1
            else:
                pos_updates.append((f'pos_{count}', this_pos))
                count += 1
        if count == 1:
            pos_updates.append(('pos_1', 'DH'))
            count += 1
        while count <= 9:
            pos_updates.append((f'pos_{count}', 'False'))
            count += 1
        if len(pos_updates) > 0:
            if df_data.player_id not in player_updates.keys():
                player_updates[df_data.player_id] = pos_updates
            else:
                player_updates[df_data.player_id].extend(pos_updates)

    p_data.apply(set_all_positions, axis=1)
    # Get all positions from each player in p_data and send position updates
    # Consider combining all player updates into one master call to keep from updating each player twice
    # (once in batter/pitcher and then here)
    print(f'Sending {len(player_updates)} player updates to PD database...')
    if 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true':
        for x in player_updates:
            await db_patch('players', object_id=x, params=player_updates[x])
    print(f'Player updates are complete\n')
    p_run_time = datetime.datetime.now() - start_time_three
    print(f'Player update runtime: {round(p_run_time.total_seconds())} seconds')
    t_run_time = datetime.datetime.now() - start_time
    print(f'Total runtime: {round(t_run_time.total_seconds())} seconds')


if __name__ == '__main__':
    asyncio.run(main(sys.argv[1:]))