diff --git a/batters/__init__.py b/batters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/calcs_batter.py b/batters/calcs_batter.py similarity index 100% rename from calcs_batter.py rename to batters/calcs_batter.py diff --git a/batters/creation.py b/batters/creation.py new file mode 100644 index 0000000..b3ab8be --- /dev/null +++ b/batters/creation.py @@ -0,0 +1,386 @@ +import logging +import datetime +import urllib.parse +import pandas as pd + +from creation_helpers import get_all_pybaseball_ids, sanitize_name, CLUB_LIST, FRANCHISE_LIST, pd_players_df, \ + mlbteam_and_franchise +from db_calls import db_post, db_get, db_put, db_patch +from . import calcs_batter as cba + + +async def pd_battingcards_df(cardset_id: int): + bc_query = await db_get('battingcards', params=[('cardset_id', cardset_id), ('short_output', True)]) + if bc_query['count'] == 0: + raise ValueError(f'No batting cards returned from Paper Dynasty API') + return pd.DataFrame(bc_query['cards']).rename(columns={'id': 'battingcard_id', 'player': 'player_id'}) + + +async def pd_battingcardratings_df(cardset_id: int): + vl_query = await db_get( + 'battingcardratings', params=[ + ('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True), ('team_id', 31), + ('ts', 's37136685556r6135248705')]) + vr_query = await db_get( + 'battingcardratings', params=[ + ('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True), ('team_id', 31), + ('ts', 's37136685556r6135248705')]) + if 0 in [vl_query['count'], vr_query['count']]: + raise ValueError(f'No batting card ratings returned from Paper Dynasty API') + vl = pd.DataFrame(vl_query['ratings']) + vr = pd.DataFrame(vr_query['ratings']) + ratings = (pd.merge(vl, vr, on='battingcard', suffixes=('_vL', '_vR')) + .rename(columns={'battingcard': 'battingcard_id'})) + + def get_total_ops(df_data): + ops_vl = df_data['obp_vL'] + df_data['slg_vL'] + ops_vr = df_data['obp_vR'] + df_data['slg_vR'] + return (ops_vr + ops_vl + min(ops_vl, ops_vr)) / 3 + ratings['total_OPS'] = ratings.apply(get_total_ops, axis=1) + + def new_rarity_id(df_data): + if df_data['total_OPS'] >= 1.2: + return 99 + elif df_data['total_OPS'] >= 1: + return 1 + elif df_data['total_OPS'] >= .9: + return 2 + elif df_data['total_OPS'] >= .8: + return 3 + elif df_data['total_OPS'] >= .7: + return 4 + else: + return 5 + ratings['new_rarity_id'] = ratings.apply(new_rarity_id, axis=1) + + return ratings + + # return pd.DataFrame(bcr_query['ratings']).rename(columns={'battingcard': 'battingcard_id'}) + + +def get_batting_stats(file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None): + if file_path is not None: + vl_basic = pd.read_csv(f'{file_path}vlhp-basic.csv').query('PA >= 20') + vr_basic = pd.read_csv(f'{file_path}vrhp-basic.csv').query('PA >= 40') + total_basic = pd.merge(vl_basic, vr_basic, on="playerId", suffixes=('_vL', '_vR')) + + vl_rate = pd.read_csv(f'{file_path}vlhp-rate.csv').query('PA >= 20') + vr_rate = pd.read_csv(f'{file_path}vrhp-rate.csv').query('PA >= 40') + total_rate = pd.merge(vl_rate, vr_rate, on="playerId", suffixes=('_vL', '_vR')) + + return pd.merge(total_basic, total_rate, on="playerId", suffixes=('', '_rate')) + + else: + raise LookupError(f'Date-based stat pulls not implemented, yet. Please provide batting csv files.') + + +def match_player_lines(all_batting: pd.DataFrame, all_players: pd.DataFrame): + def get_pids(df_data): + return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs') + + print(f'Now pulling mlbam player IDs...') + ids_and_names = all_batting.apply(get_pids, axis=1) + player_data = (ids_and_names + .merge(all_players, how='left', left_on='key_bbref', right_on='bbref_id') + .query('key_mlbam == key_mlbam') + .set_index('key_bbref', drop=False)) + print(f'Matched mlbam to pd players.') + final_batting = pd.merge( + player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False + ).set_index('key_bbref', drop=False) + + return final_batting + + +async def create_new_players( + final_batting: pd.DataFrame, cardset: dict, card_base_url: str, release_dir: str, player_desc: str): + new_players = [] + + def create_batters(df_data): + f_name = sanitize_name(df_data["name_first"]).title() + l_name = sanitize_name(df_data["name_last"]).title() + new_players.append({ + 'p_name': f'{f_name} {l_name}', + 'cost': 99999, + 'image': f'{card_base_url}/{df_data["player_id"]}/battingcard' + f'{urllib.parse.quote("?d=")}{release_dir}', + 'mlbclub': CLUB_LIST[df_data['Tm_vL']], + 'franchise': FRANCHISE_LIST[df_data['Tm_vL']], + 'cardset_id': cardset['id'], + 'set_num': int(float(df_data['key_fangraphs'])), + 'rarity_id': 99, + 'pos_1': 'DH', + 'description': f'{player_desc}', + 'bbref_id': df_data.name, + 'fangr_id': int(float(df_data['key_fangraphs'])), + 'strat_code': int(float(df_data['key_mlbam'])) + }) + + final_batting[final_batting['player_id'].isnull()].apply(create_batters, axis=1) + print(f'Creating {len(new_players)} new players...') + for x in new_players: + this_player = await db_post('players', payload=x) + final_batting.at[x['bbref_id'], 'player_id'] = this_player['player_id'] + final_batting.at[x['bbref_id'], 'p_name'] = this_player['p_name'] + + print(f'Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n') + + return len(new_players) + + +def get_offense_stats(final_batting: pd.DataFrame, input_path: str): + def get_hand(df_data): + if df_data['Name'][-1] == '*': + return 'L' + elif df_data['Name'][-1] == '#': + return 'S' + else: + return 'R' + + print(f'Reading baserunning stats...') + run_data = (pd.read_csv(f'{input_path}running.csv') + .set_index('Name-additional')) + run_data['bat_hand'] = run_data.apply(get_hand, axis=1) + offense_stats = final_batting.join(run_data) + print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...') + + return offense_stats + + +async def calculate_batting_cards(offense_stats: pd.DataFrame, cardset: dict, season_pct: float, to_post: bool): + batting_cards = [] + + def create_batting_card(df_data): + s_data = cba.stealing( + chances=df_data['SBO'], + sb2s=df_data['SB2'], + cs2s=df_data['CS2'], + sb3s=df_data['SB3'], + cs3s=df_data['CS3'], + season_pct=season_pct + ) + batting_cards.append({ + "player_id": df_data['player_id'], + "key_bbref": df_data.name, + "key_fangraphs": int(float(df_data['key_fangraphs'])), + "key_mlbam": df_data['key_mlbam'], + "key_retro": df_data['key_retro'], + "name_first": df_data["name_first"].title(), + "name_last": df_data["name_last"].title(), + "steal_low": s_data[0], + "steal_high": s_data[1], + "steal_auto": s_data[2], + "steal_jump": s_data[3], + "hit_and_run": cba.hit_and_run( + df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'], + df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR'] + ), + "running": cba.running(df_data['XBT%']), + "hand": df_data['bat_hand'] + }) + + print(f'Calculating batting cards...') + offense_stats.apply(create_batting_card, axis=1) + print(f'Cards are complete.\n\nPosting cards now...') + if to_post: + resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30) + print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...') + offense_stats = pd.merge( + offense_stats, await pd_battingcards_df(cardset['id']), on='player_id').set_index('key_bbref', drop=False) + + return offense_stats + + +async def calculate_batting_ratings(offense_stats: pd.DataFrame, to_post: bool): + batting_ratings = [] + + def create_batting_card_ratings(df_data): + logging.debug(f'Calculating card ratings for {df_data.name}') + batting_ratings.extend(cba.get_batter_ratings(df_data)) + + print(f'Calculating card ratings...') + offense_stats.apply(create_batting_card_ratings, axis=1) + print(f'Ratings are complete\n\nPosting ratings now...') + if to_post: + resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30) + print(f'Response: {resp}\n\nPulling fresh PD player data...') + + return len(batting_ratings) + + +async def post_player_updates( + cardset: dict, card_base_url: str, release_dir: str, player_desc: str, is_liveseries: bool, to_post: bool): + """ + Pull fresh pd_players and set_index to player_id + Pull fresh battingcards and set_index to player + Pull fresh battingcardratings one hand at a time and join on battingcard (suffixes _vl and vR) + + Join battingcards (left) with battingcardratings (right) as total_ratings on id (left) and battingcard (right) + Join pd_players (left) with total_ratings (right) on indeces + Output: PD player list with batting card, ratings vL, and ratings vR + + Calculate Total OPS as OPSvL + OPSvR + min(OPSvL, OPSvR) / 3 and assign rarity_id + For players with cost of 99999, set cost to * Total OPS / + """ + + p_data = await pd_players_df(cardset['id']) + p_data.set_index('player_id', drop=False) + total_ratings = pd.merge( + await pd_battingcards_df(cardset['id']), + await pd_battingcardratings_df(cardset['id']), + on='battingcard_id' + ) + player_data = pd.merge( + p_data, + total_ratings, + on='player_id' + ).set_index('player_id', drop=False) + del total_ratings + + def get_pids(df_data): + return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref') + + ids_and_names = player_data.apply(get_pids, axis=1) + player_data = (ids_and_names + .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id') + .query('key_mlbam == key_mlbam') + .set_index('key_bbref', drop=False)) + + player_updates = {} # { : [ (param pairs) ] } + rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity') + average_ops = rarity_group['total_OPS'].mean().to_dict() + if 1 not in average_ops: + average_ops[1] = 1.066 + if 2 not in average_ops: + average_ops[2] = 0.938 + if 3 not in average_ops: + average_ops[3] = 0.844 + if 4 not in average_ops: + average_ops[4] = 0.752 + if 5 not in average_ops: + average_ops[5] = 0.612 + + def get_player_updates(df_data): + base_costs = { + 1: 810, + 2: 270, + 3: 90, + 4: 30, + 5: 10, + 99: 2400 + } + params = [] + + if df_data['description'] != player_desc: + params = [('description', f'{player_desc}')] + + if is_liveseries: + team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam']))) + + if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None: + params.extend([('mlbclub', team_data['mlbclub'])]) + if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None: + params.extend([('franchise', team_data['franchise'])]) + + # if release_directory not in df_data['image']: + params.extend([('image', f'{card_base_url}/{df_data["player_id"]}/battingcard' + f'{urllib.parse.quote("?d=")}{release_dir}')]) + + if df_data['cost'] == 99999: + params.extend([ + ('cost', + round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS'] / + average_ops[df_data['new_rarity_id']])), + ('rarity_id', df_data['new_rarity_id']) + ]) + + elif df_data['rarity'] != df_data['new_rarity_id']: + old_rarity = df_data['rarity'] + new_rarity = df_data['new_rarity_id'] + old_cost = df_data['cost'] + new_cost = 0 + + if old_rarity == 1: + if new_rarity == 2: + new_cost = max(old_cost - 540, 100) + elif new_rarity == 3: + new_cost = max(old_cost - 720, 50) + elif new_rarity == 4: + new_cost = max(old_cost - 780, 15) + elif new_rarity == 5: + new_cost = max(old_cost - 800, 5) + elif new_rarity == 99: + new_cost = old_cost + 1600 + elif old_rarity == 2: + if new_rarity == 1: + new_cost = old_cost + 540 + elif new_rarity == 3: + new_cost = max(old_cost - 180, 50) + elif new_rarity == 4: + new_cost = max(old_cost - 240, 15) + elif new_rarity == 5: + new_cost = max(old_cost - 260, 5) + elif new_rarity == 99: + new_cost = old_cost + 2140 + elif old_rarity == 3: + if new_rarity == 1: + new_cost = old_cost + 720 + elif new_rarity == 2: + new_cost = old_cost + 180 + elif new_rarity == 4: + new_cost = max(old_cost - 60, 15) + elif new_rarity == 5: + new_cost = max(old_cost - 80, 5) + elif new_rarity == 99: + new_cost = old_cost + 2320 + elif old_rarity == 4: + if new_rarity == 1: + new_cost = old_cost + 780 + elif new_rarity == 2: + new_cost = old_cost + 240 + elif new_rarity == 3: + new_cost = old_cost + 60 + elif new_rarity == 5: + new_cost = max(old_cost - 20, 5) + elif new_rarity == 99: + new_cost = old_cost + 2380 + elif old_rarity == 5: + if new_rarity == 1: + new_cost = old_cost + 800 + elif new_rarity == 2: + new_cost = old_cost + 260 + elif new_rarity == 3: + new_cost = old_cost + 80 + elif new_rarity == 4: + new_cost = old_cost + 20 + elif new_rarity == 99: + new_cost = old_cost + 2400 + elif old_rarity == 99: + if new_rarity == 1: + new_cost = max(old_cost - 1600, 800) + elif new_rarity == 2: + new_cost = max(old_cost - 2140, 100) + elif new_rarity == 3: + new_cost = max(old_cost - 2320, 50) + elif new_rarity == 4: + new_cost = max(old_cost - 2380, 15) + elif new_rarity == 5: + new_cost = max(old_cost - 2400, 5) + + if new_cost != 0: + params.extend([('cost', new_cost), ('rarity_id', new_rarity)]) + + if len(params) > 0: + if df_data.player_id not in player_updates.keys(): + player_updates[df_data.player_id] = params + else: + player_updates[df_data.player_id].extend(params) + + player_data.apply(get_player_updates, axis=1) + + print(f'Sending {len(player_updates)} player updates to PD database...') + if to_post: + for x in player_updates: + await db_patch('players', object_id=x, params=player_updates[x]) + + return len(player_updates) \ No newline at end of file diff --git a/card_creation.py b/card_creation.py index 08b2029..ee44298 100644 --- a/card_creation.py +++ b/card_creation.py @@ -1,17 +1,11 @@ import copy -import math -import pathlib import sys -import csv -import logging -import datetime import asyncio -from db_calls import db_get from creation_helpers import * -import calcs_pitcher as p -import calcs_batter as b -import calcs_defense as d +from pitchers import calcs_pitcher as p +from batters import calcs_batter as b +from defenders import calcs_defense as d date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}' log_level = logging.INFO diff --git a/check_cards.py b/check_cards.py index 5cbaa57..fad0621 100644 --- a/check_cards.py +++ b/check_cards.py @@ -70,6 +70,7 @@ async def main(args): timeout = 6 try: + logging.info(f'calling the card url') resp = await url_get(card_url, timeout=timeout) except ConnectionError as e: @@ -83,8 +84,6 @@ async def main(args): errors.append((x, e)) else: - successes.append(x) - if x['image2'] is not None: if 'html_cards' in arg_data and arg_data['html_cards'].lower() == 'true': card_url = f'{x["image2"]}&html=true' @@ -108,7 +107,9 @@ async def main(args): errors.append((x, e)) else: - successes.pop(x) + successes.append(x) + else: + successes.append(x) if len(errors) > 0: logging.error(f'All Errors:') @@ -116,7 +117,7 @@ async def main(args): logging.error(f'ID {x[0]["player_id"]} {x[0]["p_name"]} - Error: {x[1]}') if len(successes) > 0: - logging.info(f'All Successes:') + logging.debug(f'All Successes:') for x in successes: logging.info(f'ID {x["player_id"]} {x["p_name"]}') diff --git a/creation_helpers.py b/creation_helpers.py index 0bb31c3..0d97458 100644 --- a/creation_helpers.py +++ b/creation_helpers.py @@ -524,13 +524,6 @@ async def pd_players_df(cardset_id: int): return pd.DataFrame(p_query['players']) -async def pd_battingcards_df(cardset_id: int): - bc_query = await db_get('battingcards', params=[('cardset_id', cardset_id), ('short_output', True)]) - if bc_query['count'] == 0: - raise ValueError(f'No batting cards returned from Paper Dynasty API') - return pd.DataFrame(bc_query['cards']).rename(columns={'id': 'battingcard_id', 'player': 'player_id'}) - - async def pd_pitchingcards_df(cardset_id: int): bc_query = await db_get('pitchingcards', params=[('cardset_id', cardset_id), ('short_output', True)]) if bc_query['count'] == 0: @@ -538,48 +531,6 @@ async def pd_pitchingcards_df(cardset_id: int): return pd.DataFrame(bc_query['cards']).rename(columns={'id': 'pitchingcard_id', 'player': 'player_id'}) -async def pd_battingcardratings_df(cardset_id: int): - vl_query = await db_get( - 'battingcardratings', params=[ - ('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True), ('team_id', 31), - ('ts', 's37136685556r6135248705')]) - vr_query = await db_get( - 'battingcardratings', params=[ - ('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True), ('team_id', 31), - ('ts', 's37136685556r6135248705')]) - if 0 in [vl_query['count'], vr_query['count']]: - raise ValueError(f'No batting card ratings returned from Paper Dynasty API') - vl = pd.DataFrame(vl_query['ratings']) - vr = pd.DataFrame(vr_query['ratings']) - ratings = (pd.merge(vl, vr, on='battingcard', suffixes=('_vL', '_vR')) - .rename(columns={'battingcard': 'battingcard_id'})) - - def get_total_ops(df_data): - ops_vl = df_data['obp_vL'] + df_data['slg_vL'] - ops_vr = df_data['obp_vR'] + df_data['slg_vR'] - return (ops_vr + ops_vl + min(ops_vl, ops_vr)) / 3 - ratings['total_OPS'] = ratings.apply(get_total_ops, axis=1) - - def new_rarity_id(df_data): - if df_data['total_OPS'] >= 1.2: - return 99 - elif df_data['total_OPS'] >= 1: - return 1 - elif df_data['total_OPS'] >= .9: - return 2 - elif df_data['total_OPS'] >= .8: - return 3 - elif df_data['total_OPS'] >= .7: - return 4 - else: - return 5 - ratings['new_rarity_id'] = ratings.apply(new_rarity_id, axis=1) - - return ratings - - # return pd.DataFrame(bcr_query['ratings']).rename(columns={'battingcard': 'battingcard_id'}) - - async def pd_pitchingcardratings_df(cardset_id: int): vl_query = await db_get( 'pitchingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True)]) @@ -611,22 +562,6 @@ async def pd_positions_df(cardset_id: int): return all_pos -def get_batting_stats(file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None): - if file_path is not None: - vl_basic = pd.read_csv(f'{file_path}vlhp-basic.csv').query('PA >= 20') - vr_basic = pd.read_csv(f'{file_path}vrhp-basic.csv').query('PA >= 40') - total_basic = pd.merge(vl_basic, vr_basic, on="playerId", suffixes=('_vL', '_vR')) - - vl_rate = pd.read_csv(f'{file_path}vlhp-rate.csv').query('PA >= 20') - vr_rate = pd.read_csv(f'{file_path}vrhp-rate.csv').query('PA >= 40') - total_rate = pd.merge(vl_rate, vr_rate, on="playerId", suffixes=('_vL', '_vR')) - - return pd.merge(total_basic, total_rate, on="playerId", suffixes=('', '_rate')) - - else: - raise LookupError(f'Date-based stat pulls not implemented, yet. Please provide batting csv files.') - - def get_pitching_stats(file_path: str = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None): if file_path is not None: vl_basic = pd.read_csv(f'{file_path}vlhh-basic.csv').query('TBF >= 20') @@ -1085,3 +1020,17 @@ def get_all_pybaseball_ids(player_id: list, key_type: str): return_val = None return return_val + + +def sanitize_name(start_name: str) -> str: + return (start_name + .replace("é", "e") + .replace("á", "a") + .replace(".", "") + .replace("Á", "A") + .replace("ñ", "n") + .replace("ó", "o") + .replace("í", "i") + .replace("ú", "u") + .replace("'", "") + .replace('-', ' ')) \ No newline at end of file diff --git a/db_calls.py b/db_calls.py index 0864804..99500e6 100644 --- a/db_calls.py +++ b/db_calls.py @@ -8,7 +8,7 @@ from typing import Literal, Optional AUTH_TOKEN = {'Authorization': f'Bearer Tp3aO3jhYve5NJF1IqOmJTmk'} DB_URL = 'https://pd.manticorum.com/api' master_debug = True -alt_database = False +alt_database = 'dev' if alt_database == 'dev': DB_URL = 'https://pddev.manticorum.com/api' @@ -92,7 +92,7 @@ async def url_get(url: str, timeout: int = 3): retries += 1 if resp.status_code == 200: - logging.debug(f'return: {resp.text}') + log_string = f'200 received' if master_debug: logging.info(f'return: {log_string[:1200]}{" [ S N I P P E D ]" if len(log_string) > 1200 else ""}') else: @@ -236,3 +236,9 @@ def get_player_data( return q.loc[0] else: return q.loc[0].to_dict() + + +def player_desc(this_player) -> str: + if this_player['p_name'] in this_player['description']: + return this_player['description'] + return f'{this_player["description"]} {this_player["p_name"]}' diff --git a/defenders/__init__.py b/defenders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/calcs_defense.py b/defenders/calcs_defense.py similarity index 66% rename from calcs_defense.py rename to defenders/calcs_defense.py index a77e0d4..399eee7 100644 --- a/calcs_defense.py +++ b/defenders/calcs_defense.py @@ -5,6 +5,136 @@ import requests from bs4 import BeautifulSoup from typing import Literal +from db_calls import db_put + + +async def create_positions( + all_stats: pd.DataFrame, season_pct: float, post_pos: bool, df_c: pd.DataFrame, df_1b: pd.DataFrame, + df_2b: pd.DataFrame, df_3b: pd.DataFrame, df_ss: pd.DataFrame, df_lf: pd.DataFrame, df_cf: pd.DataFrame, + df_rf: pd.DataFrame, df_of: pd.DataFrame): + position_payload = [] + + def create_positions(df_data): + no_data = True + for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]: + if df_data['key_bbref'] in pos_data[0].index: + logging.debug(f'Running {pos_data[1]} stats for {df_data["p_name"]}') + no_data = False + average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + + min( + int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + )) / 3 + + position_payload.append({ + "player_id": int(df_data['player_id']), + "position": pos_data[1].upper(), + "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), + "range": get_if_range( + pos_code=pos_data[1], + tz_runs=round(average_range), + r_dp=0, + season_pct=season_pct + ), + "error": get_any_error( + pos_code=pos_data[1], + errors=int(pos_data[0].at[df_data["key_bbref"], 'E_def']), + chances=int(pos_data[0].at[df_data["key_bbref"], 'chances']), + season_pct=season_pct + ) + }) + + of_arms = [] + of_payloads = [] + for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]: + if df_data["key_bbref"] in pos_data[0].index: + no_data = False + average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + + min( + int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), + int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + )) / 3 + of_payloads.append({ + "player_id": int(df_data['player_id']), + "position": pos_data[1].upper(), + "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), + "range": get_of_range( + pos_code=pos_data[1], + tz_runs=round(average_range), + season_pct=season_pct + ) + }) + of_arms.append(int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_outfield'])) + + if df_data["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0: + no_data = False + error_rating = get_any_error( + pos_code=pos_data[1], + errors=int(df_of.at[df_data["key_bbref"], 'E_def']), + chances=int(df_of.at[df_data["key_bbref"], 'chances']), + season_pct=season_pct + ) + arm_rating = arm_outfield(of_arms) + for f in of_payloads: + f['error'] = error_rating + f['arm'] = arm_rating + position_payload.append(f) + + if df_data["key_bbref"] in df_c.index: + if df_c.at[df_data["key_bbref"], 'SB'] + df_c.at[df_data["key_bbref"], 'CS'] == 0: + arm_rating = 3 + else: + arm_rating = arm_catcher( + cs_pct=df_c.at[df_data["key_bbref"], 'caught_stealing_perc'], + raa=int(df_c.at[df_data["key_bbref"], 'bis_runs_catcher_sb']), + season_pct=season_pct + ) + no_data = False + position_payload.append({ + "player_id": int(df_data['player_id']), + "position": 'C', + "innings": float(df_c.at[df_data["key_bbref"], 'Inn_def']), + "range": range_catcher( + rs_value=int(df_c.at[df_data["key_bbref"], 'tz_runs_catcher']), + season_pct=season_pct + ), + "error": get_any_error( + pos_code='c', + errors=int(df_c.at[df_data["key_bbref"], 'E_def']), + chances=int(df_c.at[df_data["key_bbref"], 'chances']), + season_pct=season_pct + ), + "arm": arm_rating, + "pb": pb_catcher( + pb=int(df_c.at[df_data["key_bbref"], 'PB']), + innings=int(float(df_c.at[df_data["key_bbref"], 'Inn_def'])), + season_pct=season_pct + ), + "overthrow": ot_catcher( + errors=int(df_c.at[df_data["key_bbref"], 'E_def']), + chances=int(df_c.at[df_data["key_bbref"], 'chances']), + season_pct=season_pct + ) + }) + + if no_data: + position_payload.append({ + "player_id": int(df_data['player_id']), + "position": 'DH', + "innings": df_data['PA_vL'] + df_data['PA_vR'] + }) + + print(f'Calculating fielding lines now...') + all_stats.apply(create_positions, axis=1) + print(f'Fielding is complete.\n\nPosting positions now...') + if post_pos: + resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30) + print(f'Response: {resp}\n') + + return len(position_payload) + def range_pitcher(rs_value: int, season_pct: float): if rs_value >= (3 * season_pct): diff --git a/live_series_update.py b/live_series_update.py index c331ce9..9686ff1 100644 --- a/live_series_update.py +++ b/live_series_update.py @@ -1,27 +1,19 @@ import asyncio -import copy -import csv import datetime -import html5lib import logging -import random -import requests import urllib.parse -import calcs_batter as cba -import calcs_defense as cde -import calcs_pitcher as cpi +import batters.creation +import defenders.calcs_defense +from defenders import calcs_defense as cde +from pitchers import calcs_pitcher as cpi import pandas as pd -import pybaseball as pb -import pydantic import sys -from creation_helpers import pd_players_df, get_batting_stats, pd_battingcards_df, pd_battingcardratings_df, \ +from creation_helpers import pd_players_df, \ get_pitching_stats, get_all_pybaseball_ids, pd_pitchingcards_df, pd_pitchingcardratings_df, pd_positions_df, \ - get_args, mlbteam_and_franchise, CLUB_LIST, FRANCHISE_LIST + get_args, mlbteam_and_franchise, CLUB_LIST, FRANCHISE_LIST, sanitize_name from db_calls import db_get, db_put, db_post, db_patch, DB_URL -from typing import Literal -from bs4 import BeautifulSoup date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}' log_level = logging.INFO @@ -33,51 +25,6 @@ logging.basicConfig( CARD_BASE_URL = f'{DB_URL}/v2/players' -def sanitize_name(start_name: str) -> str: - return (start_name - .replace("é", "e") - .replace("á", "a") - .replace(".", "") - .replace("Á", "A") - .replace("ñ", "n") - .replace("ó", "o") - .replace("í", "i") - .replace("ú", "u") - .replace("'", "") - .replace('-', ' ')) - - -# class BattingStat(pydantic.BaseModel): -# fg_id: int -# vs_hand: Literal['L', 'R'] -# pa: int -# hit: int -# single: int -# double: int -# triple: int -# homerun: int -# rbi: int -# bb: int -# ibb: int -# so: int -# hbp: int -# gidp: int -# sb: int -# cs: int -# avg: float -# hard_rate: float = None -# med_rate: float = None -# soft_rate: float = None -# ifh_rate: float = None -# hr_per_fb: float = None -# ld_rate: float = None -# iffb_rate: float = None -# fb_rate: float = None -# pull_rate: float = None -# center_rate: float = None -# oppo_rate: float = None - - async def main(args): """ params: @@ -123,83 +70,36 @@ async def main(args): else: player_description = f'{season}' + post_batters = True if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true' else False + post_players = True if 'post_players' not in arg_data or arg_data['post_players'].lower() == 'true' else False + pull_fielding = True if 'pull_fielding' not in arg_data or arg_data['pull_fielding'].lower() == 'true' else False + start_time = datetime.datetime.now() release_directory = f'{start_time.year}-{start_time.month}-{start_time.day}' input_path = f'data-input/{cardset["name"]} Cardset/' print('Reading batting stats...') - all_batting = get_batting_stats(file_path=input_path) + all_batting = batters.creation.get_batting_stats(file_path=input_path) print(f'Processed {len(all_batting.values)} batters\n') - def get_pids(df_data): - return get_all_pybaseball_ids([df_data["playerId"]], 'fangraphs') - - def get_hand(df_data): - if df_data['Name'][-1] == '*': - return 'L' - elif df_data['Name'][-1] == '#': - return 'S' - else: - return 'R' - print(f'Pulling PD player IDs...') pd_players = await pd_players_df(cardset['id']) - # .set_index('bbref_id', drop=False) + bat_step1 = batters.creation.match_player_lines(all_batting, pd_players) + if post_players: + new_batters = await batters.creation.create_new_players( + bat_step1, cardset, CARD_BASE_URL, release_directory, player_description + ) + else: + new_batters = 0 + offense_stats = batters.creation.get_offense_stats(bat_step1, input_path) + del bat_step1 - print(f'Now pulling mlbam player IDs...') - ids_and_names = all_batting.apply(get_pids, axis=1) - player_data = (ids_and_names - .merge(pd_players, how='left', left_on='key_bbref', right_on='bbref_id') - .query('key_mlbam == key_mlbam') - .set_index('key_bbref', drop=False)) - print(f'Matched mlbam to pd players.') - final_batting = pd.merge( - player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False - ).set_index('key_bbref', drop=False) - - new_players = [] - - def create_batters(df_data): - f_name = sanitize_name(df_data["name_first"]).title() - l_name = sanitize_name(df_data["name_last"]).title() - new_players.append({ - 'p_name': f'{f_name} {l_name}', - 'cost': 99999, - 'image': f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard' - f'{urllib.parse.quote("?d=")}{release_directory}', - 'mlbclub': CLUB_LIST[df_data['Tm_vL']], - 'franchise': FRANCHISE_LIST[df_data['Tm_vL']], - 'cardset_id': cardset['id'], - 'set_num': int(float(df_data['key_fangraphs'])), - 'rarity_id': 99, - 'pos_1': 'DH', - 'description': f'{player_description}', - 'bbref_id': df_data.name, - 'fangr_id': int(float(df_data['key_fangraphs'])), - 'strat_code': int(float(df_data['key_mlbam'])) - }) - - final_batting[final_batting['player_id'].isnull()].apply(create_batters, axis=1) - print(f'Creating {len(new_players)} new players...') - for x in new_players: - this_player = await db_post('players', payload=x) - final_batting.at[x['bbref_id'], 'player_id'] = this_player['player_id'] - final_batting.at[x['bbref_id'], 'p_name'] = this_player['p_name'] - - del ids_and_names, all_batting, pd_players - print(f'Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n') - - print(f'Reading baserunning stats...') - run_data = (pd.read_csv(f'{input_path}running.csv') - .set_index('Name-additional')) - run_data['bat_hand'] = run_data.apply(get_hand, axis=1) - offense_stats = final_batting.join(run_data) - del final_batting, run_data - print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...') + offense_stats = await batters.creation.calculate_batting_cards(offense_stats, cardset, season_pct, post_batters) + await batters.creation.calculate_batting_ratings(offense_stats, post_batters) print(f'Pulling pitcher defense...') df_p = cde.get_bbref_fielding_df('p', season) - if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true': + if pull_fielding: print(f'Pulling catcher defense...') df_c = cde.get_bbref_fielding_df('c', season) print(f'Pulling first base defense...') @@ -220,359 +120,19 @@ async def main(args): df_of = cde.get_bbref_fielding_df('of', season) print(f'Positions data is retrieved') - batting_cards = [] - - def create_batting_card(df_data): - s_data = cba.stealing( - chances=df_data['SBO'], - sb2s=df_data['SB2'], - cs2s=df_data['CS2'], - sb3s=df_data['SB3'], - cs3s=df_data['CS3'], - season_pct=season_pct + await defenders.calcs_defense.create_positions( + offense_stats, season_pct, post_batters, df_c, df_1b, df_2b, df_3b, df_ss, df_lf, df_cf, df_rf, df_of ) - batting_cards.append({ - "player_id": df_data['player_id'], - "key_bbref": df_data.name, - "key_fangraphs": int(float(df_data['key_fangraphs'])), - "key_mlbam": df_data['key_mlbam'], - "key_retro": df_data['key_retro'], - "name_first": df_data["name_first"].title(), - "name_last": df_data["name_last"].title(), - "steal_low": s_data[0], - "steal_high": s_data[1], - "steal_auto": s_data[2], - "steal_jump": s_data[3], - "hit_and_run": cba.hit_and_run( - df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'], - df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR'] - ), - "running": cba.running(df_data['XBT%']), - "hand": df_data['bat_hand'] - }) - print(f'Calculating batting cards...') - offense_stats.apply(create_batting_card, axis=1) - print(f'Cards are complete.\n\nPosting cards now...') - if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': - resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30) - print(f'Response: {resp}\n\nMatching batting card database IDs to player stats...') - offense_stats = pd.merge( - offense_stats, await pd_battingcards_df(cardset['id']), on='player_id').set_index('key_bbref', drop=False) - - position_payload = [] - - def create_positions(df_data): - no_data = True - for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]: - if df_data['key_bbref'] in pos_data[0].index: - logging.debug(f'Running {pos_data[1]} stats for {player_data.at[df_data["key_bbref"], "p_name"]}') - no_data = False - average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + - min( - int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) - )) / 3 - - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": pos_data[1].upper(), - "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), - "range": cde.get_if_range( - pos_code=pos_data[1], - tz_runs=round(average_range), - r_dp=0, - season_pct=season_pct - ), - "error": cde.get_any_error( - pos_code=pos_data[1], - errors=int(pos_data[0].at[df_data["key_bbref"], 'E_def']), - chances=int(pos_data[0].at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ) - }) - - of_arms = [] - of_payloads = [] - for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]: - if df_data["key_bbref"] in pos_data[0].index: - no_data = False - average_range = (int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']) + - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) + - min( - int(pos_data[0].at[df_data["key_bbref"], 'tz_runs_total']), - int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_total']) - )) / 3 - of_payloads.append({ - "player_id": int(df_data['player_id']), - "position": pos_data[1].upper(), - "innings": float(pos_data[0].at[df_data["key_bbref"], 'Inn_def']), - "range": cde.get_of_range( - pos_code=pos_data[1], - tz_runs=round(average_range), - season_pct=season_pct - ) - }) - of_arms.append(int(pos_data[0].at[df_data["key_bbref"], 'bis_runs_outfield'])) - - if df_data["key_bbref"] in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0: - no_data = False - error_rating = cde.get_any_error( - pos_code=pos_data[1], - errors=int(df_of.at[df_data["key_bbref"], 'E_def']), - chances=int(df_of.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ) - arm_rating = cde.arm_outfield(of_arms) - for f in of_payloads: - f['error'] = error_rating - f['arm'] = arm_rating - position_payload.append(f) - - if df_data["key_bbref"] in df_c.index: - if df_c.at[df_data["key_bbref"], 'SB'] + df_c.at[df_data["key_bbref"], 'CS'] == 0: - arm_rating = 3 - else: - arm_rating = cde.arm_catcher( - cs_pct=df_c.at[df_data["key_bbref"], 'caught_stealing_perc'], - raa=int(df_c.at[df_data["key_bbref"], 'bis_runs_catcher_sb']), - season_pct=season_pct - ) - no_data = False - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": 'C', - "innings": float(df_c.at[df_data["key_bbref"], 'Inn_def']), - "range": cde.range_catcher( - rs_value=int(df_c.at[df_data["key_bbref"], 'tz_runs_catcher']), - season_pct=season_pct - ), - "error": cde.get_any_error( - pos_code='c', - errors=int(df_c.at[df_data["key_bbref"], 'E_def']), - chances=int(df_c.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ), - "arm": arm_rating, - "pb": cde.pb_catcher( - pb=int(df_c.at[df_data["key_bbref"], 'PB']), - innings=int(float(df_c.at[df_data["key_bbref"], 'Inn_def'])), - season_pct=season_pct - ), - "overthrow": cde.ot_catcher( - errors=int(df_c.at[df_data["key_bbref"], 'E_def']), - chances=int(df_c.at[df_data["key_bbref"], 'chances']), - season_pct=season_pct - ) - }) - - if no_data: - position_payload.append({ - "player_id": int(df_data['player_id']), - "position": 'DH', - "innings": df_data['PA_vL'] + df_data['PA_vR'] - }) - - if 'pull_fielding' in arg_data and arg_data['pull_fielding'].lower() == 'true': - print(f'Calculating fielding lines now...') - offense_stats.apply(create_positions, axis=1) - print(f'Fielding is complete.\n\nPosting positions now...') - if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': - resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30) - print(f'Response: {resp}\n') - - batting_ratings = [] - - def create_batting_card_ratings(df_data): - logging.debug(f'Calculating card ratings for {df_data.name}') - batting_ratings.extend(cba.get_batter_ratings(df_data)) - - print(f'Calculating card ratings...') - offense_stats.apply(create_batting_card_ratings, axis=1) - print(f'Ratings are complete\n\nPosting ratings now...') - if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': - resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30) - print(f'Response: {resp}\n\nPulling fresh PD player data...') - - """ - Pull fresh pd_players and set_index to player_id - Pull fresh battingcards and set_index to player - Pull fresh battingcardratings one hand at a time and join on battingcard (suffixes _vl and vR) - - Join battingcards (left) with battingcardratings (right) as total_ratings on id (left) and battingcard (right) - Join pd_players (left) with total_ratings (right) on indeces - Output: PD player list with batting card, ratings vL, and ratings vR - - Calculate Total OPS as OPSvL + OPSvR + min(OPSvL, OPSvR) / 3 and assign rarity_id - For players with cost of 99999, set cost to * Total OPS / - """ - p_data = await pd_players_df(cardset['id']) - p_data.set_index('player_id', drop=False) - total_ratings = pd.merge( - await pd_battingcards_df(cardset['id']), - await pd_battingcardratings_df(cardset['id']), - on='battingcard_id' + is_liveseries = True if 'is_liveseries' not in arg_data or arg_data['is_liveseries'].lower() == 'true' else False + batter_updates = await batters.creation.post_player_updates( + cardset, CARD_BASE_URL, release_directory, player_description, is_liveseries, post_batters ) - player_data = pd.merge( - p_data, - total_ratings, - on='player_id' - ).set_index('player_id', drop=False) - del total_ratings, offense_stats - def get_pids(df_data): - return get_all_pybaseball_ids([df_data["bbref_id"]], 'bbref') - - ids_and_names = player_data.apply(get_pids, axis=1) - player_data = (ids_and_names - .merge(player_data, how='left', left_on='key_bbref', right_on='bbref_id') - .query('key_mlbam == key_mlbam') - .set_index('key_bbref', drop=False)) - - player_updates = {} # { : [ (param pairs) ] } - rarity_group = player_data.query('rarity == new_rarity_id').groupby('rarity') - average_ops = rarity_group['total_OPS'].mean().to_dict() - if 1 not in average_ops: - average_ops[1] = 1.066 - if 2 not in average_ops: - average_ops[2] = 0.938 - if 3 not in average_ops: - average_ops[3] = 0.844 - if 4 not in average_ops: - average_ops[4] = 0.752 - if 5 not in average_ops: - average_ops[5] = 0.612 - # cost_groups = rarity_group['cost'].mean() - - def get_player_updates(df_data): - base_costs = { - 1: 810, - 2: 270, - 3: 90, - 4: 30, - 5: 10, - 99: 2400 - } - params = [] - - if df_data['description'] != player_description: - params = [('description', f'{player_description}')] - - if 'is_liveseries' in arg_data and arg_data['is_liveseries'].lower() == 'true': - team_data = mlbteam_and_franchise(int(float(df_data['key_mlbam']))) - - if df_data['mlbclub'] != team_data['mlbclub'] and team_data['mlbclub'] is not None: - params.extend([('mlbclub', team_data['mlbclub'])]) - if df_data['franchise'] != team_data['franchise'] and team_data['franchise'] is not None: - params.extend([('franchise', team_data['franchise'])]) - - # if release_directory not in df_data['image']: - params.extend([('image', f'{CARD_BASE_URL}/{df_data["player_id"]}/battingcard' - f'{urllib.parse.quote("?d=")}{release_directory}')]) - - if df_data['cost'] == 99999: - params.extend([ - ('cost', - round(base_costs[df_data['new_rarity_id']] * df_data['total_OPS'] / - average_ops[df_data['new_rarity_id']])), - ('rarity_id', df_data['new_rarity_id']) - ]) - - elif df_data['rarity'] != df_data['new_rarity_id']: - old_rarity = df_data['rarity'] - new_rarity = df_data['new_rarity_id'] - old_cost = df_data['cost'] - new_cost = 0 - - if old_rarity == 1: - if new_rarity == 2: - new_cost = max(old_cost - 540, 100) - elif new_rarity == 3: - new_cost = max(old_cost - 720, 50) - elif new_rarity == 4: - new_cost = max(old_cost - 780, 15) - elif new_rarity == 5: - new_cost = max(old_cost - 800, 5) - elif new_rarity == 99: - new_cost = old_cost + 1600 - elif old_rarity == 2: - if new_rarity == 1: - new_cost = old_cost + 540 - elif new_rarity == 3: - new_cost = max(old_cost - 180, 50) - elif new_rarity == 4: - new_cost = max(old_cost - 240, 15) - elif new_rarity == 5: - new_cost = max(old_cost - 260, 5) - elif new_rarity == 99: - new_cost = old_cost + 2140 - elif old_rarity == 3: - if new_rarity == 1: - new_cost = old_cost + 720 - elif new_rarity == 2: - new_cost = old_cost + 180 - elif new_rarity == 4: - new_cost = max(old_cost - 60, 15) - elif new_rarity == 5: - new_cost = max(old_cost - 80, 5) - elif new_rarity == 99: - new_cost = old_cost + 2320 - elif old_rarity == 4: - if new_rarity == 1: - new_cost = old_cost + 780 - elif new_rarity == 2: - new_cost = old_cost + 240 - elif new_rarity == 3: - new_cost = old_cost + 60 - elif new_rarity == 5: - new_cost = max(old_cost - 20, 5) - elif new_rarity == 99: - new_cost = old_cost + 2380 - elif old_rarity == 5: - if new_rarity == 1: - new_cost = old_cost + 800 - elif new_rarity == 2: - new_cost = old_cost + 260 - elif new_rarity == 3: - new_cost = old_cost + 80 - elif new_rarity == 4: - new_cost = old_cost + 20 - elif new_rarity == 99: - new_cost = old_cost + 2400 - elif old_rarity == 99: - if new_rarity == 1: - new_cost = max(old_cost - 1600, 800) - elif new_rarity == 2: - new_cost = max(old_cost - 2140, 100) - elif new_rarity == 3: - new_cost = max(old_cost - 2320, 50) - elif new_rarity == 4: - new_cost = max(old_cost - 2380, 15) - elif new_rarity == 5: - new_cost = max(old_cost - 2400, 5) - - if new_cost != 0: - params.extend([('cost', new_cost), ('rarity_id', new_rarity)]) - - if len(params) > 0: - if df_data.player_id not in player_updates.keys(): - player_updates[df_data.player_id] = params - else: - player_updates[df_data.player_id].extend(params) - - player_data.apply(get_player_updates, axis=1) - - print(f'Sending {len(player_updates)} player updates to PD database...') - if 'post_batters' not in arg_data or arg_data['post_batters'].lower() == 'true': - for x in player_updates: - await db_patch('players', object_id=x, params=player_updates[x]) - - del player_updates print(f'Batter updates are complete') start_time_two = datetime.datetime.now() run_time = start_time_two - start_time - print(f'Total batting cards: {len(batting_cards)}\nNew cardset batters: {len(new_players)}\n' + print(f'Total batting cards: {len(offense_stats.index)}\nNew cardset batters: {new_batters}\n' f'Batter runtime: {round(run_time.total_seconds())} seconds\n') print('Reading pitching stats...') diff --git a/pitchers/__init__.py b/pitchers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/calcs_pitcher.py b/pitchers/calcs_pitcher.py similarity index 100% rename from calcs_pitcher.py rename to pitchers/calcs_pitcher.py index d9d6711..aaf41ce 100644 --- a/calcs_pitcher.py +++ b/pitchers/calcs_pitcher.py @@ -8,82 +8,6 @@ from decimal import Decimal from typing import List, Literal -def get_pitcher_ratings(df_data) -> List[dict]: - vl = PitchingCardRatingsModel( - pitchingcard_id=df_data.pitchingcard_id, - pit_hand=df_data.pitch_hand, - vs_hand='L', - all_hits=sanitize_chance_output((df_data['AVG_vL'] - 0.05) * 108), # Subtracting chances from BP results - all_other_ob=sanitize_chance_output(108 * (df_data['BB_vL'] + df_data['HBP_vL']) / df_data['TBF_vL']), - hard_rate=df_data['Hard%_vL'], - med_rate=df_data['Med%_vL'], - soft_rate=df_data['Soft%_vL'] - ) - vr = PitchingCardRatingsModel( - pitchingcard_id=df_data.pitchingcard_id, - pit_hand=df_data.hand, - vs_hand='R', - all_hits=sanitize_chance_output((df_data['AVG_vR'] - 0.05) * 108), # Subtracting chances from BP results - all_other_ob=sanitize_chance_output(108 * (df_data['BB_vR'] + df_data['HBP_vR']) / df_data['TBF_vR']), - hard_rate=df_data['Hard%_vR'], - med_rate=df_data['Med%_vR'], - soft_rate=df_data['Soft%_vR'] - ) - vl.all_outs = Decimal(108 - vl.all_hits - vl.all_other_ob).quantize(Decimal("0.05")) - vr.all_outs = Decimal(108 - vr.all_hits - vr.all_other_ob).quantize(Decimal("0.05")) - - logging.info( - f'vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} ' - f'/ Total: {vl.total_chances()}' - ) - logging.info( - f'vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} ' - f'/ Total: {vr.total_chances()}' - ) - - vl.calculate_singles(df_data['H_vL'], df_data['H_vL'] - df_data['2B_vL'] - df_data['3B_vL'] - df_data['HR_vL']) - vr.calculate_singles(df_data['H_vR'], df_data['H_vR'] - df_data['2B_vR'] - df_data['3B_vR'] - df_data['HR_vR']) - - logging.info(f'vL: All Hits: {vl.all_hits} / BP Singles: {vl.bp_single} / Single 2: {vl.single_two} / ' - f'Single 1: {vl.single_one} / Single CF: {vl.single_center}') - logging.info(f'vR: All Hits: {vr.all_hits} / BP Singles: {vr.bp_single} / Single 2: {vr.single_two} / ' - f'Single 1: {vr.single_one} / Single CF: {vr.single_center}') - - vl.calculate_xbh(df_data['2B_vL'], df_data['3B_vL'], df_data['HR_vL'], df_data['HR/FB_vL']) - vr.calculate_xbh(df_data['2B_vR'], df_data['3B_vR'], df_data['HR_vR'], df_data['HR/FB_vR']) - - logging.debug(f'vL: All XBH: {vl.all_hits - vl.single_one - vl.single_two - vl.single_center - vl.bp_single} / ' - f'Double**: {vl.double_two} / Double(cf): {vl.double_cf} / Triple: {vl.triple} / ' - f'BP HR: {vl.bp_homerun} / ND HR: {vl.homerun}') - logging.debug(f'vR: All XBH: {vr.all_hits - vr.single_one - vr.single_two - vr.single_center - vr.bp_single} / ' - f'Double**: {vr.double_two} / Double(cf): {vr.double_cf} / Triple: {vr.triple} / ' - f'BP HR: {vr.bp_homerun} / ND HR: {vr.homerun}') - - vl.calculate_other_ob(df_data['BB_vL'], df_data['HBP_vL']) - vr.calculate_other_ob(df_data['BB_vR'], df_data['HBP_vR']) - - logging.info(f'vL: All other OB: {vl.all_other_ob} / HBP: {vl.hbp} / BB: {vl.walk} / ' - f'Total Chances: {vl.total_chances()}') - logging.info(f'vR: All other OB: {vr.all_other_ob} / HBP: {vr.hbp} / BB: {vr.walk} / ' - f'Total Chances: {vr.total_chances()}') - - vl.calculate_strikouts( - df_data['SO_vL'], df_data['TBF_vL'] - df_data['BB_vL'] - df_data['IBB_vL'] - df_data['HBP_vL'], df_data['H_vL']) - vr.calculate_strikouts( - df_data['SO_vR'], df_data['TBF_vR'] - df_data['BB_vR'] - df_data['IBB_vR'] - df_data['HBP_vR'], df_data['H_vR']) - - logging.info(f'vL: All Outs: {vl.all_outs} / Ks: {vl.strikeout} / Current Outs: {vl.total_outs()}') - logging.info(f'vR: All Outs: {vr.all_outs} / Ks: {vr.strikeout} / Current Outs: {vr.total_outs()}') - - vl.calculate_other_outs(df_data['FB%_vL'], df_data['GB%_vL'], df_data['Oppo%_vL']) - vr.calculate_other_outs(df_data['FB%_vR'], df_data['GB%_vR'], df_data['Oppo%_vR']) - - logging.info(f'vL: Total chances: {vl.total_chances()}') - logging.info(f'vR: Total chances: {vr.total_chances()}') - - return [vl.custom_to_dict(), vr.custom_to_dict()] - - class PitchingCardRatingsModel(pydantic.BaseModel): pitchingcard_id: int pit_hand: Literal['R', 'L'] @@ -357,6 +281,82 @@ class PitchingCardRatingsModel(pydantic.BaseModel): raise ValueError(f'Could not complete card') +def get_pitcher_ratings(df_data) -> List[dict]: + vl = PitchingCardRatingsModel( + pitchingcard_id=df_data.pitchingcard_id, + pit_hand=df_data.pitch_hand, + vs_hand='L', + all_hits=sanitize_chance_output((df_data['AVG_vL'] - 0.05) * 108), # Subtracting chances from BP results + all_other_ob=sanitize_chance_output(108 * (df_data['BB_vL'] + df_data['HBP_vL']) / df_data['TBF_vL']), + hard_rate=df_data['Hard%_vL'], + med_rate=df_data['Med%_vL'], + soft_rate=df_data['Soft%_vL'] + ) + vr = PitchingCardRatingsModel( + pitchingcard_id=df_data.pitchingcard_id, + pit_hand=df_data.hand, + vs_hand='R', + all_hits=sanitize_chance_output((df_data['AVG_vR'] - 0.05) * 108), # Subtracting chances from BP results + all_other_ob=sanitize_chance_output(108 * (df_data['BB_vR'] + df_data['HBP_vR']) / df_data['TBF_vR']), + hard_rate=df_data['Hard%_vR'], + med_rate=df_data['Med%_vR'], + soft_rate=df_data['Soft%_vR'] + ) + vl.all_outs = Decimal(108 - vl.all_hits - vl.all_other_ob).quantize(Decimal("0.05")) + vr.all_outs = Decimal(108 - vr.all_hits - vr.all_other_ob).quantize(Decimal("0.05")) + + logging.info( + f'vL - All Hits: {vl.all_hits} / Other OB: {vl.all_other_ob} / All Outs: {vl.all_outs} ' + f'/ Total: {vl.total_chances()}' + ) + logging.info( + f'vR - All Hits: {vr.all_hits} / Other OB: {vr.all_other_ob} / All Outs: {vr.all_outs} ' + f'/ Total: {vr.total_chances()}' + ) + + vl.calculate_singles(df_data['H_vL'], df_data['H_vL'] - df_data['2B_vL'] - df_data['3B_vL'] - df_data['HR_vL']) + vr.calculate_singles(df_data['H_vR'], df_data['H_vR'] - df_data['2B_vR'] - df_data['3B_vR'] - df_data['HR_vR']) + + logging.info(f'vL: All Hits: {vl.all_hits} / BP Singles: {vl.bp_single} / Single 2: {vl.single_two} / ' + f'Single 1: {vl.single_one} / Single CF: {vl.single_center}') + logging.info(f'vR: All Hits: {vr.all_hits} / BP Singles: {vr.bp_single} / Single 2: {vr.single_two} / ' + f'Single 1: {vr.single_one} / Single CF: {vr.single_center}') + + vl.calculate_xbh(df_data['2B_vL'], df_data['3B_vL'], df_data['HR_vL'], df_data['HR/FB_vL']) + vr.calculate_xbh(df_data['2B_vR'], df_data['3B_vR'], df_data['HR_vR'], df_data['HR/FB_vR']) + + logging.debug(f'vL: All XBH: {vl.all_hits - vl.single_one - vl.single_two - vl.single_center - vl.bp_single} / ' + f'Double**: {vl.double_two} / Double(cf): {vl.double_cf} / Triple: {vl.triple} / ' + f'BP HR: {vl.bp_homerun} / ND HR: {vl.homerun}') + logging.debug(f'vR: All XBH: {vr.all_hits - vr.single_one - vr.single_two - vr.single_center - vr.bp_single} / ' + f'Double**: {vr.double_two} / Double(cf): {vr.double_cf} / Triple: {vr.triple} / ' + f'BP HR: {vr.bp_homerun} / ND HR: {vr.homerun}') + + vl.calculate_other_ob(df_data['BB_vL'], df_data['HBP_vL']) + vr.calculate_other_ob(df_data['BB_vR'], df_data['HBP_vR']) + + logging.info(f'vL: All other OB: {vl.all_other_ob} / HBP: {vl.hbp} / BB: {vl.walk} / ' + f'Total Chances: {vl.total_chances()}') + logging.info(f'vR: All other OB: {vr.all_other_ob} / HBP: {vr.hbp} / BB: {vr.walk} / ' + f'Total Chances: {vr.total_chances()}') + + vl.calculate_strikouts( + df_data['SO_vL'], df_data['TBF_vL'] - df_data['BB_vL'] - df_data['IBB_vL'] - df_data['HBP_vL'], df_data['H_vL']) + vr.calculate_strikouts( + df_data['SO_vR'], df_data['TBF_vR'] - df_data['BB_vR'] - df_data['IBB_vR'] - df_data['HBP_vR'], df_data['H_vR']) + + logging.info(f'vL: All Outs: {vl.all_outs} / Ks: {vl.strikeout} / Current Outs: {vl.total_outs()}') + logging.info(f'vR: All Outs: {vr.all_outs} / Ks: {vr.strikeout} / Current Outs: {vr.total_outs()}') + + vl.calculate_other_outs(df_data['FB%_vL'], df_data['GB%_vL'], df_data['Oppo%_vL']) + vr.calculate_other_outs(df_data['FB%_vR'], df_data['GB%_vR'], df_data['Oppo%_vR']) + + logging.info(f'vL: Total chances: {vl.total_chances()}') + logging.info(f'vR: Total chances: {vr.total_chances()}') + + return [vl.custom_to_dict(), vr.custom_to_dict()] + + def total_chances(chance_data): sum_chances = 0 for key in chance_data: diff --git a/pitchers/creation.py b/pitchers/creation.py new file mode 100644 index 0000000..e69de29