From 443eaa3a41c1e7785158d8dd50e2315274a7b7e1 Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Fri, 22 Sep 2023 01:29:51 -0500 Subject: [PATCH] Phase 1 of creation complete --- calcs_batter.py | 227 +++++++++++++++++------ calcs_defense.py | 145 +++++++++++---- db_calls.py | 31 ++++ live_series_update.py | 412 +++++++++++++++++++++++++----------------- 4 files changed, 558 insertions(+), 257 deletions(-) diff --git a/calcs_batter.py b/calcs_batter.py index f06a11a..2533c47 100644 --- a/calcs_batter.py +++ b/calcs_batter.py @@ -1,43 +1,53 @@ +import decimal +import logging +import math import pydantic from creation_helpers import mround -from typing import Literal +from typing import List, Literal +from decimal import Decimal class BattingCardRatingsModel(pydantic.BaseModel): - vs_hand: Literal['R', 'L', 'vR', 'vL'] - all_hits: float = 0.0 - other_ob: float = 0.0 - all_outs: float = 0.0 - all_singles: float = 0.0 - all_xbh: float = 0.0 - all_hr: float = 0.0 - all_doubles: float = 0.0 - homerun: float = 0.0 - bp_homerun: float = 0.0 - triple: float = 0.0 - double_three: float = 0.0 - double_two: float = 0.0 - double_pull: float = 0.0 - single_two: float = 0.0 - single_one: float = 0.0 - single_center: float = 0.0 - bp_single: float = 0.0 - hbp: float = 0.0 - walk: float = 0.0 - strikeout: float = 0.0 - lineout: float = 0.0 - popout: float = 0.0 - flyout_a: float = 0.0 - flyout_bq: float = 0.0 - flyout_lf_b: float = 0.0 - flyout_rf_b: float = 0.0 - groundout_a: float = 0.0 - groundout_b: float = 0.0 - groundout_c: float = 0.0 - avg: float = 0.0 - obp: float = 0.0 - slg: float = 0.0 + battingcard_id: int + vs_hand: Literal['R', 'L'] + all_hits: Decimal = Decimal(0.0) + other_ob: Decimal = Decimal(0.0) + all_outs: Decimal = Decimal(0.0) + rem_singles: Decimal = Decimal(0.0) + rem_xbh: Decimal = Decimal(0.0) + rem_hr: Decimal = Decimal(0.0) + rem_doubles: Decimal = Decimal(0.0) + hard_rate: Decimal + med_rate: Decimal + soft_rate: Decimal + homerun: Decimal = Decimal(0.0) + bp_homerun: Decimal = Decimal(0.0) + triple: Decimal = Decimal(0.0) + double_three: Decimal = Decimal(0.0) + double_two: Decimal = Decimal(0.0) + double_pull: Decimal = Decimal(0.0) + single_two: Decimal = Decimal(0.0) + single_one: Decimal = Decimal(0.0) + single_center: Decimal = Decimal(0.0) + bp_single: Decimal = Decimal(0.0) + hbp: Decimal = Decimal(0.0) + walk: Decimal = Decimal(0.0) + strikeout: Decimal = Decimal(0.0) + lineout: Decimal = Decimal(0.0) + popout: Decimal = Decimal(0.0) + rem_flyballs: Decimal = Decimal(0.0) + flyout_a: Decimal = Decimal(0.0) + flyout_bq: Decimal = Decimal(0.0) + flyout_lf_b: Decimal = Decimal(0.0) + flyout_rf_b: Decimal = Decimal(0.0) + rem_groundballs: Decimal = Decimal(0.0) + groundout_a: Decimal = Decimal(0.0) + groundout_b: Decimal = Decimal(0.0) + groundout_c: Decimal = Decimal(0.0) + avg: Decimal = 0.0 + obp: Decimal = 0.0 + slg: Decimal = 0.0 def total_chances(self): return sum([ @@ -64,39 +74,91 @@ class BattingCardRatingsModel(pydantic.BaseModel): def rem_other_ob(self): return self.other_ob - self.hbp - self.walk + def calculate_singles(self, szn_singles, szn_hits, ifh_rate: Decimal): + tot = sanitize_chance_output(self.all_hits * Decimal((szn_singles * .8) / szn_hits)) + logging.debug(f'tot: {tot}') + self.rem_singles = tot -def total_chances(chance_data): - sum_chances = 0 - for key in chance_data: - if key not in ['id', 'player_id', 'cardset_id', 'vs_hand', 'is_prep']: - sum_chances += chance_data[key] + self.bp_single = bp_singles(self.rem_singles) + self.rem_singles -= self.bp_single - return mround(sum_chances) + self.single_two = wh_singles(self.rem_singles, self.hard_rate) + self.rem_singles -= self.single_two + + self.single_one = one_singles(self.rem_singles, ifh_rate) + self.rem_singles -= self.single_one + + self.single_center = sanitize_chance_output(self.rem_singles) + self.rem_singles -= self.single_center + + self.rem_xbh = self.all_hits - self.bp_single - self.single_two - self.single_one - self.single_center + + def calculate_xbh(self, szn_triples, szn_doubles, szn_hr, hr_per_fb: Decimal): + self.triple = triples(self.rem_xbh, szn_triples, szn_doubles + szn_hr) + self.rem_xbh -= self.triple + + tot_doubles = sanitize_chance_output(self.rem_xbh * Decimal(szn_doubles / max(szn_hr + szn_doubles, 1))) + self.double_two = two_doubles(tot_doubles, self.soft_rate) + self.double_pull = sanitize_chance_output(tot_doubles - self.double_two) + self.rem_xbh -= Decimal(self.double_two + self.double_pull) + + if self.rem_xbh > Decimal(0): + self.bp_homerun = bp_homeruns(self.rem_xbh, hr_per_fb) + self.homerun = sanitize_chance_output(self.rem_xbh - self.bp_homerun, min_chances=0.5) + self.rem_xbh -= Decimal(self.bp_homerun + self.homerun) + + if szn_triples > 0 and self.rem_xbh > 0: + self.triple = sanitize_chance_output(self.rem_xbh, min_chances=0.5) + if self.rem_xbh > 0: + logging.error(f'Adding {self.rem_xbh} results to all outs') + print(self) + self.all_outs += self.rem_xbh + + +# def total_chances(chance_data): +# sum_chances = 0 +# for key in chance_data: +# if key not in ['id', 'player_id', 'cardset_id', 'vs_hand', 'is_prep']: +# sum_chances += chance_data[key] +# +# return mround(sum_chances) + + +def sanitize_chance_output(total_chances, min_chances=1.0, rounding=0.05): + # r_val = mround(total_chances) if total_chances >= min_chances else 0 + r_val = Decimal(total_chances) if total_chances >= min_chances else Decimal(0) + logging.debug(f'r_val: {r_val}') + return Decimal(float(round(total_chances / Decimal(rounding)) * Decimal(rounding))).quantize(Decimal("0.05")) + # return r_val.quantize(Decimal(rounding)) + + +def total_singles(all_hits, szn_singles, szn_hits): + return sanitize_chance_output(all_hits * ((szn_singles * .8) / szn_hits)) def bp_singles(all_singles): if all_singles < 6: - return 0 + return Decimal(0) else: - return 5 + return Decimal(5) def wh_singles(rem_singles, hard_rate): if rem_singles == 0 or hard_rate < .2: return 0 elif hard_rate > .4: - return mround(rem_singles * .666) + return sanitize_chance_output(rem_singles * Decimal(.666), min_chances=2) else: - return mround(rem_singles * .333) + return sanitize_chance_output(rem_singles * Decimal(.333), min_chances=2) -def one_singles(rem_singles, ifh_rate, force_rem): +def one_singles(rem_singles, ifh_rate, force_rem=False): if force_rem: return mround(rem_singles) elif rem_singles == 0 or ifh_rate < .05: - return 0 + return Decimal(0) else: - return mround(rem_singles * ifh_rate * 3) + return sanitize_chance_output(rem_singles * ifh_rate * Decimal(3), min_chances=2) def all_homeruns(rem_hits, all_hits, hrs, hits, singles): @@ -108,27 +170,36 @@ def all_homeruns(rem_hits, all_hits, hrs, hits, singles): def nd_homeruns(all_hr, hr_rate): if all_hr == 0 or hr_rate == 0: - return 0 + return Decimal(0) elif hr_rate > .2: - return mround(all_hr * .6) + return sanitize_chance_output(all_hr * .6) else: - return mround(all_hr * .25) + return sanitize_chance_output(all_hr * .25) + + +def bp_homeruns(all_hr, hr_rate): + if all_hr == 0 or hr_rate == 0: + return Decimal(0) + elif hr_rate > .2: + return sanitize_chance_output(all_hr * Decimal(.4), rounding=1.0) + else: + return sanitize_chance_output(all_hr * Decimal(.75), rounding=1.0) def triples(all_xbh, tr_count, do_count): - if all_xbh == 0 or tr_count == 0: - return 0 + if all_xbh == Decimal(0) or tr_count == Decimal(0): + return Decimal(0) else: - return mround(all_xbh * (tr_count / (tr_count + do_count))) + return sanitize_chance_output(all_xbh * Decimal(tr_count / (tr_count + do_count)), min_chances=1) def two_doubles(all_doubles, soft_rate): if all_doubles == 0 or soft_rate == 0: - return 0 + return Decimal(0) elif soft_rate > .2: - return mround(all_doubles / 2) + return sanitize_chance_output(all_doubles / 2) else: - return mround(all_doubles / 4) + return sanitize_chance_output(all_doubles / 4) def hit_by_pitch(other_ob, hbps, walks): @@ -336,3 +407,47 @@ def hit_and_run(ab_vl: int, ab_vr: int, hits_vl: int, hits_vr: int, hr_vl: int, return 'C' else: return 'D' + + +def get_batter_ratings(df_data) -> List[BattingCardRatingsModel]: + offense_mod = 1.2 + vl = BattingCardRatingsModel( + battingcard_id=df_data.key_fangraphs, + vs_hand='L', + all_hits=mround(108 * offense_mod * df_data['AVG_vL']), + other_ob=mround(108 * offense_mod * ((df_data['BB_vL'] + df_data['HBP_vL']) / df_data['PA_vL'])), + hard_rate=df_data['Hard%_vL'], + med_rate=df_data['Med%_vL'], + soft_rate=df_data['Soft%_vL'] + ) + vr = BattingCardRatingsModel( + battingcard_id=df_data.key_fangraphs, + vs_hand='R', + all_hits=mround(108 * offense_mod * df_data['AVG_vR']), + other_ob=mround(108 * offense_mod * ((df_data['BB_vR'] + df_data['HBP_vR']) / df_data['PA_vR'])), + hard_rate=df_data['Hard%_vR'], + med_rate=df_data['Med%_vR'], + soft_rate=df_data['Soft%_vR'] + ) + vl.all_outs = Decimal(108 - vl.all_hits - vl.other_ob).quantize(Decimal("0.05")) + vr.all_outs = Decimal(108 - vr.all_hits - vr.other_ob).quantize(Decimal("0.05")) + + vl.calculate_singles(df_data['1B_vL'], df_data['H_vL'], Decimal(df_data['IFH%_vL'])) + vr.calculate_singles(df_data['1B_vR'], df_data['H_vR'], Decimal(df_data['IFH%_vR'])) + + logging.debug( + f'vL - All Hits: {vl.all_hits} / Other OB: {vl.other_ob} / All Outs: {vl.all_outs} ' + f'/ Total: {vl.all_hits + vl.other_ob + vl.all_outs}' + ) + logging.debug( + f'vR - All Hits: {vr.all_hits} / Other OB: {vr.other_ob} / All Outs: {vr.all_outs} ' + f'/ Total: {vr.all_hits + vr.other_ob + vr.all_outs}' + ) + + vl.calculate_xbh(df_data['3B_vL'], df_data['2B_vL'], df_data['HR_vL'], df_data['HR/FB_vL']) + vr.calculate_xbh(df_data['3B_vR'], df_data['2B_vR'], df_data['HR_vR'], df_data['HR/FB_vR']) + + logging.info(f'all_hits: {vl.all_hits} / sum of hits: {Decimal(vl.bp_single + vl.single_one + vl.single_two + vl.single_center + vl.double_two + vl.double_pull + vl.double_three + vl.triple + vl.homerun + vl.bp_homerun)}') + logging.info(f'all_hits: {vr.all_hits} / sum of hits: {Decimal(vr.bp_single + vr.single_one + vr.single_two + vr.single_center + vr.double_two + vr.double_pull + vr.double_three + vr.triple + vr.homerun + vr.bp_homerun)}') + + return [vl, vr] diff --git a/calcs_defense.py b/calcs_defense.py index b3f17c7..aa9cf45 100644 --- a/calcs_defense.py +++ b/calcs_defense.py @@ -1,4 +1,9 @@ -import math +import logging + +import pandas as pd +import requests +from bs4 import BeautifulSoup +from typing import Literal def range_pitcher(rs_value: int, season_pct: float): @@ -27,62 +32,76 @@ def range_catcher(rs_value: int, season_pct: float): return 5 -def range_first_base(drs: int, r_dp: int, season_pct: float): - if (drs + r_dp) >= (4 * season_pct): +def range_first_base(tz_runs: int, r_dp: int, season_pct: float): + if (tz_runs + r_dp) >= max(6 * season_pct, 2): return 1 - elif (drs + r_dp) >= (1 * season_pct): + elif (tz_runs + r_dp) >= max(2 * season_pct, 1): return 2 - elif (drs + r_dp) >= (-1 * season_pct): + elif (tz_runs + r_dp) >= min(-1 * season_pct, -1): return 3 - elif (drs + r_dp) >= (-3 * season_pct): + elif (tz_runs + r_dp) >= min(-3 * season_pct, -3): return 4 else: return 5 -def range_second_base(drs: int, r_dp: int, season_pct: float): - if (drs + r_dp) >= (8 * season_pct): +def range_second_base(tz_runs: int, r_dp: int, season_pct: float): + if (tz_runs + r_dp) >= max(6 * season_pct, 2): return 1 - elif (drs + r_dp) >= (2 * season_pct): + elif (tz_runs + r_dp) >= max(2 * season_pct, 1): return 2 - elif (drs + r_dp) >= (0 * season_pct): + elif (tz_runs + r_dp) >= min(-1 * season_pct, -1): return 3 - elif (drs + r_dp) >= (-3 * season_pct): + elif (tz_runs + r_dp) >= min(-3 * season_pct, -3): return 4 else: return 5 -def range_third_base(drs: int, r_dp: int, season_pct: float): - if (drs + r_dp) >= (5 * season_pct): +def range_third_base(tz_runs: int, r_dp: int, season_pct: float): + if (tz_runs + r_dp) >= max(6 * season_pct, 2): return 1 - elif (drs + r_dp) >= (2 * season_pct): + elif (tz_runs + r_dp) >= max(2 * season_pct, 1): return 2 - elif (drs + r_dp) >= (0 * season_pct): + elif (tz_runs + r_dp) >= min(-1 * season_pct, -1): return 3 - elif (drs + r_dp) >= (-3 * season_pct): + elif (tz_runs + r_dp) >= min(-3 * season_pct, -3): return 4 else: return 5 -def range_shortstop(drs: int, r_dp: int, season_pct: float): - if (drs + r_dp) >= (9 * season_pct): +def range_shortstop(tz_runs: int, r_dp: int, season_pct: float): + if (tz_runs + r_dp) >= max(8 * season_pct, 2): return 1 - elif (drs + r_dp) >= (2 * season_pct): + elif (tz_runs + r_dp) >= max(2 * season_pct, 1): return 2 - elif (drs + r_dp) >= (0 * season_pct): + elif (tz_runs + r_dp) >= min(-1 * season_pct, -1): return 3 - elif (drs + r_dp) >= (-3 * season_pct): + elif (tz_runs + r_dp) >= min(-3 * season_pct, -3): return 4 else: return 5 +def get_if_range(pos_code: str, tz_runs: int, r_dp: int, season_pct: float): + logging.debug(f'pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})') + if pos_code == '1b': + return range_first_base(tz_runs, 0, season_pct) + elif pos_code == '2b': + return range_second_base(tz_runs, 0, season_pct) + elif pos_code == '3b': + return range_third_base(tz_runs, 0, season_pct) + elif pos_code == 'ss': + return range_shortstop(tz_runs, 0, season_pct) + else: + raise ValueError(f'get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid') + + def range_center_field(drs: int, season_pct: float): if drs >= 9 * season_pct: return 1 - elif drs >= 2 * season_pct: + elif drs >= 3 * season_pct: return 2 elif drs >= -1 * season_pct: return 3 @@ -100,6 +119,16 @@ def range_right_field(drs: int, season_pct: float): return range_center_field(drs, season_pct) +def get_of_range(pos_code: str, tz_runs: int, season_pct: float): + logging.info(f'pos: {pos_code} / tz_runs: {tz_runs}') + if pos_code == 'lf': + return range_left_field(tz_runs, season_pct) + elif pos_code == 'cf': + return range_center_field(tz_runs, season_pct) + else: + return range_right_field(tz_runs, season_pct) + + def valid_error_ratings(err_num: int, position: str) -> int: if position.lower() == 'p': valid_err = [ @@ -175,24 +204,40 @@ def error_outfield(errors: int, chances: int, season_pct: float): return valid_error_ratings(int(raw_error(errors, chances, season_pct, 250)), 'of') +def get_any_error(pos_code: str, errors: int, chances: int, season_pct: float): + if pos_code == 'p': + return error_pitcher(errors, chances, season_pct) + elif pos_code == 'c': + return error_catcher(errors, chances, season_pct) + elif pos_code == '1b': + return error_first_base(errors, chances, season_pct) + elif pos_code == '2b': + return error_second_base(errors, chances, season_pct) + elif pos_code == '3b': + return error_third_base(errors, chances, season_pct) + elif pos_code == 'ss': + return error_shortstop(errors, chances, season_pct) + elif pos_code in ['lf', 'cf', 'rf', 'of']: + return error_outfield(errors, chances, season_pct) + + def arm_outfield(all_arms: list): if not all_arms: - return '+5' + return 5 if max(all_arms) > 8: - return '-6' + return -6 elif max(all_arms) > 4: - return '-5' + return -5 elif max(all_arms) < -4: - return '+5' + return +5 else: - final_arm = max(all_arms) * -1 - return f'{"+" if final_arm >= 0 else ""}{final_arm}' + return max(all_arms) * -1 -def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> str: +def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int: if cs_pct == '': - return '+3' + return 3 cs_pct = float(cs_pct.strip("%")) / 100 if raa > 5 * season_pct: @@ -229,15 +274,14 @@ def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> str: else: raw_arm = 5 - final_arm = min(max_arm, raw_arm) - return f'{"+" if final_arm >= 0 else ""}{final_arm}' + return int(min(max_arm, raw_arm)) def pb_catcher(pb: int, innings: int, season_pct: float): if pb == 0 or innings == 0: return 0 - return abs(min(pb * 1000 * season_pct / innings, 20)) + return int(abs(min(pb * 1000 * season_pct / innings, 20))) def ot_catcher(errors: int, chances: int, season_pct: float): @@ -245,7 +289,7 @@ def ot_catcher(errors: int, chances: int, season_pct: float): return 0 c_max = 3000 * season_pct - return min(errors * c_max / chances / 3, 20) + return int(min(errors * c_max / chances / 3, 20)) def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> str: @@ -328,3 +372,36 @@ def innings_float(innings: str) -> float: decimal = "0" return float(int(whole) + int(decimal) * .333) + + +# Get position stats into dataframes +def get_bbref_fielding_df( + position: Literal['p', 'c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of'], s_num: int): + url = f'https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml' + soup = BeautifulSoup(requests.get(url).text, 'html.parser') + table = soup.find('table', {'id': 'players_players_standard_fielding_fielding'}) + headers = [] + data = [] + indeces = [] + for row in table.find_all('tr'): + row_data = [] + col_names = [] + for cell in row.find_all('td'): + try: + player_id = cell['data-append-csv'] + row_data.append(player_id) + if len(headers) == 0: + col_names.append('key_bbref') + except Exception as e: + pass + row_data.append(cell.text) + if len(headers) == 0: + col_names.append(cell['data-stat']) + if len(row_data) > 0: + data.append(row_data) + indeces.append(row_data[0]) + if len(headers) == 0: + headers.extend(col_names) + pos_frame = pd.DataFrame(data, index=indeces, columns=headers).query('key_bbref == key_bbref') + tmp = pos_frame[~pos_frame['chances'].isin(['0', '1', '2'])] + return tmp.drop_duplicates(subset=['key_bbref'], keep='first') diff --git a/db_calls.py b/db_calls.py index ca64a87..5ae2158 100644 --- a/db_calls.py +++ b/db_calls.py @@ -136,6 +136,37 @@ async def db_post(endpoint: str, api_ver: int = 2, payload: dict = None, timeout raise ValueError(f'DB: {resp.text}') +async def db_put(endpoint: str, api_ver: int = 2, payload: dict = None, timeout: int = 3): + req_url = get_req_url(endpoint, api_ver=api_ver) + log_string = f'post:\n{endpoint} payload: {payload}\ntype: {type(payload)}' + logging.info(log_string) if master_debug else logging.debug(log_string) + + retries = 0 + while True: + try: + resp = requests.put(req_url, json=payload, headers=AUTH_TOKEN, timeout=timeout) + break + except requests.Timeout as e: + logging.error(f'Post Timeout: {req_url} / retries: {retries} / timeout: {timeout}') + if retries > 1: + raise ConnectionError(f'DB: The internet was a bit too slow for me to grab the data I needed. Please ' + f'hang on a few extra seconds and try again.') + timeout += [min(3, timeout), min(5, timeout)][retries] + retries += 1 + + if resp.status_code == 200: + data = resp.json() + log_string = f'{data}' + if master_debug: + logging.info(f'return: {log_string[:1200]}{" [ S N I P P E D ]" if len(log_string) > 1200 else ""}') + else: + logging.debug(f'return: {log_string[:1200]}{" [ S N I P P E D ]" if len(log_string) > 1200 else ""}') + return data + else: + logging.warning(resp.text) + raise ValueError(f'DB: {resp.text}') + + async def db_delete(endpoint: str, object_id: int, api_ver: int = 2, timeout=3): req_url = get_req_url(endpoint, api_ver=api_ver, object_id=object_id) log_string = f'delete:\n{endpoint} {object_id}' diff --git a/live_series_update.py b/live_series_update.py index c001288..d17e84d 100644 --- a/live_series_update.py +++ b/live_series_update.py @@ -1,17 +1,21 @@ import asyncio +import copy import csv import datetime +import html5lib import logging import random import requests -import calcs_batter as cb +import calcs_batter as cba +import calcs_defense as cde +import calcs_pitcher as cpi import pandas as pd import pybaseball as pb import pydantic import sys -from db_calls import db_get +from db_calls import db_get, db_put, db_post from typing import Literal from bs4 import BeautifulSoup @@ -22,6 +26,36 @@ logging.basicConfig( format='%(asctime)s - card-creation - %(levelname)s - %(message)s', level=log_level ) +CARD_BASE_URL = 'https://sombaseball.ddns.net/cards/pd' + + +def sanitize_name(start_name: str) -> str: + return (start_name + .replace("é", "e") + .replace("á", "a") + .replace(".", "") + .replace("Á", "A") + .replace("ñ", "n") + .replace("ó", "o") + .replace("í", "i") + .replace("ú", "u")) + + +def get_args(args): + logging.info(f'Process arguments: {args}') + final_args = {} + for x in args: + if "=" not in x: + raise TypeError(f'Invalid = argument: {x}') + + key, value = x.split("=") + logging.info(f'key: {key} / value: {value}') + + if key in final_args: + raise ValueError(f'Duplicate argument: {key}') + + final_args[key] = value + return final_args # class BattingStat(pydantic.BaseModel): @@ -55,26 +89,34 @@ logging.basicConfig( # oppo_rate: float = None -async def main(argv): - cardset_name = input(f'What is the name of this Cardset? ') +async def main(args): + arg_data = get_args(args) + + # cardset_name = input(f'What is the name of this Cardset? ') + cardset_name = arg_data['cardset_name'] + print(f'Searching for cardset: {cardset_name}') c_query = await db_get('cardsets', params=[('name', cardset_name)]) if c_query['count'] == 0: print(f'I do not see a cardset named {cardset_name}') return cardset = c_query['cardsets'][0] - print(f'Cardset ID: {cardset["id"]}') - game_count = int(input(f'How many games out of 162 have been played? ')) + if 'season' in arg_data: + season = arg_data['season'] + else: + season = int(cardset['name'][:4]) + + game_count = int(arg_data['games_played']) if game_count < 1 or game_count > 162: print(f'Game count has to be between 1 and 162.') return season_pct = game_count / 162 + print(f'Cardset ID: {cardset["id"]} / Season: {season}\nGame count: {game_count} / Season %: {season_pct}\n') + start_time = datetime.datetime.now() + release_directory = f'{season}-{datetime.datetime.now().month}{datetime.datetime.now().day}' input_path = f'data-input/{cardset["name"]} Cardset/' - # print(f"\nI'll be reading from the following files:\n" - # f"{input_path}vlhp-basic.csv\n{input_path}vlhp-rate.csv\n{input_path}vrhp-basic.csv\n" - # f"{input_path}vrhp-rate.csv\n{input_path}running.csv") print('Reading batting stats...') vl_basic = pd.read_csv(f'{input_path}vlhp-basic.csv').query('PA >= 20') @@ -103,27 +145,85 @@ async def main(argv): else: return 'R' - async def get_offense_col(df_data): - p_query = await db_get('mlbplayers', api_ver=2, params=[('key_bbref', df_data['key_bbref'])]) - if p_query['count'] > 0: - return p_query['players'][0]['offense_col'] - else: - return random.randint(1, 3) + print(f'Pulling PD player IDs...') + p_query = await db_get('players', params=[('inc_dex', False), ('cardset_id', cardset['id'])]) + if p_query['count'] == 0: + raise ValueError(f'No players returned from Paper Dynasty API') + pd_players = pd.DataFrame(p_query['players']).rename(columns={'bbref_id': 'key_bbref'}) - print(f'Pulling player IDs...') - ids_and_names = all_batting.apply(get_pids, axis=1, result_type='expand') - final_batting = ids_and_names.query('key_mlbam == key_mlbam').join(all_batting) - print(f'Player IDs linked...{len(final_batting.values)} players remain\n') + print(f'Now pulling mlbam player IDs...') + ids_and_names = all_batting.apply(get_pids, axis=1) + player_data = (ids_and_names + .merge(pd_players, left_on='key_bbref', right_on='key_bbref') + .query('key_mlbam == key_mlbam') + .set_index('key_bbref', drop=False)) + print(f'Matched mlbam to pd players.') + new_players = [] + + def create_players(df_data): + f_name = sanitize_name(df_data["name_first"]).title() + l_name = sanitize_name(df_data["name_last"]).title() + new_players.append({ + 'p_name': f'{f_name} {l_name}', + 'cost': 99999, + 'image': f'{CARD_BASE_URL}/{release_directory}/{f_name.lower()}-{l_name.lower()}.png', + 'mlbclub': 'None', + 'franchise': 'None', + 'cardset_id': cardset['id'], + 'set_num': df_data['key_fangraphs'], + 'rarity_id': 99, + 'pos_1': 'DH', + 'description': f'Live {f_name} {l_name}', + 'bbref_id': df_data.name, + 'fangr_id': int(float(df_data['key_fangraphs'])) + }) + player_data[player_data['player_id'].isnull()].apply(create_players, axis=1) + print(f'Creating {len(new_players)} new players...') + for x in new_players: + this_player = await db_post('players', payload=x) + player_data.at[x['bbref_id'], 'player_id'] = this_player['player_id'] + player_data.at[x['bbref_id'], 'p_name'] = this_player['p_name'] + + final_batting = pd.merge( + player_data, all_batting, left_on='key_fangraphs', right_on='playerId', sort=False + ).set_index('key_bbref', drop=False) + del ids_and_names, all_batting, pd_players + print(f'Player IDs linked to batting stats.\n{len(final_batting.values)} players remain\n') print(f'Reading baserunning stats...') - run_data = pd.read_csv(f'{input_path}running.csv').rename(columns={"Name-additional": "key_bbref"}) + run_data = (pd.read_csv(f'{input_path}running.csv') + .set_index('Name-additional')) run_data['bat_hand'] = run_data.apply(get_hand, axis=1) - offense_stats = pd.merge(final_batting, run_data, on="key_bbref") + offense_stats = final_batting.join(run_data) del final_batting, run_data - print(f'Stats are tallied\n') + print(f'Stats are tallied\n{len(offense_stats.values)} players remain\n\nCollecting defensive data from bbref...') + + # print(f'Pulling pitcher defense...') + # df_p = cde.get_bbref_fielding_df('p', season) + # print(f'Pulling catcher defense...') + # df_c = cde.get_bbref_fielding_df('c', season) + # print(f'Pulling first base defense...') + # df_1b = cde.get_bbref_fielding_df('1b', season) + # print(f'Pulling second base defense...') + # df_2b = cde.get_bbref_fielding_df('2b', season) + # print(f'Pulling third base defense...') + # df_3b = cde.get_bbref_fielding_df('3b', season) + # print(f'Pulling short stop defense...') + # df_ss = cde.get_bbref_fielding_df('ss', season) + # print(f'Pulling left field defense...') + # df_lf = cde.get_bbref_fielding_df('lf', season) + # print(f'Pulling center field defense...') + # df_cf = cde.get_bbref_fielding_df('cf', season) + # print(f'Pulling right field defense...') + # df_rf = cde.get_bbref_fielding_df('rf', season) + # print(f'Pulling outfield defense...') + # df_of = cde.get_bbref_fielding_df('of', season) + print(f'Positions data is retrieved') + + batting_cards = [] def create_batting_card(df_data): - s_data = cb.stealing( + s_data = cba.stealing( chances=df_data['SBO'], sb2s=df_data['SB2'], cs2s=df_data['CS2'], @@ -131,166 +231,144 @@ async def main(argv): cs3s=df_data['CS3'], season_pct=season_pct ) - return { + batting_cards.append({ + "player_id": df_data['player_id'], + "key_bbref": df_data.name, + "key_fangraphs": df_data['key_fangraphs'], + "key_mlbam": df_data['key_mlbam'], + "key_retro": df_data['key_retro'], + "name_first": df_data["name_first"].title(), + "name_last": df_data["name_last"].title(), "steal_low": s_data[0], "steal_high": s_data[1], "steal_auto": s_data[2], "steal_jump": s_data[3], - "hit_and_run": cb.hit_and_run( + "hit_and_run": cba.hit_and_run( df_data['AB_vL'], df_data['AB_vR'], df_data['H_vL'], df_data['H_vR'], df_data['HR_vL'], df_data['HR_vR'], df_data['SO_vL'], df_data['SO_vR'] ), - "running": cb.running(df_data['XBT%']), + "running": cba.running(df_data['XBT%']), "hand": df_data['bat_hand'] - } + }) print(f'Calculating batting cards...') - offense_stats['batting_card'] = offense_stats.apply(create_batting_card, axis=1) - print(f'Cards are complete\n') + offense_stats.apply(create_batting_card, axis=1) + print(f'Cards are complete.\n\nPosting cards now...') + # resp = await db_put('battingcards', payload={'cards': batting_cards}, timeout=30) + # print(f'Response: {resp}\n') + + position_payload = [] + + # def create_positions(df_data): + # for pos_data in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]: + # if df_data.name in pos_data[0].index: + # logging.debug(f'Running {pos_data[1]} stats for {player_data.at[df_data.name, "p_name"]}') + # position_payload.append({ + # "player_id": int(player_data.at[df_data.name, 'player_id']), + # "position": pos_data[1].upper(), + # "innings": float(pos_data[0].at[df_data.name, 'Inn_def']), + # "range": cde.get_if_range( + # pos_code=pos_data[1], + # tz_runs=int(pos_data[0].at[df_data.name, 'tz_runs_total']), + # r_dp=0, + # season_pct=season_pct + # ), + # "error": cde.get_any_error( + # pos_code=pos_data[1], + # errors=int(pos_data[0].at[df_data.name, 'E_def']), + # chances=int(pos_data[0].at[df_data.name, 'chances']), + # season_pct=season_pct + # ) + # }) + # + # of_arms = [] + # of_payloads = [] + # for pos_data in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]: + # if df_data.name in pos_data[0].index: + # of_payloads.append({ + # "player_id": int(player_data.at[df_data.name, 'player_id']), + # "position": pos_data[1].upper(), + # "innings": float(pos_data[0].at[df_data.name, 'Inn_def']), + # "range": cde.get_of_range( + # pos_code=pos_data[1], + # tz_runs=int(pos_data[0].at[df_data.name, 'tz_runs_total']), + # season_pct=season_pct + # ) + # }) + # of_arms.append(int(pos_data[0].at[df_data.name, 'bis_runs_outfield'])) + # + # if df_data.name in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0: + # error_rating = cde.get_any_error( + # pos_code=pos_data[1], + # errors=int(df_of.at[df_data.name, 'E_def']), + # chances=int(df_of.at[df_data.name, 'chances']), + # season_pct=season_pct + # ) + # arm_rating = cde.arm_outfield(of_arms) + # for f in of_payloads: + # f['error'] = error_rating + # f['arm'] = arm_rating + # position_payload.append(f) + # + # if df_data.name in df_c.index: + # if df_c.at[df_data.name, 'SB'] + df_c.at[df_data.name, 'CS'] == 0: + # arm_rating = 3 + # else: + # arm_rating = cde.arm_catcher( + # cs_pct=df_c.at[df_data.name, 'caught_stealing_perc'], + # raa=int(df_c.at[df_data.name, 'bis_runs_catcher_sb']), + # season_pct=season_pct + # ) + # position_payload.append({ + # "player_id": int(player_data.at[df_data.name, 'player_id']), + # "position": 'C', + # "innings": float(df_c.at[df_data.name, 'Inn_def']), + # "range": cde.range_catcher( + # rs_value=int(df_c.at[df_data.name, 'tz_runs_catcher']), + # season_pct=season_pct + # ), + # "error": cde.get_any_error( + # pos_code='c', + # errors=int(df_c.at[df_data.name, 'E_def']), + # chances=int(df_c.at[df_data.name, 'chances']), + # season_pct=season_pct + # ), + # "arm": arm_rating, + # "pb": cde.pb_catcher( + # pb=int(df_c.at[df_data.name, 'PB']), + # innings=int(float(df_c.at[df_data.name, 'Inn_def'])), + # season_pct=season_pct + # ), + # "overthrow": cde.ot_catcher( + # errors=int(df_c.at[df_data.name, 'E_def']), + # chances=int(df_c.at[df_data.name, 'chances']), + # season_pct=season_pct + # ) + # }) + # + # print(f'Calculating fielding lines now...') + # offense_stats.apply(create_positions, axis=1) + # print(f'Fielding is complete.\n\nPosting positions now...') + # resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30) + # print(f'Response: {resp}\n') + + batting_ratings = [] def create_batting_card_ratings(df_data): - vl = cb.BattingCardRatingsModel(vs_hand='L') - vr = cb.BattingCardRatingsModel(vs_hand='R') - # TODO: Build Batting Card Ratings + logging.info(f'Calculating card ratings for {df_data.name}') + batting_ratings.extend(cba.get_batter_ratings(df_data)) print(f'Calculating card ratings...') - offense_stats['batting_card_ratings'] = offense_stats.apply(create_batting_card_ratings, axis=1) - print(f'Ratings are complete\n') + offense_stats.apply(create_batting_card_ratings, axis=1) + print(f'Ratings are complete\n\nPosting ratings now...') + # resp = await db_put('battingcardratings', payload={'ratings': batting_ratings}, timeout=30) - # Get position stats into dataframes + # Update player record with positions, rarity, cost + # Cost only changes if starting cost is 99999 or calculated rarity is different than current - # batting_data = {} # { : { 'vL': BattingStat, 'vR': BattingStat, 'run': } } - # - # with open(f'{input_path}vlhp-basic.csv', 'r', encoding='utf8') as file: - # reader = csv.reader(file) - # logging.info(f'Reading vLHP Basic') - # for row in reader: - # logging.info(f'Reading vL basic / player id: {row[23]} / name: {row[1]}') - # if row[0] != 'Season' and int(row[4]) >= 20: - # batting_data[row[23]] = { - # 'vL': BattingStat( - # fg_id=row[23], - # vs_hand='L', - # pa=row[4], - # hit=row[6], - # single=row[7], - # double=row[8], - # triple=row[9], - # homerun=row[10], - # rbi=row[12], - # bb=row[13], - # ibb=row[14], - # so=row[15], - # hbp=row[16], - # gidp=row[19], - # sb=row[20], - # cs=row[21], - # avg=row[22] - # ), - # 'vR': None - # } - # logging.info(f'Saved vL basic BattingStat for {row[1]}') - # else: - # logging.error(f'Invalid vL basic row; PA: {row[4]}') - # - # with open(f'{input_path}vlhp-rate.csv', 'r', encoding='utf8') as file: - # reader = csv.reader(file) - # logging.info(f'Reading vLHP Rate') - # for row in reader: - # logging.info(f'Reading vL rate / player id: {row[18]} / name: {row[1]}') - # if row[0] != 'Season' and int(row[3]) >= 20 and row[18] in batting_data: - # this_stat = batting_data[row[18]]['vL'] - # this_stat.hard_rate = row[17] - # this_stat.med_rate = row[16] - # this_stat.soft_rate = row[15] - # this_stat.ifh_rate = row[10] - # this_stat.hr_per_fb = row[9] - # this_stat.ld_rate = row[5] - # this_stat.iffb_rate = row[8] - # this_stat.fb_rate = row[7] - # this_stat.pull_rate = row[12] - # this_stat.center_rate = row[13] - # this_stat.oppo_rate = row[14] - # logging.info(f'Saved vL rate BattingStat for {row[1]}') - # else: - # logging.error(f'Invalid vL rate row; PA: {row[3]}') - # - # with open(f'{input_path}vrhp-basic.csv', 'r', encoding='utf8') as file: - # reader = csv.reader(file) - # logging.info(f'Reading vRHP Basic') - # for row in reader: - # logging.info(f'Reading vR basic / player id: {row[23]} / name: {row[1]}') - # if row[0] != 'Season' and int(row[4]) >= 40: - # if row[23] in batting_data: - # batting_data[row[23]]['vR'] = BattingStat( - # fg_id=row[23], - # vs_hand='R', - # pa=row[4], - # hit=row[6], - # single=row[7], - # double=row[8], - # triple=row[9], - # homerun=row[10], - # rbi=row[12], - # bb=row[13], - # ibb=row[14], - # so=row[15], - # hbp=row[16], - # gidp=row[19], - # sb=row[20], - # cs=row[21], - # avg=row[22] - # ) - # logging.info(f'Saved vR basic BattingStat for {row[1]}') - # else: - # logging.error(f'Player {row[1]} does not have a vL line - skipping vR line') - # else: - # logging.error(f'Invalid vR basic row; PA: {row[4]}') - # - # with open(f'{input_path}vrhp-rate.csv', 'r', encoding='utf8') as file: - # reader = csv.reader(file) - # logging.info(f'Reading vRHP Rate') - # for row in reader: - # logging.info(f'Reading vR rate / player id: {row[18]} / name: {row[1]}') - # if row[18] not in batting_data: - # logging.error(f'Invalid vR rate row / {row[1]} has no vL data') - # elif row[0] != 'Season' and int(row[3]) >= 40: - # this_stat = batting_data[row[18]]['vR'] - # this_stat.hard_rate = row[17] - # this_stat.med_rate = row[16] - # this_stat.soft_rate = row[15] - # this_stat.ifh_rate = row[10] - # this_stat.hr_per_fb = row[9] - # this_stat.ld_rate = row[5] - # this_stat.iffb_rate = row[8] - # this_stat.fb_rate = row[7] - # this_stat.pull_rate = row[12] - # this_stat.center_rate = row[13] - # this_stat.oppo_rate = row[14] - # logging.info(f'Saved vR rate BattingStat for {row[1]}') - # else: - # logging.error(f'Invalid vR rate row; PA: {row[3]}') - # - # # TODO: run baserunning stats and add to batting_data['run']; will need to match bbref to fgid - # # with open(f'{input_path}running.csv', 'r', encoding='utf8') as file: - # # reader = csv.reader(file) - # # logging.info(f'Reading Running stats') - # # for row in reader: - # # logging.info(f'Reading running / ') - # - # full_bstats = [] - # for x in batting_data.values(): - # if x['vL'].hard_rate is None: - # logging.error(f'Missing vL rate data for player ID {x["vL"].fg_id}') - # elif x['vR'] is None: - # logging.error(f'Missing vR data for player ID {x["vL"].fg_id}') - # elif x['vR'].hard_rate is None: - # logging.error(f'Missing vR rate data for player ID {x["vR"].fg_id}') - # else: - # logging.info(f'Adding {x["vR"].fg_id} to be processed') - # full_bstats.append({'vL': x['vL'], 'vR': x['vR']}) - - # print(f'Ready to process {len(all_batting.index)} batters\n') + run_time = datetime.datetime.now() - start_time + print(f'Total batting cards: {len(batting_cards)}\nNew cardset batters: {len(new_players)}\n' + f'Program runtime: {round(run_time.total_seconds())} seconds') if __name__ == '__main__':