"""Fielding rating calculations for card positions, plus a Baseball Reference fielding scraper."""
import logging
from typing import Literal, Optional, Tuple, Union

import pandas as pd
import requests
from bs4 import BeautifulSoup

from db_calls import db_put


def _blended_range(pos_df: pd.DataFrame, key) -> float:
    """Blend a player's two range metrics into one number.

    Reads ``tz_runs_total`` and ``bis_runs_total`` for ``key`` and returns
    ``(tz + bis + min(tz, bis)) / 3``, i.e. the lower of the two counts twice.
    """
    tz_runs = int(pos_df.at[key, 'tz_runs_total'])
    bis_runs = int(pos_df.at[key, 'bis_runs_total'])
    return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3


async def create_positions(
        all_stats: pd.DataFrame, season_pct: float, post_pos: bool, df_c: pd.DataFrame, df_1b: pd.DataFrame,
        df_2b: pd.DataFrame, df_3b: pd.DataFrame, df_ss: pd.DataFrame, df_lf: pd.DataFrame, df_cf: pd.DataFrame,
        df_rf: pd.DataFrame, df_of: pd.DataFrame) -> int:
    """Build the card-position payload for every row of ``all_stats`` and optionally upload it.

    Args:
        all_stats: One row per player; the columns ``key_bbref``, ``player_id``,
            ``p_name``, ``PA_vL`` and ``PA_vR`` are read.
        season_pct: Fraction of a season used to scale rating thresholds.
        post_pos: When true, the payload is sent with ``db_put('cardpositions', ...)``.
        df_c, df_1b, df_2b, df_3b, df_ss, df_lf, df_cf, df_rf, df_of: Per-position
            fielding frames indexed by ``key_bbref``. ``Inn_def``, ``E_def``,
            ``chances``, ``tz_runs_total``, ``bis_runs_total`` and
            ``bis_runs_outfield`` are read from the fielder frames; ``SB``, ``CS``,
            ``caught_stealing_perc``, ``bis_runs_catcher_sb``, ``tz_runs_catcher``
            and ``PB`` are additionally read from ``df_c``.

    Returns:
        The number of position records that were built.
    """
    position_payload = []
    infield_frames = ((df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss'))
    outfield_frames = ((df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf'))

    def process_pos(df_data):
        """Append every position record for one player row to ``position_payload``."""
        key = df_data['key_bbref']
        no_data = True

        for pos_df, pos_code in infield_frames:
            if key not in pos_df.index:
                continue
            logging.info(f'Running {pos_code} stats for {df_data["p_name"]}')
            try:
                average_range = _blended_range(pos_df, key)
                position_payload.append({
                    "player_id": int(df_data['player_id']),
                    "position": pos_code.upper(),
                    "innings": float(pos_df.at[key, 'Inn_def']),
                    "range": get_if_range(
                        pos_code=pos_code,
                        tz_runs=round(average_range),
                        r_dp=0,
                        season_pct=season_pct
                    ),
                    "error": get_any_error(
                        pos_code=pos_code,
                        errors=int(pos_df.at[key, 'E_def']),
                        chances=int(pos_df.at[key, 'chances']),
                        season_pct=season_pct
                    )
                })
                no_data = False
            except Exception as e:
                # Best effort: a bad row for one position must not abort the whole run.
                logging.info(f'Infield position failed: {e}')

        # Outfield records are staged first: their error and arm ratings come from
        # the combined-outfield frame and the best arm across LF/CF/RF.
        of_arms = []
        of_payloads = []
        for pos_df, pos_code in outfield_frames:
            if key not in pos_df.index:
                continue
            try:
                average_range = _blended_range(pos_df, key)
                of_payloads.append({
                    "player_id": int(df_data['player_id']),
                    "position": pos_code.upper(),
                    "innings": float(pos_df.at[key, 'Inn_def']),
                    "range": get_of_range(
                        pos_code=pos_code,
                        tz_runs=round(average_range),
                        season_pct=season_pct
                    )
                })
                of_arms.append(int(pos_df.at[key, 'bis_runs_outfield']))
                # NOTE(review): no_data is cleared here even though the staged rows are
                # only emitted below if the player is also in df_of -- confirm intended.
                no_data = False
            except Exception as e:
                logging.info(f'Outfield position failed: {e}')

        if key in df_of.index and of_arms and of_payloads:
            try:
                error_rating = get_any_error(
                    pos_code='of',
                    errors=int(df_of.at[key, 'E_def']),
                    chances=int(df_of.at[key, 'chances']),
                    season_pct=season_pct
                )
                arm_rating = arm_outfield(of_arms)
                for f in of_payloads:
                    f['error'] = error_rating
                    f['arm'] = arm_rating
                    position_payload.append(f)
                no_data = False
            except Exception as e:
                logging.info(f'Outfield position failed: {e}')

        if key in df_c.index:
            try:
                # NOTE(review): if SB/CS arrive as strings, `+` concatenates and this is
                # never equal to 0 -- confirm the dtype produced upstream.
                if df_c.at[key, 'SB'] + df_c.at[key, 'CS'] == 0:
                    arm_rating = 3
                else:
                    arm_rating = arm_catcher(
                        cs_pct=df_c.at[key, 'caught_stealing_perc'],
                        raa=int(df_c.at[key, 'bis_runs_catcher_sb']),
                        season_pct=season_pct
                    )
                position_payload.append({
                    "player_id": int(df_data['player_id']),
                    "position": 'C',
                    "innings": float(df_c.at[key, 'Inn_def']),
                    "range": range_catcher(
                        rs_value=int(df_c.at[key, 'tz_runs_catcher']),
                        season_pct=season_pct
                    ),
                    "error": get_any_error(
                        pos_code='c',
                        errors=int(df_c.at[key, 'E_def']),
                        chances=int(df_c.at[key, 'chances']),
                        season_pct=season_pct
                    ),
                    "arm": arm_rating,
                    "pb": pb_catcher(
                        pb=int(df_c.at[key, 'PB']),
                        innings=int(float(df_c.at[key, 'Inn_def'])),
                        season_pct=season_pct
                    ),
                    "overthrow": ot_catcher(
                        errors=int(df_c.at[key, 'E_def']),
                        chances=int(df_c.at[key, 'chances']),
                        season_pct=season_pct
                    )
                })
                no_data = False
            except Exception as e:
                logging.info(f'Catcher position failed: {e}')

        if no_data:
            # No fielding data at all: record the player as a DH, using total
            # plate appearances in the innings field.
            position_payload.append({
                "player_id": int(df_data['player_id']),
                "position": 'DH',
                "innings": df_data['PA_vL'] + df_data['PA_vR']
            })

    print('Calculating fielding lines now...')
    all_stats.apply(process_pos, axis=1)
    print('Fielding is complete.\n\nPosting positions now...')
    if post_pos:
        resp = await db_put('cardpositions', payload={'positions': position_payload}, timeout=30)
        print(f'Response: {resp}\n')

    return len(position_payload)


def _tier_at_or_above(value, floors) -> int:
    """Return the 1-based position of the first floor that ``value`` meets (``>=``).

    ``floors`` must be ordered from highest to lowest; ``len(floors) + 1`` is
    returned when ``value`` is below all of them.
    """
    for tier, floor in enumerate(floors, start=1):
        if value >= floor:
            return tier
    return len(floors) + 1


def _first_exceeded(value, table, default):
    """Return the result paired with the first threshold that ``value`` strictly exceeds.

    ``table`` is a sequence of ``(threshold, result)`` pairs checked in order;
    ``default`` is returned when no threshold is exceeded.
    """
    for threshold, result in table:
        if value > threshold:
            return result
    return default


def range_pitcher(rs_value: int, season_pct: float) -> int:
    """Rate pitcher range 1-5 from runs saved; floors 3/1/0/-2 are scaled by ``season_pct``."""
    return _tier_at_or_above(
        rs_value, (3 * season_pct, 1 * season_pct, 0 * season_pct, -2 * season_pct)
    )


def range_catcher(rs_value: int, season_pct: float) -> int:
    """Rate catcher range 1-5 from runs saved; floors 7/3/-1/-5 are scaled by ``season_pct``."""
    return _tier_at_or_above(
        rs_value, (7 * season_pct, 3 * season_pct, -1 * season_pct, -5 * season_pct)
    )


def _infield_range(total_runs, season_pct: float, top_floor: int) -> int:
    """Shared infield range ladder; only the floor for a rating of 1 differs by position.

    The positive floors never drop below 2 and 1, and the negative floors never
    rise above -1 and -3, however small ``season_pct`` is.
    """
    return _tier_at_or_above(total_runs, (
        max(top_floor * season_pct, 2),
        max(2 * season_pct, 1),
        min(-1 * season_pct, -1),
        min(-3 * season_pct, -3),
    ))


def range_first_base(tz_runs: int, r_dp: int, season_pct: float) -> int:
    """Rate first-base range 1-5 from ``tz_runs + r_dp``."""
    return _infield_range(tz_runs + r_dp, season_pct, 6)


def range_second_base(tz_runs: int, r_dp: int, season_pct: float) -> int:
    """Rate second-base range 1-5 from ``tz_runs + r_dp``."""
    return _infield_range(tz_runs + r_dp, season_pct, 6)


def range_third_base(tz_runs: int, r_dp: int, season_pct: float) -> int:
    """Rate third-base range 1-5 from ``tz_runs + r_dp``."""
    return _infield_range(tz_runs + r_dp, season_pct, 6)


def range_shortstop(tz_runs: int, r_dp: int, season_pct: float) -> int:
    """Rate shortstop range 1-5 from ``tz_runs + r_dp``; the top floor is 8 rather than 6."""
    return _infield_range(tz_runs + r_dp, season_pct, 8)


def get_if_range(pos_code: str, tz_runs: int, r_dp: int, season_pct: float) -> int:
    """Dispatch to the infield range function for ``pos_code``.

    NOTE(review): ``r_dp`` is accepted but not forwarded; every branch passes 0,
    exactly as the code did before. Confirm whether that is intentional.

    Raises:
        ValueError: If ``pos_code`` is not one of '1b', '2b', '3b', 'ss'.
    """
    logging.info(f'pos: {pos_code} / tz_runs: {tz_runs} ({type(tz_runs)})')
    if pos_code == '1b':
        return range_first_base(tz_runs, 0, season_pct)
    elif pos_code == '2b':
        return range_second_base(tz_runs, 0, season_pct)
    elif pos_code == '3b':
        return range_third_base(tz_runs, 0, season_pct)
    elif pos_code == 'ss':
        return range_shortstop(tz_runs, 0, season_pct)
    else:
        raise ValueError(f'get_if_range - pos_code must be one of 1b, 2b, 3b, ss / {pos_code} not valid')


def range_center_field(drs: int, season_pct: float) -> int:
    """Rate outfield range 1-5 from runs saved; floors 9/3/-1/-4 are scaled by ``season_pct``."""
    return _tier_at_or_above(
        drs, (9 * season_pct, 3 * season_pct, -1 * season_pct, -4 * season_pct)
    )


def range_left_field(drs: int, season_pct: float) -> int:
    """Rate left-field range; uses the center-field ladder."""
    return range_center_field(drs, season_pct)


def range_right_field(drs: int, season_pct: float) -> int:
    """Rate right-field range; uses the center-field ladder."""
    return range_center_field(drs, season_pct)


def get_of_range(pos_code: str, tz_runs: int, season_pct: float) -> int:
    """Dispatch to the outfield range function; any code other than 'lf'/'cf' is treated as right field."""
    logging.info(f'pos: {pos_code} / tz_runs: {tz_runs}')
    if pos_code == 'lf':
        return range_left_field(tz_runs, season_pct)
    elif pos_code == 'cf':
        return range_center_field(tz_runs, season_pct)
    else:
        return range_right_field(tz_runs, season_pct)


# Error ratings that are allowed per position, in ascending order.
_VALID_ERRORS = {
    'p': (
        0, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 30, 31, 33, 34,
        35, 36, 38, 39, 40, 42, 43, 44, 46, 47, 48, 50, 51
    ),
    'c': tuple(range(17)),
    '1b': tuple(range(31)),
    '2b': (
        0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
        29, 30, 32, 34, 37, 39, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71
    ),
    '3b': (
        0, 1, 2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
        29, 30, 31, 32, 33, 34, 35, 37, 39, 41, 44, 47, 50, 53, 56, 59, 62, 65
    ),
    'ss': (
        0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 36, 38, 40, 42, 44, 48, 52, 56, 60, 64, 68, 72
    ),
}
# Any position code without its own table (the outfield spots) uses 0-25.
_OUTFIELD_VALID_ERRORS = tuple(range(26))


def valid_error_ratings(err_num: int, position: str) -> int:
    """Snap ``err_num`` to an error rating that is allowed for ``position``.

    A value already in the position's table is returned unchanged; otherwise
    the next higher allowed value is returned, capped at the table's maximum.
    ``position`` is matched case-insensitively.
    """
    valid_err = _VALID_ERRORS.get(position.lower(), _OUTFIELD_VALID_ERRORS)
    return next((x for x in valid_err if x >= err_num), valid_err[-1])


def raw_error(errors: int, chances: int, season_pct: float, chance_max: int):
    """Scale an error count to ``chance_max`` chances: ``errors * chance_max / chances``.

    Returns 0 when either ``errors`` or ``chances`` is 0. ``season_pct`` is
    accepted but not applied (scaling the chance cap by it is disabled).
    """
    if errors == 0 or chances == 0:
        return 0
    return errors * chance_max / chances


def error_pitcher(errors: int, chances: int, season_pct: float) -> int:
    """Pitcher error rating, scaled to 300 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 300)), 'p')


def error_catcher(errors: int, chances: int, season_pct: float) -> int:
    """Catcher error rating, scaled to 500 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), 'c')


def error_first_base(errors: int, chances: int, season_pct: float) -> int:
    """First-base error rating, scaled to 1300 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 1300)), '1b')


def error_second_base(errors: int, chances: int, season_pct: float) -> int:
    """Second-base error rating, scaled to 700 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), '2b')


def error_third_base(errors: int, chances: int, season_pct: float) -> int:
    """Third-base error rating, scaled to 500 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 500)), '3b')


def error_shortstop(errors: int, chances: int, season_pct: float) -> int:
    """Shortstop error rating, scaled to 700 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 700)), 'ss')


def error_outfield(errors: int, chances: int, season_pct: float) -> int:
    """Outfield error rating, scaled to 250 chances."""
    return valid_error_ratings(int(raw_error(errors, chances, season_pct, 250)), 'of')


def get_any_error(pos_code: str, errors: int, chances: int, season_pct: float) -> Optional[int]:
    """Dispatch to the error-rating function for ``pos_code`` (case-insensitive).

    'lf', 'cf', 'rf' and 'of' all use the outfield rating.

    NOTE(review): an unrecognised ``pos_code`` returns ``None`` rather than
    raising, unlike ``get_if_range``; kept as-is for existing callers.
    """
    code = pos_code.lower()
    if code == 'p':
        return error_pitcher(errors, chances, season_pct)
    elif code == 'c':
        return error_catcher(errors, chances, season_pct)
    elif code == '1b':
        return error_first_base(errors, chances, season_pct)
    elif code == '2b':
        return error_second_base(errors, chances, season_pct)
    elif code == '3b':
        return error_third_base(errors, chances, season_pct)
    elif code == 'ss':
        return error_shortstop(errors, chances, season_pct)
    elif code in ('lf', 'cf', 'rf', 'of'):
        return error_outfield(errors, chances, season_pct)
    return None


def arm_outfield(all_arms: list):
    """Turn a player's outfield arm run values into one arm rating.

    Only the largest value is used: above 8 gives -6, above 4 gives -5, below
    -4 gives 5, anything else is the value negated. An empty list gives 5.
    """
    if not all_arms:
        return 5

    best_arm = max(all_arms)
    if best_arm > 8:
        return -6
    elif best_arm > 4:
        return -5
    elif best_arm < -4:
        return 5
    else:
        return best_arm * -1


def arm_catcher(cs_pct: str, raa: int, season_pct: float) -> int:
    """Rate a catcher's arm from caught-stealing percentage and stolen-base runs.

    Args:
        cs_pct: Caught-stealing percentage as text, e.g. ``'31%'``; an empty
            string yields a rating of 3.
        raa: Runs value compared against floors scaled by ``season_pct``.
        season_pct: Fraction of a season used to scale the ``raa`` floors.

    Returns:
        The lower of the rating derived from ``raa`` and the one derived from ``cs_pct``.
    """
    if cs_pct == '':
        return 3
    cs_rate = float(cs_pct.strip("%")) / 100

    max_arm = _first_exceeded(raa, (
        (5 * season_pct, -4),
        (2 * season_pct, -2),
        (-1 * season_pct, 0),
        (-2 * season_pct, 3),
    ), 5)

    raw_arm = _first_exceeded(cs_rate, (
        (.6, -5), (.5, -4), (.4, -3), (.3, -2), (.25, -1),
        (.2, 0), (.16, 1), (.12, 2), (.1, 3), (.05, 4),
    ), 5)

    return int(min(max_arm, raw_arm))


def pb_catcher(pb: int, innings: int, season_pct: float) -> int:
    """Passed-ball rating: ``pb * 1000 * season_pct / innings``, capped at 20; 0 when either input is 0."""
    if pb == 0 or innings == 0:
        return 0
    return int(abs(min(pb * 1000 * season_pct / innings, 20)))


def ot_catcher(errors: int, chances: int, season_pct: float) -> int:
    """Overthrow rating: a third of errors per ``3000 * season_pct`` chances, capped at 20; 0 when either input is 0."""
    if errors == 0 or chances == 0:
        return 0
    c_max = 3000 * season_pct
    return int(min(errors * c_max / chances / 3, 20))


def hold_pitcher(raw_cs: str, picks: int, season_pct: float) -> Union[int, str]:
    """Rate how well a pitcher holds runners.

    Args:
        raw_cs: Caught-stealing percentage as text, e.g. ``'25%'``.
        picks: Pickoff count compared against floors scaled by ``season_pct``.
        season_pct: Fraction of a season used to scale the pickoff floors.

    Returns:
        The lower of the pickoff-based value and the caught-stealing-based
        value, as an int with no sign formatting applied.
        NOTE(review): an empty ``raw_cs`` returns the string ``'+9'`` instead,
        so callers see two different types; behaviour kept unchanged.
    """
    if raw_cs == '':
        return '+9'
    cs_pct = float(raw_cs.strip("%")) / 100

    pick_cap = _first_exceeded(picks, (
        (5 * season_pct, -3),
        (3 * season_pct, -2),
        (0 * season_pct, 5),
    ), 9)

    # The table steps from 1 straight to 3; there is no entry that yields 2.
    hold_num = _first_exceeded(cs_pct, (
        (.667, -5), (.6, -4), (.48, -3), (.34, -2), (.26, -1), (.22, 0), (.2, 1),
        (.18, 3), (.16, 4), (.14, 5), (.12, 6), (.1, 7), (.06, 8),
    ), 9)

    return min(pick_cap, hold_num)


def pow_ratings(innings: float, gs: int, games: int) -> Tuple[int, int]:
    """Estimate innings per appearance as a starter and as a reliever.

    Innings are split between starts and relief outings in proportion to
    ``gs`` and ``games - gs``, then divided by the number of each.

    Returns:
        ``(starter, reliever)``. Both are 1 when ``innings`` or ``games`` is at
        most 1. The reliever value is forced to 1 when relief innings are
        under 10% of starter innings, and otherwise kept below the starter
        value whenever the starter value is above 1.
    """
    if innings <= 1 or games <= 1:
        return 1, 1

    relief_games = games - gs
    s_innings = int(innings * gs / games)
    r_innings = int(innings * relief_games / games)

    s_pow = 1 if gs == 0 else round(s_innings / gs)
    r_pow = 1 if r_innings == 0 else round(r_innings / relief_games)

    if r_innings / max(s_innings, 1) < .1:
        r_pow = 1
    elif r_pow >= s_pow > 1:
        r_pow = s_pow - 1

    return s_pow, r_pow


def innings_float(innings: str) -> float:
    """Convert innings text such as ``'123.2'`` to a float, counting each tenth as .333 of an inning."""
    if '.' in innings:
        whole, decimal = innings.split('.')
    else:
        whole = innings
        decimal = "0"

    return float(int(whole) + int(decimal) * .333)


def get_bbref_fielding_df(
        position: Literal['p', 'c', '1b', '2b', '3b', 'ss', 'lf', 'cf', 'rf', 'of'], s_num: int):
    """Scrape one season's Baseball Reference fielding table for a position into a DataFrame.

    Args:
        position: Position code used in the page URL.
        s_num: Season identifier used in the page URL.

    Returns:
        A frame indexed by the first value of each table row, with
        ``key_bbref`` plus one column per ``data-stat`` cell, all as scraped
        text. Duplicate ``key_bbref`` rows keep only the first occurrence. For
        every position except 'p', rows whose ``chances`` is '0', '1' or '2'
        are dropped.
    """
    url = f'https://www.baseball-reference.com/leagues/majors/{s_num}-specialpos_{position}-fielding.shtml'
    # A timeout keeps a stalled connection from hanging the caller indefinitely.
    soup = BeautifulSoup(requests.get(url, timeout=30).text, 'html.parser')
    table = soup.find('table', {'id': 'players_players_standard_fielding_fielding'})
    headers = []
    data = []
    indeces = []
    for row in table.find_all('tr'):
        row_data = []
        col_names = []
        for cell in row.find_all('td'):
            # A cell carrying `data-append-csv` contributes that id as an extra
            # leading value ahead of its own text.
            player_id = cell.get('data-append-csv')
            if player_id is not None:
                row_data.append(player_id)
                if not headers:
                    col_names.append('key_bbref')
            row_data.append(cell.text)
            if not headers:
                col_names.append(cell['data-stat'])
        if row_data:
            data.append(row_data)
            indeces.append(row_data[0])
            # Column names are taken from the first row that has any cells.
            if not headers:
                headers.extend(col_names)

    # The self-comparison filters out rows whose key_bbref is missing.
    pos_frame = pd.DataFrame(data, index=indeces, columns=headers).query('key_bbref == key_bbref')
    if position == 'p':
        return pos_frame.drop_duplicates(subset=['key_bbref'], keep='first')

    tmp = pos_frame[~pos_frame['chances'].isin(['0', '1', '2'])]
    return tmp.drop_duplicates(subset=['key_bbref'], keep='first')