Add defense calcs
Begin work on posting data
This commit is contained in:
parent
eb79430de7
commit
44e8e22bc0
@ -8,9 +8,11 @@ from typing import Literal
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pybaseball as pb
|
import pybaseball as pb
|
||||||
from pybaseball import cache
|
from pybaseball import cache
|
||||||
|
import urllib
|
||||||
|
|
||||||
from creation_helpers import get_args
|
from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST
|
||||||
from batters.stat_prep import DataMismatchError
|
from batters.stat_prep import DataMismatchError
|
||||||
|
from db_calls import DB_URL, db_get, db_patch, db_post
|
||||||
import batters.calcs_batter as cba
|
import batters.calcs_batter as cba
|
||||||
import defenders.calcs_defense as cde
|
import defenders.calcs_defense as cde
|
||||||
|
|
||||||
@ -27,11 +29,17 @@ RETRO_FILE_PATH = 'data-input/retrosheet/'
|
|||||||
EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few columns which were throwing dtype errors
|
EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few columns which were throwing dtype errors
|
||||||
PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
|
PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
|
||||||
DATA_INPUT_FILE_PATH = 'data-input/1998 Season Cardset/'
|
DATA_INPUT_FILE_PATH = 'data-input/1998 Season Cardset/'
|
||||||
|
CARD_BASE_URL = f'{DB_URL}/v2/players/'
|
||||||
|
|
||||||
|
start_time = datetime.datetime.now()
|
||||||
|
RELEASE_DIRECTORY = f'{start_time.year}-{start_time.month}-{start_time.day}'
|
||||||
|
|
||||||
MIN_PA_VL = 20
|
MIN_PA_VL = 20
|
||||||
MIN_PA_VR = 40
|
MIN_PA_VR = 40
|
||||||
MIN_TBF_VL = MIN_PA_VL
|
MIN_TBF_VL = MIN_PA_VL
|
||||||
MIN_TBF_VR = MIN_PA_VR
|
MIN_TBF_VR = MIN_PA_VR
|
||||||
|
CARDSET_ID = 20
|
||||||
|
PLAYER_DESCRIPTION = 'Live'
|
||||||
|
|
||||||
|
|
||||||
async def store_defense_to_csv(season: int):
|
async def store_defense_to_csv(season: int):
|
||||||
@ -64,7 +72,7 @@ def get_run_stat_df(input_path: str):
|
|||||||
if 'Name-additional' in run_data:
|
if 'Name-additional' in run_data:
|
||||||
run_data = run_data.rename(columns={'Name-additional': 'key_bbref'})
|
run_data = run_data.rename(columns={'Name-additional': 'key_bbref'})
|
||||||
|
|
||||||
run_data = run_data[['key_bbref', 'ROE', 'XI', 'RS%', 'SBO', 'SB', 'CS', 'SB%', 'SB2', 'CS2', 'SB3', 'CS3', 'SBH', 'CSH', 'PO', 'PCS', 'OOB', 'OOB1', 'OOB2', 'OOB3', 'OOBHm', 'BT', 'XBT%', '1stS', '1stS2', '1stS3', '1stD', '1stD3', '1stDH', '2ndS', '2ndS3', '2ndSH']]
|
run_data = run_data[['key_bbref', 'Tm', 'ROE', 'XI', 'RS%', 'SBO', 'SB', 'CS', 'SB%', 'SB2', 'CS2', 'SB3', 'CS3', 'SBH', 'CSH', 'PO', 'PCS', 'OOB', 'OOB1', 'OOB2', 'OOB3', 'OOBHm', 'BT', 'XBT%', '1stS', '1stS2', '1stS3', '1stD', '1stD3', '1stDH', '2ndS', '2ndS3', '2ndSH']]
|
||||||
|
|
||||||
run_data = run_data.fillna(0)
|
run_data = run_data.fillna(0)
|
||||||
return run_data.set_index('key_bbref')
|
return run_data.set_index('key_bbref')
|
||||||
@ -136,8 +144,6 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
|
|||||||
def get_base_batting_df(all_plays: pd.DataFrame) -> pd.DataFrame:
|
def get_base_batting_df(all_plays: pd.DataFrame) -> pd.DataFrame:
|
||||||
bs = get_player_ids(all_plays, 'batters')
|
bs = get_player_ids(all_plays, 'batters')
|
||||||
|
|
||||||
# bs['key_mlbam'] = bs.apply()
|
|
||||||
|
|
||||||
pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL')
|
pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL')
|
||||||
bs = pd.concat([bs, pal_series], axis=1)
|
bs = pd.concat([bs, pal_series], axis=1)
|
||||||
par_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR')
|
par_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR')
|
||||||
@ -160,6 +166,11 @@ def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -
|
|||||||
batting_stats = get_base_batting_df(all_plays)
|
batting_stats = get_base_batting_df(all_plays)
|
||||||
print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s')
|
print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s')
|
||||||
|
|
||||||
|
start = datetime.datetime.now()
|
||||||
|
all_player_ids = batting_stats['key_retro']
|
||||||
|
all_plays = all_plays[all_plays['batter_id'].isin(all_player_ids)]
|
||||||
|
print(f'Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s')
|
||||||
|
|
||||||
# Basic counting stats
|
# Basic counting stats
|
||||||
start = datetime.datetime.now()
|
start = datetime.datetime.now()
|
||||||
for event_type, vs_hand, col_name in [
|
for event_type, vs_hand, col_name in [
|
||||||
@ -317,6 +328,8 @@ def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -
|
|||||||
batting_stats['Oppo%_vL'] = round(1 - batting_stats['Pull%_vL'] - batting_stats['Cent%_vL'], 5)
|
batting_stats['Oppo%_vL'] = round(1 - batting_stats['Pull%_vL'] - batting_stats['Cent%_vL'], 5)
|
||||||
batting_stats['Oppo%_vR'] = round(1 - batting_stats['Pull%_vR'] - batting_stats['Cent%_vR'], 5)
|
batting_stats['Oppo%_vR'] = round(1 - batting_stats['Pull%_vR'] - batting_stats['Cent%_vR'], 5)
|
||||||
|
|
||||||
|
batting_stats = batting_stats.fillna(0)
|
||||||
|
|
||||||
print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s')
|
print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s')
|
||||||
|
|
||||||
return batting_stats
|
return batting_stats
|
||||||
@ -349,6 +362,7 @@ def calc_batting_cards(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
return y.loc[0]
|
return y.loc[0]
|
||||||
|
|
||||||
all_cards = bs.apply(create_batting_card, axis=1)
|
all_cards = bs.apply(create_batting_card, axis=1)
|
||||||
|
all_cards = all_cards.set_index('key_bbref')
|
||||||
|
|
||||||
return all_cards
|
return all_cards
|
||||||
|
|
||||||
@ -356,150 +370,290 @@ def calc_batting_cards(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
    """Rate every batter in ``bs`` and return the results indexed by ``key_bbref``.

    Each row is handed to ``cba.get_batter_ratings``; element 0 of its result is stored as
    ``ratings_vL`` and element 1 as ``ratings_vR``. An OPS is computed per split from the
    ``obp`` and ``slg`` entries, and ``total_ops`` averages the two splits with the lower
    one counted twice. ``total_ops`` then selects a rarity tier and a cost.
    """
    # (minimum total_ops, rarity_id, base_cost, base_ops, max_delta), checked top down.
    rarity_tiers = (
        (1.2, 99, 2400, 1.215, 810),
        (1, 1, 810, 1.05, 270),
        (0.9, 2, 270, 0.95, 90),
        (0.8, 3, 90, 0.85, 30),
        (0.7, 4, 30, 0.75, 10),
    )
    # Used when total_ops clears none of the thresholds above.
    floor_tier = (5, 10, 0.61, 8)

    def price_card(total_ops, base_cost, base_ops, max_delta) -> int:
        # Two cost steps per 0.1 of OPS above the tier's reference OPS ...
        steps = ((total_ops - base_ops) / 0.1) * 2
        if steps < 1:
            # ... otherwise scale by the ratio of total_ops to the reference OPS.
            steps = (max_delta * (1 - (total_ops / base_ops))) * -0.1

        return round(base_cost + (max_delta * steps))

    def rate_one_batter(row):
        ratings = cba.get_batter_ratings(row)
        vs_left = ratings[0]
        vs_right = ratings[1]

        ops_vl = vs_left['obp'] + vs_left['slg']
        ops_vr = vs_right['obp'] + vs_right['slg']
        total_ops = (ops_vl + ops_vr + min(ops_vr, ops_vl)) / 3

        rarity_id, base_cost, base_ops, max_delta = floor_tier
        for threshold, tier_rarity, tier_cost, tier_ops, tier_delta in rarity_tiers:
            if total_ops >= threshold:
                rarity_id, base_cost, base_ops, max_delta = tier_rarity, tier_cost, tier_ops, tier_delta
                break
        cost = price_card(total_ops, base_cost=base_cost, base_ops=base_ops, max_delta=max_delta)

        # A one-row frame lets the dict-valued rating columns sit next to the scalar columns.
        single_row = pd.DataFrame({
            'key_bbref': [row['key_bbref']],
            'ratings_vL': [vs_left],
            'ratings_vR': [vs_right],
            'ops_vL': ops_vl,
            'ops_vR': ops_vr,
            'total_ops': total_ops,
            'rarity_id': rarity_id,
            'cost': cost
        })
        return single_row.loc[0]

    rated = bs.apply(rate_one_batter, axis=1)
    return rated.set_index('key_bbref')
|
||||||
|
|
||||||
|
|
||||||
def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
    """Build the defensive rating rows for every batter in ``bs``.

    Per-position defense CSVs (``defense_<pos>.csv`` under ``DATA_INPUT_FILE_PATH``) are
    loaded and indexed by ``key_bbref``. For each batter, every position at which the
    player appears with at least 10.0 ``Inn_def`` produces one row holding the ratings
    returned by the ``cde`` helpers. A batter for whom no position was rated gets a single
    ``DH`` row whose ``innings`` is ``PA_vL + PA_vR``.

    Args:
        bs: Batting stats with at least ``key_bbref``, ``use_name``, ``last_name``,
            ``PA_vL`` and ``PA_vR`` columns.

    Returns:
        A DataFrame of position rows indexed by ``key_bbref`` (several rows per player
        are possible).
    """
    def load_defense(pos_code):
        return pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_{pos_code}.csv').set_index('key_bbref')

    df_c = load_defense('c')
    df_1b = load_defense('1b')
    df_2b = load_defense('2b')
    df_3b = load_defense('3b')
    df_ss = load_defense('ss')
    df_lf = load_defense('lf')
    df_cf = load_defense('cf')
    df_rf = load_defense('rf')
    df_of = load_defense('of')
    season_pct = 1.0

    all_pos = []

    def average_range_runs(pos_df, key_bbref):
        """Return the range-runs figure for one player at one position."""
        # The earlier check was `'tz_runs_total' in row`, i.e. against the batting row,
        # which never carries that column, so the blended branch could not run. Test the
        # defense frame for the BIS column instead, matching the column-presence checks
        # used for the outfield arm and catcher arm below.
        if 'bis_runs_total' in pos_df:
            tz_runs = int(pos_df.at[key_bbref, 'tz_runs_total'])
            bis_runs = int(pos_df.at[key_bbref, 'bis_runs_total'])
            # Average of both systems with the lower one counted twice.
            return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3
        return pos_df.at[key_bbref, 'tz_runs_total']

    def process_pos(row):
        key_bbref = row['key_bbref']
        no_data = True

        # TODO: Add pos_1 through pos_8 to def df to be pulled in at post time
        for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
            if key_bbref in pos_df.index:
                logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
                try:
                    average_range = average_range_runs(pos_df, key_bbref)

                    if float(pos_df.at[key_bbref, 'Inn_def']) >= 10.0:
                        all_pos.append({
                            "key_bbref": key_bbref,
                            "position": position.upper(),
                            "innings": float(pos_df.at[key_bbref, 'Inn_def']),
                            "range": cde.get_if_range(
                                pos_code=position,
                                tz_runs=round(average_range),
                                r_dp=0,
                                season_pct=season_pct
                            ),
                            "error": cde.get_any_error(
                                pos_code=position,
                                errors=int(pos_df.at[key_bbref, 'E_def']),
                                chances=int(pos_df.at[key_bbref, 'chances']),
                                season_pct=season_pct
                            )
                        })
                        no_data = False
                except Exception as e:
                    # Best effort: a bad row for one position must not stop the run.
                    logging.info(f'Infield position failed: {e}')

        # Outfield rows are held back until the shared error and arm ratings are known.
        of_arms = []
        of_payloads = []
        for pos_df, position in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]:
            if key_bbref in pos_df.index:
                try:
                    average_range = average_range_runs(pos_df, key_bbref)

                    if float(pos_df.at[key_bbref, 'Inn_def']) >= 10.0:
                        of_payloads.append({
                            "key_bbref": key_bbref,
                            "position": position.upper(),
                            "innings": float(pos_df.at[key_bbref, 'Inn_def']),
                            "range": cde.get_of_range(
                                pos_code=position,
                                tz_runs=round(average_range),
                                season_pct=season_pct
                            )
                        })
                        of_run_rating = 'bis_runs_outfield' if 'bis_runs_outfield' in pos_df else 'tz_runs_outfield'
                        of_arms.append(int(pos_df.at[key_bbref, of_run_rating]))
                        no_data = False
                except Exception as e:
                    logging.info(f'Outfield position failed: {e}')

        if key_bbref in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            try:
                error_rating = cde.get_any_error(
                    # The previous code passed the variable left over from the outfield
                    # loop, which is always 'rf' once that loop has finished.
                    # NOTE(review): confirm 'rf' is the intended code for combined OF errors.
                    pos_code='rf',
                    errors=int(df_of.at[key_bbref, 'E_def']),
                    chances=int(df_of.at[key_bbref, 'chances']),
                    season_pct=season_pct
                )
                arm_rating = cde.arm_outfield(of_arms)
                for f in of_payloads:
                    f['error'] = error_rating
                    f['arm'] = arm_rating
                    all_pos.append(f)
                no_data = False
            except Exception as e:
                logging.info(f'Outfield position failed: {e}')

        if key_bbref in df_c.index:
            try:
                run_rating = 'bis_runs_catcher_sb' if 'bis_runs_catcher_sb' in df_c else 'tz_runs_catcher'

                # No steal attempts against this catcher: use a fixed arm rating of 3.
                if df_c.at[key_bbref, 'SB'] + df_c.at[key_bbref, 'CS'] == 0:
                    arm_rating = 3
                else:
                    arm_rating = cde.arm_catcher(
                        cs_pct=df_c.at[key_bbref, 'caught_stealing_perc'],
                        raa=int(df_c.at[key_bbref, run_rating]),
                        season_pct=season_pct
                    )

                if float(df_c.at[key_bbref, 'Inn_def']) >= 10.0:
                    all_pos.append({
                        "key_bbref": key_bbref,
                        "position": 'C',
                        "innings": float(df_c.at[key_bbref, 'Inn_def']),
                        "range": cde.range_catcher(
                            rs_value=int(df_c.at[key_bbref, 'tz_runs_catcher']),
                            season_pct=season_pct
                        ),
                        "error": cde.get_any_error(
                            pos_code='c',
                            errors=int(df_c.at[key_bbref, 'E_def']),
                            chances=int(df_c.at[key_bbref, 'chances']),
                            season_pct=season_pct
                        ),
                        "arm": arm_rating,
                        "pb": cde.pb_catcher(
                            pb=int(df_c.at[key_bbref, 'PB']),
                            innings=int(float(df_c.at[key_bbref, 'Inn_def'])),
                            season_pct=season_pct
                        ),
                        "overthrow": cde.ot_catcher(
                            errors=int(df_c.at[key_bbref, 'E_def']),
                            chances=int(df_c.at[key_bbref, 'chances']),
                            season_pct=season_pct
                        )
                    })
                    no_data = False
            except Exception as e:
                logging.info(f'Catcher position failed: {e}')

        if no_data:
            all_pos.append({
                "key_bbref": key_bbref,
                "position": 'DH',
                "innings": row['PA_vL'] + row['PA_vR']
            })

    # process_pos only appends to all_pos, so a plain loop replaces the apply() call.
    for _, batter_row in bs.iterrows():
        process_pos(batter_row)

    positions = pd.DataFrame(all_pos)
    return positions.set_index('key_bbref')
|
||||||
|
|
||||||
|
|
||||||
def run_batters(data_input_path: str, start_date: int, end_date: int):
|
async def get_or_post_players(stat_df: pd.DataFrame, bat_card_df: pd.DataFrame, bat_rat_df: pd.DataFrame, def_rat_df: pd.DataFrame) -> pd.DataFrame:
    """Look up each batter's player record in this cardset, creating it when missing.

    For every processed row of ``stat_df`` the ``players`` endpoint is queried by
    ``key_bbref`` and ``CARDSET_ID``. When no record exists, the matching ``mlbplayers``
    record is fetched (or created) and a new player is posted, after which the player's
    image URL is patched to point at its batting card.

    Args:
        stat_df: Batting stats; rows supply the name, team and key_* identifier columns.
        bat_card_df: Batting cards (not used yet).
        bat_rat_df: Batting ratings indexed by ``key_bbref``; supplies ``cost`` and
            ``rarity_id`` for new players.
        def_rat_df: Defense ratings (not used yet).

    Returns:
        A DataFrame with ``key_bbref`` and ``player_id`` columns, one row per player
        found or created.
    """
    # `import urllib` alone does not guarantee the `parse` submodule is loaded.
    from urllib.parse import quote

    def record_id(record):
        # Records are accepted with either an 'id' or a 'player_id' key.
        return record['id'] if 'id' in record else record['player_id']

    all_bbref_ids = []
    all_player_ids = []

    dev_count = 0
    for _, row in stat_df.iterrows():
        # Development throttle: stop after the first row while posting is being built out.
        if dev_count > 0:
            break

        key_bbref = row['key_bbref']
        p_query = await db_get('players', params=[('key_bbref', key_bbref), ('cardset_id', CARDSET_ID)])
        if p_query['count'] > 0:
            player_id = record_id(p_query['players'][0])
        else:
            mlb_query = await db_get('mlbplayers', params=[('key_retro', row['key_retro'])])
            if mlb_query['count'] > 0:
                mlb_player = mlb_query['players'][0]
            else:
                mlb_player = await db_post(
                    'mlbplayers/one',
                    payload={
                        'first_name': row['use_name'],
                        'last_name': row['last_name'],
                        'key_mlbam': row['key_mlbam'],
                        'key_fangraphs': row['key_fangraphs'],
                        'key_bbref': key_bbref,
                        'key_retro': row['key_retro']
                    }
                )

            batter_rating = bat_rat_df.loc[key_bbref]
            # The earlier call passed the payload dict as the only positional argument,
            # leaving db_post without an endpoint.
            # NOTE(review): 'players' mirrors the db_get/db_patch endpoint; confirm it is
            # the right route for creating a single player.
            new_player = await db_post(
                'players',
                payload={
                    'p_name': f'{row["use_name"]} {row["last_name"]}',
                    # This player's own cost, not the whole cost column rendered as text.
                    'cost': f'{batter_rating["cost"]}',
                    'image': 'change-me',
                    'mlbclub': CLUB_LIST[row['Tm']],
                    'franchise': FRANCHISE_LIST[row['Tm']],
                    'cardset_id': CARDSET_ID,
                    'set_num': int(float(row['key_fangraphs'])),
                    # Plain int in case the payload is serialized as JSON.
                    'rarity_id': int(batter_rating['rarity_id']),
                    'pos_1': row['pos_1'],
                    'description': PLAYER_DESCRIPTION,
                    'bbref_id': key_bbref,
                    'fangr_id': row['key_fangraphs'],
                    'mlbplayer_id': mlb_player['id']
                }
            )
            player_id = record_id(new_player)

            # The card image URL embeds the new player's id, so it is set after creation.
            await db_patch('players', object_id=player_id, params=[('image', f'{CARD_BASE_URL}{player_id}/battingcard{quote("?d=")}{RELEASE_DIRECTORY}')])

        all_bbref_ids.append(key_bbref)
        all_player_ids.append(player_id)
        dev_count += 1

    return pd.DataFrame({'key_bbref': all_bbref_ids, 'player_id': all_player_ids})
|
||||||
|
|
||||||
|
|
||||||
|
async def post_batter_data(bs: pd.DataFrame, bc: pd.DataFrame, br: pd.DataFrame, dr: pd.DataFrame) -> int:
    """Post batter data to the database and report how many players were handled.

    Args:
        bs: Batting stats.
        bc: Batting cards.
        br: Batting ratings.
        dr: Defense ratings.

    Returns:
        The number of rows in the frame returned by ``get_or_post_players``.
    """
    pd_ids = await get_or_post_players(bs, bc, br, dr)

    # The caller reports this value as the number of posted players; previously the
    # function fell off the end and returned None despite the `-> int` annotation.
    return len(pd_ids)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_batters(data_input_path: str, start_date: int, end_date: int, post_data: bool = False):
|
||||||
print(f'Running the batter calcs...')
|
print(f'Running the batter calcs...')
|
||||||
batter_start = datetime.datetime.now()
|
batter_start = datetime.datetime.now()
|
||||||
|
|
||||||
@ -508,12 +662,11 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
|
|||||||
bs_len = len(batting_stats)
|
bs_len = len(batting_stats)
|
||||||
|
|
||||||
end_calc = datetime.datetime.now()
|
end_calc = datetime.datetime.now()
|
||||||
print(f'Batting stats: {(end_calc - batter_start).total_seconds():.2f}s')
|
print(f'Combined batting stats: {(end_calc - batter_start).total_seconds():.2f}s\n')
|
||||||
running_start = datetime.datetime.now()
|
running_start = datetime.datetime.now()
|
||||||
|
|
||||||
# Get running stats
|
# Get running stats
|
||||||
running_stats = get_run_stat_df(data_input_path)
|
running_stats = get_run_stat_df(data_input_path)
|
||||||
run_len = len(running_stats)
|
|
||||||
|
|
||||||
batting_stats = pd.merge(
|
batting_stats = pd.merge(
|
||||||
left=batting_stats,
|
left=batting_stats,
|
||||||
@ -533,7 +686,7 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
|
|||||||
all_batting_cards = calc_batting_cards(batting_stats)
|
all_batting_cards = calc_batting_cards(batting_stats)
|
||||||
card_end = datetime.datetime.now()
|
card_end = datetime.datetime.now()
|
||||||
|
|
||||||
print(f'Create batting cards: {(card_end - card_start).total_seconds()}s')
|
print(f'Create batting cards: {(card_end - card_start).total_seconds():.2f}s')
|
||||||
|
|
||||||
# Calculate batting ratings
|
# Calculate batting ratings
|
||||||
rating_start = datetime.datetime.now()
|
rating_start = datetime.datetime.now()
|
||||||
@ -541,29 +694,47 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
|
|||||||
all_batting_ratings = calc_batter_ratings(batting_stats)
|
all_batting_ratings = calc_batter_ratings(batting_stats)
|
||||||
rating_end = datetime.datetime.now()
|
rating_end = datetime.datetime.now()
|
||||||
|
|
||||||
print(f'Create batting ratings: {(rating_end - rating_start).total_seconds()}s')
|
print(f'Create batting ratings: {(rating_end - rating_start).total_seconds():.2f}s')
|
||||||
|
|
||||||
# Calculate defense ratings
|
# Calculate defense ratings
|
||||||
defense_start = datetime.datetime.now()
|
defense_start = datetime.datetime.now()
|
||||||
all_defense_ratings = calc_positions(batting_stats)
|
all_defense_ratings = calc_positions(batting_stats)
|
||||||
defense_end = datetime.datetime.now()
|
defense_end = datetime.datetime.now()
|
||||||
|
|
||||||
print(f'Create defense ratings: {(defense_end - defense_start).total_seconds()}s')
|
print(f'Create defense ratings: {(defense_end - defense_start).total_seconds():.2f}s')
|
||||||
|
|
||||||
|
# Post all data
|
||||||
|
if post_data:
|
||||||
|
print(f'Posting player data...')
|
||||||
|
post_start = datetime.datetime.now()
|
||||||
|
num_players = await post_batter_data(batting_stats, all_batting_cards, all_batting_ratings, all_defense_ratings)
|
||||||
|
post_end = datetime.datetime.now()
|
||||||
|
|
||||||
|
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
|
||||||
|
|
||||||
|
post_msg = f'Posted {num_players} players to the database'
|
||||||
|
logging.info(post_msg)
|
||||||
|
print(post_msg)
|
||||||
|
else:
|
||||||
|
post_msg = f'Players are NOT being posted to the database'
|
||||||
|
logging.warning(post_msg)
|
||||||
|
print(post_msg)
|
||||||
|
|
||||||
return batting_stats
|
return batting_stats
|
||||||
|
|
||||||
|
|
||||||
async def main(args):
    """Run the batter pipeline for the configured date window and print timings.

    The batting stats returned by ``run_batters`` are written to ``batting_stats.csv``.
    No pitcher work happens between the two pitcher timestamps, so that interval only
    covers the time between the two ``now()`` calls.
    """
    batter_start = datetime.datetime.now()
    batting_stats = await run_batters(
        f'{DATA_INPUT_FILE_PATH}',
        start_date=19980101,
        end_date=19980430,
        post_data=True,
    )
    batting_stats.to_csv('batting_stats.csv')
    batter_end = datetime.datetime.now()

    # Placeholder timing window for the pitcher pipeline.
    pitcher_start = datetime.datetime.now()
    pitcher_end = datetime.datetime.now()

    print(f'\n\nBatter time: {(batter_end - batter_start).total_seconds():.2f}s \nPitcher time: {(pitcher_end - pitcher_start).total_seconds():.2f}s\nTotal: {(pitcher_end - batter_start).total_seconds():.2f}s\n\nDone!')

    # await store_defense_to_csv(1998)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user