Add defense calcs

Begin work on posting data
This commit is contained in:
Cal Corum 2024-10-20 22:57:45 -05:00
parent eb79430de7
commit 44e8e22bc0

View File

@ -8,9 +8,11 @@ from typing import Literal
import pandas as pd import pandas as pd
import pybaseball as pb import pybaseball as pb
from pybaseball import cache from pybaseball import cache
import urllib
from creation_helpers import get_args from creation_helpers import get_args, CLUB_LIST, FRANCHISE_LIST
from batters.stat_prep import DataMismatchError from batters.stat_prep import DataMismatchError
from db_calls import DB_URL, db_get, db_patch, db_post
import batters.calcs_batter as cba import batters.calcs_batter as cba
import defenders.calcs_defense as cde import defenders.calcs_defense as cde
@ -27,11 +29,17 @@ RETRO_FILE_PATH = 'data-input/retrosheet/'
EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few columns which were throwing dtype errors EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few columns which were throwing dtype errors
PERSONNEL_FILENAME = 'retrosheets_personnel.csv' PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
DATA_INPUT_FILE_PATH = 'data-input/1998 Season Cardset/' DATA_INPUT_FILE_PATH = 'data-input/1998 Season Cardset/'
CARD_BASE_URL = f'{DB_URL}/v2/players/'
start_time = datetime.datetime.now()
RELEASE_DIRECTORY = f'{start_time.year}-{start_time.month}-{start_time.day}'
MIN_PA_VL = 20 MIN_PA_VL = 20
MIN_PA_VR = 40 MIN_PA_VR = 40
MIN_TBF_VL = MIN_PA_VL MIN_TBF_VL = MIN_PA_VL
MIN_TBF_VR = MIN_PA_VR MIN_TBF_VR = MIN_PA_VR
CARDSET_ID = 20
PLAYER_DESCRIPTION = 'Live'
async def store_defense_to_csv(season: int): async def store_defense_to_csv(season: int):
@ -64,7 +72,7 @@ def get_run_stat_df(input_path: str):
if 'Name-additional' in run_data: if 'Name-additional' in run_data:
run_data = run_data.rename(columns={'Name-additional': 'key_bbref'}) run_data = run_data.rename(columns={'Name-additional': 'key_bbref'})
run_data = run_data[['key_bbref', 'ROE', 'XI', 'RS%', 'SBO', 'SB', 'CS', 'SB%', 'SB2', 'CS2', 'SB3', 'CS3', 'SBH', 'CSH', 'PO', 'PCS', 'OOB', 'OOB1', 'OOB2', 'OOB3', 'OOBHm', 'BT', 'XBT%', '1stS', '1stS2', '1stS3', '1stD', '1stD3', '1stDH', '2ndS', '2ndS3', '2ndSH']] run_data = run_data[['key_bbref', 'Tm', 'ROE', 'XI', 'RS%', 'SBO', 'SB', 'CS', 'SB%', 'SB2', 'CS2', 'SB3', 'CS3', 'SBH', 'CSH', 'PO', 'PCS', 'OOB', 'OOB1', 'OOB2', 'OOB3', 'OOBHm', 'BT', 'XBT%', '1stS', '1stS2', '1stS3', '1stD', '1stD3', '1stDH', '2ndS', '2ndS3', '2ndSH']]
run_data = run_data.fillna(0) run_data = run_data.fillna(0)
return run_data.set_index('key_bbref') return run_data.set_index('key_bbref')
@ -136,8 +144,6 @@ def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -
def get_base_batting_df(all_plays: pd.DataFrame) -> pd.DataFrame: def get_base_batting_df(all_plays: pd.DataFrame) -> pd.DataFrame:
bs = get_player_ids(all_plays, 'batters') bs = get_player_ids(all_plays, 'batters')
# bs['key_mlbam'] = bs.apply()
pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL') pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vL')
bs = pd.concat([bs, pal_series], axis=1) bs = pd.concat([bs, pal_series], axis=1)
par_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR') par_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PA_vR')
@ -160,6 +166,11 @@ def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -
batting_stats = get_base_batting_df(all_plays) batting_stats = get_base_batting_df(all_plays)
print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s') print(f'Get base dataframe: {(datetime.datetime.now() - start).total_seconds():.2f}s')
start = datetime.datetime.now()
all_player_ids = batting_stats['key_retro']
all_plays = all_plays[all_plays['batter_id'].isin(all_player_ids)]
print(f'Shrink all_plays: {(datetime.datetime.now() - start).total_seconds():.2f}s')
# Basic counting stats # Basic counting stats
start = datetime.datetime.now() start = datetime.datetime.now()
for event_type, vs_hand, col_name in [ for event_type, vs_hand, col_name in [
@ -317,6 +328,8 @@ def get_batting_stats_by_date(retro_file_path, start_date: int, end_date: int) -
batting_stats['Oppo%_vL'] = round(1 - batting_stats['Pull%_vL'] - batting_stats['Cent%_vL'], 5) batting_stats['Oppo%_vL'] = round(1 - batting_stats['Pull%_vL'] - batting_stats['Cent%_vL'], 5)
batting_stats['Oppo%_vR'] = round(1 - batting_stats['Pull%_vR'] - batting_stats['Cent%_vR'], 5) batting_stats['Oppo%_vR'] = round(1 - batting_stats['Pull%_vR'] - batting_stats['Cent%_vR'], 5)
batting_stats = batting_stats.fillna(0)
print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s') print(f'Calculated fields: {(datetime.datetime.now() - start).total_seconds():.2f}s')
return batting_stats return batting_stats
@ -349,6 +362,7 @@ def calc_batting_cards(bs: pd.DataFrame) -> pd.DataFrame:
return y.loc[0] return y.loc[0]
all_cards = bs.apply(create_batting_card, axis=1) all_cards = bs.apply(create_batting_card, axis=1)
all_cards = all_cards.set_index('key_bbref')
return all_cards return all_cards
@ -356,150 +370,290 @@ def calc_batting_cards(bs: pd.DataFrame) -> pd.DataFrame:
def calc_batter_ratings(bs: pd.DataFrame) -> pd.DataFrame:
    """Rate every batter in *bs* and derive a rarity tier and card cost.

    For each row, ``cba.get_batter_ratings`` supplies a pair of rating
    mappings (index 0 is stored as ``ratings_vL``, index 1 as ``ratings_vR``).
    Each mapping's ``obp`` and ``slg`` are summed into a split OPS, and the
    weaker split is counted twice when averaging into ``total_ops``.

    Returns:
        A DataFrame indexed by ``key_bbref`` with columns ``ratings_vL``,
        ``ratings_vR``, ``ops_vL``, ``ops_vR``, ``total_ops``, ``rarity_id``
        and ``cost``.
    """
    # (minimum total OPS, rarity_id, base_cost, base_ops, max_delta), best tier first.
    tiers = (
        (1.2, 99, 2400, 1.215, 810),
        (1, 1, 810, 1.05, 270),
        (0.9, 2, 270, 0.95, 90),
        (0.8, 3, 90, 0.85, 30),
        (0.7, 4, 30, 0.75, 10),
    )
    # Applied when total OPS clears none of the thresholds above.
    bottom_tier = (5, 10, 0.61, 8)

    def price_card(total_ops, base_cost, base_ops, max_delta) -> int:
        # Scale the distance from the tier's reference OPS into a cost offset.
        steps = ((total_ops - base_ops) / 0.1) * 2
        if steps < 1:
            steps = (max_delta * (1 - (total_ops / base_ops))) * -0.1
        return round(base_cost + (max_delta * steps))

    def create_batting_rating(row):
        ratings = cba.get_batter_ratings(row)
        vs_left = ratings[0]
        vs_right = ratings[1]

        ops_vl = vs_left['obp'] + vs_left['slg']
        ops_vr = vs_right['obp'] + vs_right['slg']
        total_ops = (ops_vl + ops_vr + min(ops_vr, ops_vl)) / 3

        for floor, rarity_id, base_cost, base_ops, max_delta in tiers:
            if total_ops >= floor:
                break
        else:
            rarity_id, base_cost, base_ops, max_delta = bottom_tier
        cost = price_card(total_ops, base_cost=base_cost, base_ops=base_ops, max_delta=max_delta)

        single_row = pd.DataFrame({
            'key_bbref': [row['key_bbref']],
            'ratings_vL': [vs_left],
            'ratings_vR': [vs_right],
            'ops_vL': ops_vl,
            'ops_vR': ops_vr,
            'total_ops': total_ops,
            'rarity_id': rarity_id,
            'cost': cost
        })
        return single_row.loc[0]

    all_ratings = bs.apply(create_batting_rating, axis=1)
    return all_ratings.set_index('key_bbref')
def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
    """Build one defensive-rating row per (player, position) for the batters in *bs*.

    Per-position defense tables are read from
    ``{DATA_INPUT_FILE_PATH}defense_<pos>.csv`` and indexed by ``key_bbref``.
    A position is only emitted when the player has at least 10.0 ``Inn_def``
    there. A player for whom no defensive row is emitted gets a single ``DH``
    row whose ``innings`` is ``PA_vL + PA_vR``.

    Args:
        bs: Batting stats with (at least) the columns ``key_bbref``,
            ``use_name``, ``last_name``, ``PA_vL`` and ``PA_vR``.

    Returns:
        A DataFrame indexed by ``key_bbref``. Every row has ``position`` and
        ``innings``; fielding rows add ``range`` and ``error``, outfield rows
        add ``arm``, catcher rows add ``arm``, ``pb`` and ``overthrow``.
        Empty (with the same index name) when *bs* yields no rows.
    """
    def load_defense(pos_code: str) -> pd.DataFrame:
        return pd.read_csv(f'{DATA_INPUT_FILE_PATH}defense_{pos_code}.csv').set_index('key_bbref')

    df_c = load_defense('c')
    df_1b = load_defense('1b')
    df_2b = load_defense('2b')
    df_3b = load_defense('3b')
    df_ss = load_defense('ss')
    df_lf = load_defense('lf')
    df_cf = load_defense('cf')
    df_rf = load_defense('rf')
    df_of = load_defense('of')

    season_pct = 1.0
    min_innings = 10.0
    all_pos = []

    def average_range(pos_df: pd.DataFrame, key):
        # Fix: the original tested `'tz_runs_total' in row` (the batter row),
        # so the blended branch could never be selected correctly. Test the
        # defense table for the column the blended formula actually needs,
        # mirroring the other `in pos_df` / `in df_c` column checks below.
        if 'bis_runs_total' in pos_df.columns:
            tz_runs = int(pos_df.at[key, 'tz_runs_total'])
            bis_runs = int(pos_df.at[key, 'bis_runs_total'])
            # The lower of the two sources is counted twice.
            return (tz_runs + bis_runs + min(tz_runs, bis_runs)) / 3
        return pos_df.at[key, 'tz_runs_total']

    def process_pos(row):
        key = row['key_bbref']
        no_data = True

        # TODO: Add pos_1 through pos_8 to def df to be pulled in at post time
        for pos_df, position in [(df_1b, '1b'), (df_2b, '2b'), (df_3b, '3b'), (df_ss, 'ss')]:
            if key in pos_df.index:
                logging.info(f'Running {position} stats for {row["use_name"]} {row["last_name"]}')
                try:
                    range_runs = average_range(pos_df, key)

                    if float(pos_df.at[key, 'Inn_def']) >= min_innings:
                        all_pos.append({
                            "key_bbref": key,
                            "position": position.upper(),
                            "innings": float(pos_df.at[key, 'Inn_def']),
                            "range": cde.get_if_range(
                                pos_code=position,
                                tz_runs=round(range_runs),
                                r_dp=0,
                                season_pct=season_pct
                            ),
                            "error": cde.get_any_error(
                                pos_code=position,
                                errors=int(pos_df.at[key, 'E_def']),
                                chances=int(pos_df.at[key, 'chances']),
                                season_pct=season_pct
                            )
                        })
                        no_data = False
                except Exception as e:
                    # Best effort: a bad row for one position must not stop the run.
                    logging.info(f'Infield position failed: {e}')

        of_arms = []
        of_payloads = []
        for pos_df, position in [(df_lf, 'lf'), (df_cf, 'cf'), (df_rf, 'rf')]:
            if key in pos_df.index:
                try:
                    range_runs = average_range(pos_df, key)

                    # NOTE(review): the arm sample is collected only for
                    # positions that meet the innings minimum - confirm this
                    # matches the intended nesting.
                    if float(pos_df.at[key, 'Inn_def']) >= min_innings:
                        of_payloads.append({
                            "key_bbref": key,
                            "position": position.upper(),
                            "innings": float(pos_df.at[key, 'Inn_def']),
                            "range": cde.get_of_range(
                                pos_code=position,
                                tz_runs=round(range_runs),
                                season_pct=season_pct
                            )
                        })
                        of_run_rating = 'bis_runs_outfield' if 'bis_runs_outfield' in pos_df else 'tz_runs_outfield'
                        of_arms.append(int(pos_df.at[key, of_run_rating]))
                except Exception as e:
                    logging.info(f'Outfield position failed: {e}')

        if key in df_of.index and len(of_arms) > 0 and len(of_payloads) > 0:
            try:
                # Error and arm are shared by every outfield spot and come from
                # the combined outfield table. The original passed the loop
                # variable left over from the loop above, which is always 'rf'.
                # NOTE(review): confirm 'rf' is the intended code for the
                # combined outfield error lookup.
                error_rating = cde.get_any_error(
                    pos_code='rf',
                    errors=int(df_of.at[key, 'E_def']),
                    chances=int(df_of.at[key, 'chances']),
                    season_pct=season_pct
                )
                arm_rating = cde.arm_outfield(of_arms)
                for f in of_payloads:
                    f['error'] = error_rating
                    f['arm'] = arm_rating
                    all_pos.append(f)
                # Only clear the DH fallback once outfield rows are really
                # emitted; staged payloads alone used to suppress it.
                no_data = False
            except Exception as e:
                logging.info(f'Outfield position failed: {e}')

        if key in df_c.index:
            try:
                run_rating = 'bis_runs_catcher_sb' if 'bis_runs_catcher_sb' in df_c else 'tz_runs_catcher'

                if df_c.at[key, 'SB'] + df_c.at[key, 'CS'] == 0:
                    # No steal attempts against this catcher: fixed arm rating.
                    arm_rating = 3
                else:
                    arm_rating = cde.arm_catcher(
                        cs_pct=df_c.at[key, 'caught_stealing_perc'],
                        raa=int(df_c.at[key, run_rating]),
                        season_pct=season_pct
                    )

                if float(df_c.at[key, 'Inn_def']) >= min_innings:
                    all_pos.append({
                        "key_bbref": key,
                        "position": 'C',
                        "innings": float(df_c.at[key, 'Inn_def']),
                        "range": cde.range_catcher(
                            rs_value=int(df_c.at[key, 'tz_runs_catcher']),
                            season_pct=season_pct
                        ),
                        "error": cde.get_any_error(
                            pos_code='c',
                            errors=int(df_c.at[key, 'E_def']),
                            chances=int(df_c.at[key, 'chances']),
                            season_pct=season_pct
                        ),
                        "arm": arm_rating,
                        "pb": cde.pb_catcher(
                            pb=int(df_c.at[key, 'PB']),
                            innings=int(float(df_c.at[key, 'Inn_def'])),
                            season_pct=season_pct
                        ),
                        "overthrow": cde.ot_catcher(
                            errors=int(df_c.at[key, 'E_def']),
                            chances=int(df_c.at[key, 'chances']),
                            season_pct=season_pct
                        )
                    })
                    no_data = False
            except Exception as e:
                logging.info(f'Catcher position failed: {e}')

        if no_data:
            all_pos.append({
                "key_bbref": key,
                "position": 'DH',
                "innings": row['PA_vL'] + row['PA_vR']
            })

    # process_pos works purely by appending to all_pos, so a plain loop is
    # clearer than apply() with a discarded result.
    for _, stat_row in bs.iterrows():
        process_pos(stat_row)

    if not all_pos:
        # set_index('key_bbref') would raise on a frame with no columns.
        return pd.DataFrame(columns=['position', 'innings']).rename_axis('key_bbref')

    pos_df = pd.DataFrame(all_pos)
    pos_df = pos_df.set_index('key_bbref')

    return pos_df
async def get_or_post_players(stat_df: pd.DataFrame, bat_card_df: pd.DataFrame, bat_rat_df: pd.DataFrame, def_rat_df: pd.DataFrame) -> pd.DataFrame:
    """Look up, or create, the database player for each row of *stat_df*.

    For every row the ``players`` endpoint is queried by ``key_bbref`` and
    ``CARDSET_ID``. When nothing is found, the matching ``mlbplayers`` record
    is fetched (or created), a new player is posted, and its ``image`` is then
    patched to the batting-card URL for ``RELEASE_DIRECTORY``.

    Args:
        stat_df: Batting stats; reads ``key_bbref``, ``key_retro``,
            ``key_mlbam``, ``key_fangraphs``, ``use_name``, ``last_name``,
            ``Tm`` and ``pos_1``.
        bat_card_df: Currently unused.
        bat_rat_df: Ratings indexed by ``key_bbref``; supplies ``cost`` and
            ``rarity_id`` for newly created players.
        def_rat_df: Currently unused.

    Returns:
        A DataFrame with columns ``key_bbref`` and ``player_id``, one row per
        processed player.
    """
    # `import urllib` alone does not guarantee the `parse` submodule is loaded.
    import urllib.parse

    def record_id(record):
        # Records have been seen keyed either way; prefer 'id'.
        return record['id'] if 'id' in record else record['player_id']

    all_bbref_ids = []
    all_player_ids = []

    # Development limiter: stop after the first processed row.
    # NOTE(review): the increment is placed at loop level; confirm it was not
    # meant to count only newly created players.
    dev_count = 0
    for index, row in stat_df.iterrows():
        if dev_count > 0:
            break

        p_query = await db_get('players', params=[('key_bbref', row["key_bbref"]), ('cardset_id', CARDSET_ID)])
        if p_query['count'] > 0:
            player_id = record_id(p_query['players'][0])
        else:
            mlb_query = await db_get('mlbplayers', params=[('key_retro', row['key_retro'])])
            if mlb_query['count'] > 0:
                # NOTE(review): assumes the mlbplayers response also lists its
                # records under 'players' - confirm against the API.
                mlb_player = mlb_query['players'][0]
            else:
                mlb_player = await db_post(
                    'mlbplayers/one',
                    payload={
                        'first_name': row['use_name'],
                        'last_name': row['last_name'],
                        'key_mlbam': row['key_mlbam'],
                        'key_fangraphs': row['key_fangraphs'],
                        'key_bbref': row['key_bbref'],
                        'key_retro': row['key_retro']
                    }
                )

            # Fixes: the original stringified the entire `cost` column instead
            # of this player's cost, and called db_post without an endpoint or
            # the `payload=` keyword used by the mlbplayers call above.
            rating_row = bat_rat_df.loc[row['key_bbref']]
            new_player = await db_post(
                'players',
                payload={
                    'p_name': f'{row["use_name"]} {row["last_name"]}',
                    'cost': int(rating_row['cost']),
                    'image': 'change-me',  # placeholder until the id is known; patched below
                    'mlbclub': CLUB_LIST[row['Tm']],
                    'franchise': FRANCHISE_LIST[row['Tm']],
                    'cardset_id': CARDSET_ID,
                    'set_num': int(float(row['key_fangraphs'])),
                    # Cast so a numpy integer never reaches the JSON encoder.
                    'rarity_id': int(rating_row['rarity_id']),
                    'pos_1': row['pos_1'],
                    'description': PLAYER_DESCRIPTION,
                    'bbref_id': row['key_bbref'],
                    'fangr_id': row['key_fangraphs'],
                    'mlbplayer_id': mlb_player['id']
                }
            )
            player_id = record_id(new_player)

            await db_patch('players', object_id=player_id, params=[('image', f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}')])

        all_bbref_ids.append(row['key_bbref'])
        all_player_ids.append(player_id)
        dev_count += 1

    return pd.DataFrame({'key_bbref': all_bbref_ids, 'player_id': all_player_ids})
async def post_batter_data(bs: pd.DataFrame, bc: pd.DataFrame, br: pd.DataFrame, dr: pd.DataFrame) -> int:
    """Post batter data and report how many players were handled.

    Args:
        bs: Batting stats, one row per player.
        bc: Batting cards.
        br: Batting ratings.
        dr: Defense ratings.

    Returns:
        The number of rows in the ``key_bbref``/``player_id`` frame produced
        by ``get_or_post_players``. The original fell off the end and returned
        ``None`` despite the ``-> int`` annotation.
    """
    pd_ids = await get_or_post_players(bs, bc, br, dr)
    return len(pd_ids)
async def run_batters(data_input_path: str, start_date: int, end_date: int, post_data: bool = False):
print(f'Running the batter calcs...') print(f'Running the batter calcs...')
batter_start = datetime.datetime.now() batter_start = datetime.datetime.now()
@ -508,12 +662,11 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
bs_len = len(batting_stats) bs_len = len(batting_stats)
end_calc = datetime.datetime.now() end_calc = datetime.datetime.now()
print(f'Batting stats: {(end_calc - batter_start).total_seconds():.2f}s') print(f'Combined batting stats: {(end_calc - batter_start).total_seconds():.2f}s\n')
running_start = datetime.datetime.now() running_start = datetime.datetime.now()
# Get running stats # Get running stats
running_stats = get_run_stat_df(data_input_path) running_stats = get_run_stat_df(data_input_path)
run_len = len(running_stats)
batting_stats = pd.merge( batting_stats = pd.merge(
left=batting_stats, left=batting_stats,
@ -533,7 +686,7 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
all_batting_cards = calc_batting_cards(batting_stats) all_batting_cards = calc_batting_cards(batting_stats)
card_end = datetime.datetime.now() card_end = datetime.datetime.now()
print(f'Create batting cards: {(card_end - card_start).total_seconds()}s') print(f'Create batting cards: {(card_end - card_start).total_seconds():.2f}s')
# Calculate batting ratings # Calculate batting ratings
rating_start = datetime.datetime.now() rating_start = datetime.datetime.now()
@ -541,29 +694,47 @@ def run_batters(data_input_path: str, start_date: int, end_date: int):
all_batting_ratings = calc_batter_ratings(batting_stats) all_batting_ratings = calc_batter_ratings(batting_stats)
rating_end = datetime.datetime.now() rating_end = datetime.datetime.now()
print(f'Create batting ratings: {(rating_end - rating_start).total_seconds()}s') print(f'Create batting ratings: {(rating_end - rating_start).total_seconds():.2f}s')
# Calculate defense ratings # Calculate defense ratings
defense_start = datetime.datetime.now() defense_start = datetime.datetime.now()
all_defense_ratings = calc_positions(batting_stats) all_defense_ratings = calc_positions(batting_stats)
defense_end = datetime.datetime.now() defense_end = datetime.datetime.now()
print(f'Create defense ratings: {(defense_end - defense_start).total_seconds()}s') print(f'Create defense ratings: {(defense_end - defense_start).total_seconds():.2f}s')
# Post all data
if post_data:
print(f'Posting player data...')
post_start = datetime.datetime.now()
num_players = await post_batter_data(batting_stats, all_batting_cards, all_batting_ratings, all_defense_ratings)
post_end = datetime.datetime.now()
print(f'Post player data: {(post_end - post_start).total_seconds()}s')
post_msg = f'Posted {num_players} players to the database'
logging.info(post_msg)
print(post_msg)
else:
post_msg = f'Players are NOT being posted to the database'
logging.warning(post_msg)
print(post_msg)
return batting_stats return batting_stats
async def main(args):
    """Run the batter pipeline for the configured date window and report timings.

    Batting stats are computed (and posted, since ``post_data=True``), written
    to ``batting_stats.csv``, and a timing summary is printed. The pitcher
    timers are started and stopped back to back; no pitcher work runs between
    them in this function.
    """
    batter_start = datetime.datetime.now()
    batting_stats = await run_batters(
        f'{DATA_INPUT_FILE_PATH}',
        start_date=19980101,
        end_date=19980430,
        post_data=True,
    )
    batting_stats.to_csv(f'batting_stats.csv')
    batter_end = datetime.datetime.now()

    pitcher_start = datetime.datetime.now()
    pitcher_end = datetime.datetime.now()

    batter_secs = (batter_end - batter_start).total_seconds()
    pitcher_secs = (pitcher_end - pitcher_start).total_seconds()
    total_secs = (pitcher_end - batter_start).total_seconds()
    print(f'\n\nBatter time: {batter_secs:.2f}s \nPitcher time: {pitcher_secs:.2f}s\nTotal: {total_secs:.2f}s\n\nDone!')

    # Defense CSV export is currently switched off:
    # await store_defense_to_csv(1998)
if __name__ == '__main__': if __name__ == '__main__':