Added PA and AB to batter_stats
This commit is contained in:
parent
07faea0bc7
commit
1109a12434
1172
batting_stats.csv
1172
batting_stats.csv
File diff suppressed because it is too large
Load Diff
@ -12,11 +12,11 @@ date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetim
|
||||
log_level = logging.INFO
|
||||
logging.basicConfig(
|
||||
filename=f'logs/{date}.log',
|
||||
format='%(asctime)s - card-creation - %(levelname)s - %(message)s',
|
||||
format='%(asctime)s - retrosheet_data - %(levelname)s - %(message)s',
|
||||
level=log_level
|
||||
)
|
||||
FILE_PATH = 'data-input/retrosheet/'
|
||||
EVENTS_FILENAME = 'retrosheets_events_1998_short.csv'
|
||||
EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few columns which were throwing dtype errors
|
||||
PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
|
||||
|
||||
|
||||
@ -39,6 +39,17 @@ def get_batting_stats_by_date(start_date: int, end_date: int) -> pd.DataFrame:
|
||||
bs['batter_id'] = all_plays['batter_id'].unique()
|
||||
bs = bs.set_index('batter_id')
|
||||
|
||||
pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PAvL')
|
||||
bs = pd.concat([bs, pal_series], axis=1)
|
||||
par_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('PAvR')
|
||||
bs = pd.concat([bs, par_series], axis=1)
|
||||
|
||||
|
||||
abl_series = all_plays[(all_plays.ab == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('ABvL')
|
||||
bs = pd.concat([bs, abl_series], axis=1)
|
||||
abr_series = all_plays[(all_plays.ab == 't') & (all_plays.pitcher_hand == 'r')].groupby('batter_id').count()['event_type'].astype(int).rename('ABvR')
|
||||
bs = pd.concat([bs, abr_series], axis=1)
|
||||
|
||||
# Basic counting stats
|
||||
for event_type, vs_hand, col_name in [
|
||||
('home run', 'r', 'HRvR'),
|
||||
@ -75,8 +86,16 @@ def get_batting_stats_by_date(start_date: int, end_date: int) -> pd.DataFrame:
|
||||
|
||||
|
||||
async def main(args):
    """Build the batting stats table, save it to CSV, and print timings.

    Calls ``get_batting_stats_by_date`` with ``start_date=101`` and
    ``end_date=430``, writes the returned frame to ``batting_stats.csv``,
    then prints the seconds spent calculating, saving, and in total.

    ``args`` is accepted but not read anywhere in this function.
    """
    print(f'Running the calcs...')
    calc_started = datetime.datetime.now()
    batting_stats = get_batting_stats_by_date(start_date=101, end_date=430)
    calc_finished = datetime.datetime.now()

    print(f'Saving to csv...')
    batting_stats.to_csv(f'batting_stats.csv')
    save_finished = datetime.datetime.now()

    # Durations are derived from the three timestamps taken above.
    calc_seconds = (calc_finished - calc_started).total_seconds()
    save_seconds = (save_finished - calc_finished).total_seconds()
    total_seconds = (save_finished - calc_started).total_seconds()
    print(f'Done!\n\nCalc time: {calc_seconds}s\nSave time: {save_seconds}s\nTotal: {total_seconds}s')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Loading…
Reference in New Issue
Block a user