Updated mround to return float
Counting stats nearly complete for batters
This commit is contained in:
parent
1109a12434
commit
0de2239100
1172
batting_stats.csv
1172
batting_stats.csv
File diff suppressed because it is too large
Load Diff
@ -570,7 +570,8 @@ def get_pitching_peripherals(season: int):
|
||||
|
||||
|
||||
def mround(x, prec=2, base=.05):
    """Round ``x`` to the nearest multiple of ``base`` and return a float.

    The division and multiplication are done with ``Decimal`` values built
    from the *string* form of the inputs, so binary floating-point noise
    cannot push a value into the wrong bucket.

    Args:
        x: Value to round; ``str(x)`` must be a valid decimal literal.
        prec: Number of decimal places kept in the returned float.
        base: Step size to round to.

    Returns:
        The multiple of ``base`` closest to ``x``, as a ``float`` rounded to
        ``prec`` places. Exact ties are resolved by Python's built-in
        ``round`` (round-half-to-even).

    Raises:
        decimal.DecimalException: If ``base`` is zero or an input cannot be
            parsed as a decimal number.
    """
    # Imported locally so the function does not depend on the module's
    # top-of-file imports.
    from decimal import Decimal

    step = Decimal(str(base))
    # round() on a Decimal without ndigits yields an int: the number of steps.
    step_count = round(Decimal(str(x)) / step)
    # Honour ``prec``; it is part of the signature and callers may rely on it.
    return round(float(step_count * step), prec)
|
||||
|
||||
|
||||
def chances_from_row(row_num):
|
||||
@ -913,7 +914,6 @@ def legacy_sanitize_chance_output(total_chances: float, min_chances: float = 1.0
|
||||
return x
|
||||
|
||||
|
||||
|
||||
def mlbteam_and_franchise(mlbam_playerid):
|
||||
api_url = f'https://statsapi.mlb.com/api/v1/people/{mlbam_playerid}?hydrate=currentTeam'
|
||||
logging.info(f'Calling {api_url}')
|
||||
|
||||
@ -8,6 +8,8 @@ from typing import Literal
|
||||
import pandas as pd
|
||||
import pybaseball as pb
|
||||
|
||||
from creation_helpers import get_all_pybaseball_ids
|
||||
|
||||
date = f'{datetime.datetime.now().year}-{datetime.datetime.now().month}-{datetime.datetime.now().day}'
|
||||
log_level = logging.INFO
|
||||
logging.basicConfig(
|
||||
@ -20,8 +22,8 @@ EVENTS_FILENAME = 'retrosheets_events_1998_short.csv' # Removed last few column
|
||||
PERSONNEL_FILENAME = 'retrosheets_personnel.csv'
|
||||
|
||||
|
||||
def get_events_by_date(file_path, start_date: int, end_date: int) -> pd.DataFrame:
    """Load play-by-play events and keep those inside an inclusive date window.

    Each row's date is taken from its ``game_id``: the characters from index 7
    up to (but excluding) the last one are converted to an integer and stored
    in a new ``date`` column.

    Args:
        file_path: CSV source passed directly to ``pandas.read_csv``; a path
            string, a path-like object or an open file-like object all work.
        start_date: Lowest ``date`` value to keep (inclusive). The callers in
            this file pass values such as ``101`` and ``430`` -- presumably
            ``month * 100 + day``; confirm against the event data.
        end_date: Highest ``date`` value to keep (inclusive).

    Returns:
        The rows whose ``date`` lies in ``[start_date, end_date]``, including
        the added ``date`` column.
    """
    # game_id must stay a string so it can be sliced.
    all_plays = pd.read_csv(file_path, dtype={'game_id': 'str'})
    all_plays['date'] = all_plays['game_id'].str[7:-1].astype(int)

    # Series.between is inclusive on both ends, matching ``>=`` / ``<=``.
    in_window = all_plays['date'].between(start_date, end_date)
    return all_plays[in_window]
|
||||
@ -32,70 +34,111 @@ def get_result_series(plays: pd.DataFrame, event_type: str, pitcher_hand: Litera
|
||||
return this_series
|
||||
|
||||
|
||||
def get_batting_stats_by_date(start_date: int, end_date: int) -> pd.DataFrame:
|
||||
all_plays = get_events_by_date(start_date, end_date)
|
||||
# def get_batting_handedness(plays: pd.DataFrame) -> pd.DataFrame:
|
||||
|
||||
|
||||
bs = pd.DataFrame()
|
||||
bs['batter_id'] = all_plays['batter_id'].unique()
|
||||
bs = bs.set_index('batter_id')
|
||||
|
||||
pal_series = all_plays[(all_plays.batter_event == 't') & (all_plays.pitcher_hand == 'l')].groupby('batter_id').count()['event_type'].astype(int).rename('PAvL')
|
||||
def get_player_ids(plays: pd.DataFrame, which: Literal['batters', 'pitchers']) -> pd.DataFrame:
    """Build an id table for every batter or pitcher appearing in ``plays``.

    Names come from the personnel CSV at ``FILE_PATH + PERSONNEL_FILENAME``;
    the other id systems come from ``pybaseball.playerid_reverse_lookup``
    keyed on the retro id.

    Args:
        plays: Event rows containing a ``batter_id`` / ``pitcher_id`` column.
        which: ``'batters'`` selects ``batter_id``; anything else selects
            ``pitcher_id``.

    Returns:
        A frame indexed by the selected id column with ``last_name``,
        ``use_name``, ``key_mlbam``, ``key_retro``, ``key_bbref`` and
        ``key_fangraphs`` columns. Players missing from the pybaseball lookup
        are reported on stdout and kept with NaN in the ``key_*`` columns.
    """
    id_key = 'batter_id' if which == 'batters' else 'pitcher_id'
    retro_players = pd.read_csv(f'{FILE_PATH}{PERSONNEL_FILENAME}')

    # Right join: keep every player seen in ``plays`` even when the personnel
    # file has no row for them (their name columns are then NaN).
    unique_players = pd.Series(plays[id_key].unique()).to_frame('id')
    players = pd.merge(
        left=retro_players,
        right=unique_players,
        how='right',
        on='id'
    ).rename(columns={'id': id_key})
    players = players[[id_key, 'last_name', 'use_name']]

    # One batched lookup instead of one call per player.
    start_time = datetime.datetime.now()
    lookup = pb.playerid_reverse_lookup(players[id_key].tolist(), key_type='retro')
    other_ids = lookup[['key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']]
    # Keep only the first match per retro id, as the per-row version did.
    other_ids = other_ids.drop_duplicates(subset='key_retro')
    end_time = datetime.datetime.now()
    print(f'ID lookup time: {(end_time - start_time).total_seconds():.2f}s')

    # Report unmatched ids instead of crashing on them.
    unmatched = players.loc[~players[id_key].isin(other_ids['key_retro']), id_key]
    for retro_id in unmatched:
        print(f'Could not find id {retro_id} in pybaseball lookup')

    # Left join so unmatched players survive with NaN ids.
    players = pd.merge(
        left=players,
        right=other_ids,
        how='left',
        left_on=id_key,
        right_on='key_retro'
    )
    return players.set_index(id_key)
|
||||
|
||||
|
||||
def get_base_batting_df(all_plays: pd.DataFrame) -> pd.DataFrame:
    """Create the per-batter frame with ids plus PA/AB splits by pitcher hand.

    Args:
        all_plays: Event rows with ``batter_id``, ``batter_event``, ``ab``,
            ``pitcher_hand`` and ``event_type`` columns.

    Returns:
        The frame from ``get_player_ids(all_plays, 'batters')`` extended with
        ``PA_vL``, ``PA_vR``, ``AB_vL`` and ``AB_vR`` columns. Batters with no
        qualifying rows for a split have NaN there; this function does not
        fill missing values.
    """
    bs = get_player_ids(all_plays, 'batters')

    # (flag column that must equal 't', pitcher hand, output column name)
    splits = [
        ('batter_event', 'l', 'PA_vL'),
        ('batter_event', 'r', 'PA_vR'),
        ('ab', 'l', 'AB_vL'),
        ('ab', 'r', 'AB_vR'),
    ]

    columns = [bs]
    for flag_col, hand, col_name in splits:
        matching = all_plays[(all_plays[flag_col] == 't') & (all_plays.pitcher_hand == hand)]
        # Count non-null event_type per batter; selecting the column first
        # avoids counting every other column only to discard the result.
        counts = matching.groupby('batter_id')['event_type'].count()
        columns.append(counts.astype(int).rename(col_name))

    # One concat aligns everything on the batter_id index in a single pass.
    return pd.concat(columns, axis=1)
|
||||
|
||||
|
||||
# def get_batting_stat_range(start_month: int, start_day: int, end_month: int, end_day: int):
|
||||
# return get_batting_stats_by_date(
|
||||
# start_date=start_month * 100 + start_day,
|
||||
# end_date=end_month * 100 + end_day
|
||||
# )
|
||||
def get_batting_stats_by_date(file_path, start_date: int, end_date: int) -> pd.DataFrame:
    """Compute per-batter counting stats for events inside a date window.

    Args:
        file_path: Events CSV source, forwarded to ``get_events_by_date``.
        start_date: Lower bound of the window, forwarded unchanged.
        end_date: Upper bound of the window, forwarded unchanged.

    Returns:
        The frame from ``get_base_batting_df`` extended with one column per
        (event type, pitcher hand) pair listed below. Missing values are
        replaced with 0.
    """
    all_plays = get_events_by_date(file_path, start_date, end_date)
    batting_stats = get_base_batting_df(all_plays)

    # Basic counting stats: (event type, pitcher hand, output column name)
    counting_stats = [
        ('home run', 'r', 'HR_vR'),
        ('home run', 'l', 'HR_vL'),
        ('single', 'r', '1B_vR'),
        ('single', 'l', '1B_vL'),
        ('double', 'r', '2B_vR'),
        ('double', 'l', '2B_vL'),
        ('triple', 'r', '3B_vR'),
        ('triple', 'l', '3B_vL'),
        ('walk', 'r', 'BB_vR'),
        ('walk', 'l', 'BB_vL'),
        ('strikeout', 'r', 'SO_vR'),
        ('strikeout', 'l', 'SO_vL'),
        ('hit by pitch', 'r', 'HBP_vR'),
        ('hit by pitch', 'l', 'HBP_vL'),
    ]

    # Collect every series first and concatenate once; re-concatenating inside
    # the loop copies the whole growing frame on each iteration.
    pieces = [batting_stats]
    pieces.extend(
        get_result_series(all_plays, event_type, vs_hand, col_name)
        for event_type, vs_hand, col_name in counting_stats
    )
    batting_stats = pd.concat(pieces, axis=1)

    # Bespoke queries
    # Remaining:

    # fill na to 0 following counting stats
    return batting_stats.fillna(0)
|
||||
|
||||
|
||||
async def main(args):
    """Compute the batting stats, write them to ``batting_stats.csv``, and print timings.

    ``args`` is accepted but not used. The events file is
    ``FILE_PATH + EVENTS_FILENAME`` and the date window is 101 through 430.
    """
    print('Running the calcs...')
    started_at = datetime.datetime.now()
    events_csv = f'{FILE_PATH}{EVENTS_FILENAME}'
    data = get_batting_stats_by_date(events_csv, start_date=101, end_date=430)
    calc_done_at = datetime.datetime.now()

    print('Saving to csv...')
    data.to_csv('batting_stats.csv')
    saved_at = datetime.datetime.now()

    calc_secs = (calc_done_at - started_at).total_seconds()
    save_secs = (saved_at - calc_done_at).total_seconds()
    total_secs = (saved_at - started_at).total_seconds()
    print(f'Stat calc time: {calc_secs:.2f}s\nSave time: {save_secs:.2f}s\nTotal: {total_secs:.2f}s\n\nDone!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
197418
tests/data-input/retrosheets_events_1998_short.csv
Normal file
197418
tests/data-input/retrosheets_events_1998_short.csv
Normal file
File diff suppressed because it is too large
Load Diff
25826
tests/data-input/retrosheets_personnel.csv
Normal file
25826
tests/data-input/retrosheets_personnel.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,56 +1,9 @@
|
||||
from decimal import ROUND_HALF_EVEN, Decimal
|
||||
import math
|
||||
|
||||
from batters.calcs_batter import bp_singles, wh_singles, sanitize_chance_output
|
||||
from creation_helpers import sanitize_chance_output, mround
|
||||
from batters.calcs_batter import bp_singles, wh_singles
|
||||
|
||||
|
||||
def test_sanitize():
    """Check sanitize_chance_output against known input/output pairs."""
    expected_by_input = {
        6: 6.0,
        1.21: 1.2,
        4.77: 4.75,
        4.78: 4.8,
    }
    for raw, expected in expected_by_input.items():
        assert sanitize_chance_output(raw) == expected
|
||||
|
||||
|
||||
def test_mround():
    """Check mround against known input/output pairs using its defaults."""
    cases = [
        (6, 6.0),
        (1.21, 1.2),
        (4.77, 4.75),
        (4.78, 4.8),
    ]
    for raw, expected in cases:
        assert mround(raw) == expected
|
||||
|
||||
|
||||
def test_decimals():
    """A Decimal built from an int compares equal to that int."""
    eight = Decimal(8)
    assert eight == 8
|
||||
|
||||
@ -1,7 +1,52 @@
|
||||
from creation_helpers import pd_positions_df
|
||||
from creation_helpers import pd_positions_df, mround, sanitize_chance_output
|
||||
|
||||
|
||||
def test_positions_df():
    """Smoke test: pd_positions_df(19) runs and returns a result."""
    cardset_19_pos = pd_positions_df(19)

    # The previous ``assert True == True`` could never fail. This at least
    # checks that a value came back.
    # NOTE(review): the return type is not visible from here -- presumably a
    # DataFrame; tighten this assertion once the expected shape is confirmed.
    assert cardset_19_pos is not None
|
||||
|
||||
|
||||
def test_mround():
    """Check mround on a value already on a 0.05 step and on one that is not."""
    cases = [
        (6.4, 6.4),
        (6.66, 6.65),
    ]
    for raw, expected in cases:
        assert mround(raw) == expected
|
||||
|
||||
|
||||
def test_sanitize():
    """Check sanitize_chance_output against known input/output pairs."""
    expected_by_input = {
        6: 6.0,
        1.21: 1.2,
        4.77: 4.75,
        4.78: 4.8,
    }
    for raw, expected in expected_by_input.items():
        assert sanitize_chance_output(raw) == expected
|
||||
|
||||
Loading…
Reference in New Issue
Block a user