- Add Sippie Swartzel custom batter profile (0.820 OPS, SS/RF, no HR power) - Update Kalin Young profile (0.891 OPS, All-Star rarity) - Update Admiral Ball Traits profile with innings field - Fix S3 cache-busting to include Unix timestamp for same-day updates - Add pd_cards/core/upload.py and scouting.py modules - Add custom card submission scripts and documentation - Add uv.lock for dependency tracking
671 lines
28 KiB
Python
671 lines
28 KiB
Python
"""
|
|
Scouting report generation core logic.
|
|
|
|
Business logic for generating batting and pitching scouting reports.
|
|
"""
|
|
|
|
import asyncio
|
|
import datetime
|
|
from functools import partial
|
|
import multiprocessing
|
|
from pathlib import Path
|
|
from typing import Literal, Optional, List
|
|
|
|
import pandas as pd
|
|
|
|
# These imports are resolved at runtime when called from CLI
|
|
# since the CLI adds the parent directory to sys.path
|
|
from db_calls import db_get
|
|
from exceptions import logger, log_exception
|
|
|
|
|
|
# =============================================================================
|
|
# Shared Utilities
|
|
# =============================================================================
|
|
|
|
def log_time(
    which: Literal['start', 'end'],
    message: str = '',
    print_to_console: bool = True,
    start_time: datetime.datetime = None
) -> Optional[datetime.datetime]:
    """
    Start or stop a simple wall-clock timer, logging the event.

    Args:
        which: 'start' begins a timing window; any other value is handled as
            the end of one.
        message: Text written to the log and, optionally, to stdout.
        print_to_console: When True, ``message`` is also printed. An empty
            message is then reported through ``log_exception``.
        start_time: Timestamp returned by the matching 'start' call; needed
            when ending a timer.

    Returns:
        The current time for a 'start' call, otherwise None.
    """
    if print_to_console and len(message) == 0:
        log_exception(KeyError, 'A message must be included when print_to_console equals True')

    # Opening a timing window: hand the caller the timestamp to pass back later.
    if which == 'start':
        logger.info(f'starting timer - {message}')
        if print_to_console:
            print(message)
        return datetime.datetime.now()

    # Closing a timing window without its start timestamp is a caller error.
    if start_time is None:
        log_exception(KeyError, 'start_time must be passed to log_time() when which equals \'end\'')
        return None

    elapsed = (datetime.datetime.now() - start_time).total_seconds()
    logger.info(f'ending timer - {message}: {elapsed:.2f}s\n')
    if print_to_console:
        print(f'{message}\n')
    return None
|
|
|
|
|
|
async def fetch_data(data: tuple) -> dict:
    """
    Run a single API query and hand back its response.

    Args:
        data: Sequence whose first item is the endpoint name and whose second
            item is the query parameters forwarded to ``db_get``.

    Returns:
        Whatever ``db_get`` returns for the request.
    """
    timer = log_time('start', print_to_console=False)
    response = await db_get(endpoint=data[0], params=data[1])
    log_time('end', print_to_console=False, start_time=timer)
    return response
|
|
|
|
|
|
# =============================================================================
|
|
# Batting Scouting
|
|
# =============================================================================
|
|
|
|
def build_series(label: str, code: str, pos_code: str, all_positions: list) -> pd.Series:
    """
    Collect one rating field for every player listed at a position.

    Args:
        label: Human-readable prefix used in the log line and the Series name.
        code: Key of the rating to read from each position record.
        pos_code: Position to keep (records with another 'position' are skipped).
        all_positions: Position records, each with 'player', 'position' and
            the ``code`` key.

    Returns:
        Series indexed by player_id, named ``'<label> <pos_code>'``.
    """
    logger.info(f'Building {label} series for {pos_code}')
    rating_by_player = {
        record['player']['player_id']: record[code]
        for record in all_positions
        if record['position'] == pos_code
    }
    return pd.Series(rating_by_player, name=f'{label} {pos_code}')
|
|
|
|
|
|
def build_ranges(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'range' rating of each player at ``pos_code`` as a 'Range <pos>' Series."""
    return build_series(
        label='Range',
        code='range',
        pos_code=pos_code,
        all_positions=all_positions,
    )
|
|
|
|
|
|
def build_errors(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'error' rating of each player at ``pos_code`` as an 'Error <pos>' Series, logging it."""
    error_series = build_series(
        label='Error',
        code='error',
        pos_code=pos_code,
        all_positions=all_positions,
    )
    logger.info(f'error ratings:\n{error_series}')
    return error_series
|
|
|
|
|
|
def build_of_arms(all_positions: list, pos_code: str) -> pd.Series:
    """
    Collect the 'arm' rating of every player listed at one outfield position.

    Unlike the other builders, the resulting Series is always named 'Arm OF'
    regardless of ``pos_code``.
    """
    logger.info(f'Building OF series for {pos_code}')
    arm_by_player = {
        record['player']['player_id']: record['arm']
        for record in all_positions
        if record['position'] == pos_code
    }
    return pd.Series(arm_by_player, name='Arm OF')
|
|
|
|
|
|
def build_c_arms(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'arm' rating of each player at ``pos_code`` as an 'Arm <pos>' Series, logging it."""
    arm_series = build_series(
        label='Arm',
        code='arm',
        pos_code=pos_code,
        all_positions=all_positions,
    )
    logger.info(f'arm ratings:\n{arm_series}')
    return arm_series
|
|
|
|
|
|
def build_c_pb(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'pb' rating of each player at ``pos_code`` as a 'PB <pos>' Series."""
    return build_series(
        label='PB',
        code='pb',
        pos_code=pos_code,
        all_positions=all_positions,
    )
|
|
|
|
|
|
def build_c_throw(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'overthrow' rating of each player at ``pos_code`` as a 'Throw <pos>' Series."""
    return build_series(
        label='Throw',
        code='overthrow',
        pos_code=pos_code,
        all_positions=all_positions,
    )
|
|
|
|
|
|
async def get_batting_scouting_dfs(cardset_ids: Optional[List[int]] = None) -> pd.DataFrame:
    """
    Fetch and build batting scouting dataframes from API.

    Three requests are issued concurrently (ratings vs. left, ratings vs.
    right, card positions). The two ratings sets are merged per player with
    ``_vl`` / ``_vr`` suffixes and the per-position defensive ratings are
    joined on as extra columns ('Range <pos>', 'Error <pos>', 'Arm OF',
    'Arm C', 'PB C', 'Throw C').

    Args:
        cardset_ids: List of cardset IDs to filter by (empty/None = all)

    Returns:
        DataFrame indexed by player_id with batting ratings and defensive
        positions joined
    """
    cardset_params = [('cardset_id', x) for x in (cardset_ids or [])]
    # team_id and ts are fixed query values sent with every ratings request.
    ratings_params = [('team_id', 31), ('ts', 's37136685556r6135248705'), *cardset_params]

    API_CALLS = [
        ('battingcardratings', [('vs_hand', 'vL'), *ratings_params]),
        ('battingcardratings', [('vs_hand', 'vR'), *ratings_params]),
        ('cardpositions', cardset_params)
    ]

    start_time = log_time('start', message='Pulling all batting card ratings and positions')
    tasks = [fetch_data(params) for params in API_CALLS]
    api_data = await asyncio.gather(*tasks)
    log_time('end', f'Pulled {api_data[0]["count"] + api_data[1]["count"]} batting card ratings and {api_data[2]["count"]} positions', start_time=start_time)

    start_time = log_time('start', message='Building base dataframes')

    # Flatten the vs-left records: card-level fields and player metadata are
    # hoisted to the top level so they become plain dataframe columns.
    vl_vals = api_data[0]['ratings']
    for x in vl_vals:
        x.update(x['battingcard'])
        player = x['battingcard']['player']
        x['player_id'] = player['player_id']
        x['player_name'] = player['p_name']
        x['rarity'] = player['rarity']['name']
        x['cardset_id'] = player['cardset']['id']
        x['cardset_name'] = player['cardset']['name']
        del x['battingcard']
        del x['player']

    # The vs-right records only need the join key; card metadata comes from vl.
    vr_vals = api_data[1]['ratings']
    for x in vr_vals:
        x['player_id'] = x['battingcard']['player']['player_id']
        del x['battingcard']

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)

    log_time('end', 'Base dataframes are complete', start_time=start_time)
    start_time = log_time('start', message='Building combined dataframe')

    bat_df = pd.merge(vl, vr, on='player_id', suffixes=('_vl', '_vr')).set_index('player_id', drop=False)

    log_time('end', 'Combined dataframe is complete', start_time=start_time)

    POSITION_DATA = api_data[2]['positions']
    series_list = []
    POSITIONS = ['P', 'C', '1B', '2B', '3B', 'SS', 'LF', 'CF', 'RF']

    # Each builder is a single linear filter over POSITION_DATA. Shipping the
    # whole list to worker processes costs more than the filter itself (and
    # blocks the event loop while the pool spins up), so they run in-process.
    start_time = log_time('start', message='Building range series')
    ranges = [build_ranges(POSITION_DATA, pos) for pos in POSITIONS]
    series_list.extend(ranges)
    log_time('end', f'Processed {len(ranges)} position ranges', start_time=start_time)

    start_time = log_time('start', message='Building error series')
    errors = [build_errors(POSITION_DATA, pos) for pos in POSITIONS]
    series_list.extend(errors)
    log_time('end', f'Processed {len(errors)} position errors', start_time=start_time)

    start_time = log_time('start', message='Building OF arm series')
    lf_arms = build_of_arms(POSITION_DATA, 'LF')
    cf_arms = build_of_arms(POSITION_DATA, 'CF')
    rf_arms = build_of_arms(POSITION_DATA, 'RF')

    # Merge the three outfield spots into one 'Arm OF' value per player (the
    # max across the spots the player is listed at). The filler for a missing
    # spot must never win the max: a filler of 0 would overwrite every
    # negative arm rating for players not listed at all three positions.
    missing_arm = float('-inf')
    combined_series = lf_arms.combine(cf_arms, max, fill_value=missing_arm)
    combined_series = combined_series.combine(rf_arms, max, fill_value=missing_arm)
    series_list.extend([combined_series])
    log_time('end', f'Processed {len(combined_series)} OF arms', start_time=start_time)

    start_time = log_time('start', message='Building C arm series')
    c_arms = build_c_arms(POSITION_DATA, 'C')
    series_list.extend([c_arms])
    log_time('end', f'Processed {len(c_arms)} catcher arms', start_time=start_time)

    start_time = log_time('start', message='Building C PB series')
    passed_ball = [build_c_pb(POSITION_DATA, 'C')]
    series_list.extend(passed_ball)
    log_time('end', f'Processed {len(passed_ball)} C PB series', start_time=start_time)

    start_time = log_time('start', message='Building C OT series')
    overthrows = [build_c_throw(POSITION_DATA, 'C')]
    series_list.extend(overthrows)
    log_time('end', f'Processed {len(overthrows)} C OT series', start_time=start_time)

    logger.info(f'series_list: {series_list}')
    return bat_df.join(series_list)
|
|
|
|
|
|
async def calc_batting_basic(batting_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Calculate basic batting scouting metrics and save to CSV.

    Every metric is a raw per-player score converted to a 0-100 percentile
    rank across the dataframe. The metric columns ('Speed', 'Steal',
    'Reaction', 'Arm', 'Fielding', 'Contact L/R', 'Power L/R', 'Vision',
    'Rating') are added to ``batting_dfs`` in place, and a summary is written
    to ``<output_dir>/batting-basic.csv``.

    Args:
        batting_dfs: DataFrame with batting ratings
        output_dir: Directory to save output CSV
    """
    def to_percentile(raw: pd.Series) -> pd.Series:
        """Convert raw scores to rounded 0-100 percentile ranks."""
        return round(raw.rank(pct=True) * 100)

    def get_raw_speed(df_data):
        speed_raw = df_data['running'] / 20 + df_data['steal_jump']
        if df_data['steal_auto']:
            speed_raw += 0.5
        return speed_raw

    start_time = log_time('start', 'Beginning Speed calcs')
    batting_dfs['Speed'] = to_percentile(batting_dfs.apply(get_raw_speed, axis=1))
    log_time('end', 'Done Speed calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stealing calcs')

    def get_raw_steal(df_data):
        return (
            ((df_data['steal_high'] / 20) + (df_data['steal_low'] / 20)) * df_data['steal_jump']
        )

    batting_dfs['Steal'] = to_percentile(batting_dfs.apply(get_raw_steal, axis=1))
    log_time('end', 'Done Stealing calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Reaction calcs')

    def get_raw_reaction(df_data):
        # Lower range values score exponentially higher; unplayed positions
        # (NaN) contribute nothing.
        raw_total = 0
        for pos_range in [df_data['Range C'], df_data['Range 1B'], df_data['Range 2B'], df_data['Range 3B'],
                          df_data['Range SS'], df_data['Range LF'], df_data['Range CF'], df_data['Range RF']]:
            if pd.notna(pos_range):
                raw_total += 10 ** (5 - pos_range)
        return raw_total

    batting_dfs['Reaction'] = to_percentile(batting_dfs.apply(get_raw_reaction, axis=1))
    log_time('end', 'Done Reaction calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Arm calcs')

    def get_raw_arm(df_data):
        # --- Outfield: score from the first listed spot, checked RF, CF, LF.
        of_arm = None
        of_pos = None
        if pd.notna(df_data['Range RF']):
            of_pos = 'RF'
        elif pd.notna(df_data['Range CF']):
            of_pos = 'CF'
        elif pd.notna(df_data['Range LF']):
            of_pos = 'LF'

        if of_pos is not None:
            # Negative arm values are scaled up; non-negative ones count down from 5.
            if df_data['Arm OF'] < 0:
                of_raw = df_data['Arm OF'] * -10
            else:
                of_raw = (5 - df_data['Arm OF'])

            if of_pos == 'RF':
                of_raw = of_raw * 1.5
                of_raw += ((6 - df_data['Range RF']) * 4)
            elif of_pos == 'CF':
                of_raw += ((6 - df_data['Range CF']) * 3)
            elif of_pos == 'LF':
                of_raw = of_raw / 2
                of_raw += ((6 - df_data['Range LF']) * 2)
            of_arm = of_raw

        # --- Infield: weighted sum of range at every infield spot played.
        if_arm = None
        if pd.notna(df_data['Range 3B']) or pd.notna(df_data['Range 2B']) or pd.notna(df_data['Range 1B']) or \
                pd.notna(df_data['Range SS']):
            range_totals = 0
            if pd.notna(df_data['Range 3B']):
                range_totals += ((6 - df_data['Range 3B']) * 5)
            if pd.notna(df_data['Range SS']):
                range_totals += ((6 - df_data['Range SS']) * 4)
            if pd.notna(df_data['Range 2B']):
                range_totals += ((6 - df_data['Range 2B']) * 3)
            if pd.notna(df_data['Range 1B']):
                range_totals += (6 - df_data['Range 1B'])
            if_arm = 100 - (50 - range_totals)

        # --- Catcher: an arm of -5 scores 100 outright; otherwise capped at 100.
        c_arm = None
        if pd.notna(df_data['Arm C']):
            if df_data['Arm C'] == -5:
                c_arm = 100
            else:
                temp_arm = 20 + ((10 - df_data['Arm C']) * 3) + (20 - df_data['PB C']) + (20 - df_data['Throw C']) - \
                    df_data['Error C']
                c_arm = min(100, temp_arm)

        # Precedence: catcher, then outfield, then infield; 1 when no position applies.
        if c_arm is not None:
            return c_arm
        elif of_arm is not None:
            return of_arm
        elif if_arm is not None:
            return if_arm
        else:
            return 1

    batting_dfs['Arm'] = to_percentile(batting_dfs.apply(get_raw_arm, axis=1))
    log_time('end', 'Done Arm calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Fielding calcs')

    def get_raw_fielding(df_data):
        # Average of the infield / outfield / catcher error scores, counting
        # only the groups the player actually appears in.
        if_error, of_error, c_error = 0, 0, 0
        denom = 0
        if pd.notna(df_data['Error 3B']) or pd.notna(df_data['Error 2B']) or pd.notna(df_data['Error 1B']) or \
                pd.notna(df_data['Error SS']):
            raw_if = 100
            if pd.notna(df_data['Error 3B']):
                raw_if -= (df_data['Error 3B'] * 2)
            if pd.notna(df_data['Error SS']):
                raw_if -= (df_data['Error SS'] * .75)
            if pd.notna(df_data['Error 2B']):
                raw_if -= (df_data['Error 2B'] * 1.25)
            if pd.notna(df_data['Error 1B']):
                raw_if -= (df_data['Error 1B'] * 2)
            if_error = max(1, raw_if)
            denom += 1

        if pd.notna(df_data['Error LF']) or pd.notna(df_data['Error CF']) or pd.notna(df_data['Error RF']):
            raw_of = 100
            if pd.notna(df_data['Error LF']):
                raw_of -= (df_data['Error LF'] * 2)
            if pd.notna(df_data['Error CF']):
                raw_of -= (df_data['Error CF'] * .75)
            if pd.notna(df_data['Error RF']):
                raw_of -= (df_data['Error RF'] * 1.25)
            of_error = max(1, raw_of)
            denom += 1

        if pd.notna(df_data['Error C']):
            c_error = max(100 - (df_data['Error C'] * 5) - df_data['Throw C'] - df_data['PB C'], 1)
            denom += 1

        # max(denom, 1) avoids dividing by zero for players with no fielding data.
        return sum([if_error, of_error, c_error]) / max(denom, 1)

    batting_dfs['Fielding'] = to_percentile(batting_dfs.apply(get_raw_fielding, axis=1))
    log_time('end', 'Done Fielding calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning AVG vL calcs')
    batting_dfs['Contact L'] = to_percentile(batting_dfs['avg_vl'])
    log_time('end', 'Done AVG vL calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning AVG vR calcs')
    batting_dfs['Contact R'] = to_percentile(batting_dfs['avg_vr'])
    log_time('end', 'Done AVG vR calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning PWR vL calcs')
    batting_dfs['Power L'] = to_percentile(batting_dfs['slg_vl'])
    log_time('end', 'Done PWR vL calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning PWR vR calcs')
    batting_dfs['Power R'] = to_percentile(batting_dfs['slg_vr'])
    log_time('end', 'Done PWR vR calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Vision calcs')

    def get_raw_vision(df_data):
        # (weighted OBP - weighted AVG) * 5, minus a weighted strikeout term.
        # NOTE(review): the strikeout divisor is 208 here while the pitching
        # metrics in this module divide by 108 - confirm this is intended.
        return (
            ((((df_data['obp_vr'] * 0.67) + (df_data['obp_vl'] * 0.33)) -
              ((df_data['avg_vr'] * 0.67) + (df_data['avg_vl'] * 0.33))) * 5) -
            (((df_data['strikeout_vl'] * 0.33) + (df_data['strikeout_vr'] * 0.67)) / 208)
        )

    batting_dfs['Vision'] = to_percentile(batting_dfs.apply(get_raw_vision, axis=1))
    log_time('end', 'Done Vision calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Rating calcs')

    def get_raw_rating(df_data):
        # Overall blend of the percentile metrics computed above:
        # defense x2, running x1, hitting (contact/power/vision) x6.
        return (
            ((df_data['Reaction'] + df_data['Arm'] + df_data['Fielding']) * 2) +
            (df_data['Speed'] + df_data['Steal']) +
            ((((df_data['Contact R'] + df_data['Power R']) * 0.67) +
              ((df_data['Contact L'] + df_data['Power L']) * 0.33) + df_data['Vision']) * 6)
        )

    batting_dfs['Rating'] = to_percentile(batting_dfs.apply(get_raw_rating, axis=1))
    log_time('end', 'Done Rating calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning write to file')
    output = batting_dfs[[
        'player_id', 'player_name', 'Rating', 'Contact R', 'Contact L', 'Power R', 'Power L', 'Vision', 'Speed',
        'Steal', 'Reaction', 'Arm', 'Fielding', 'hand', 'cardset_name'
    ]]
    # Let pandas own the file handle: it writes UTF-8 and manages newlines
    # itself, so names with non-ASCII characters and Windows line endings are
    # handled correctly (text-mode open() + a pre-rendered string is not).
    output.to_csv(output_dir / 'batting-basic.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)
|
|
|
|
|
|
async def calc_batting_ratings(batting_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Filter batting ratings and save to CSV.

    Identifying columns are moved to the front, the per-split id / vs_hand
    columns are dropped, and the result is written to
    ``<output_dir>/batting-ratings.csv``. ``batting_dfs`` is not modified.

    Args:
        batting_dfs: DataFrame with batting ratings
        output_dir: Directory to save output CSV
    """
    start_time = log_time('start', 'Beginning Ratings filtering')
    first = ['player_id', 'player_name', 'cardset_name', 'rarity', 'hand', 'variant']
    # Columns in `first` are excluded from the tail so they are not repeated.
    exclude = set(first) | {'id_vl', 'id_vr', 'vs_hand_vl', 'vs_hand_vr'}
    output = batting_dfs[first + [col for col in batting_dfs.columns if col not in exclude]]
    log_time('end', 'Done filtering ratings', start_time=start_time)

    start_time = log_time('start', 'Beginning write to file')
    # Let pandas own the file handle: it writes UTF-8 and manages newlines
    # itself, which text-mode open() + a pre-rendered string does not.
    output.to_csv(output_dir / 'batting-ratings.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)
|
|
|
|
|
|
# =============================================================================
|
|
# Pitching Scouting
|
|
# =============================================================================
|
|
|
|
async def get_pitching_scouting_dfs(cardset_ids: List[int] = None) -> pd.DataFrame:
    """
    Fetch and build pitching scouting dataframes from API.

    Ratings vs. left, ratings vs. right and the 'P' card positions are
    requested concurrently. The two ratings sets are merged per player with
    ``_vl`` / ``_vr`` suffixes, then 'Range P' and 'Error P' are joined on.

    Args:
        cardset_ids: List of cardset IDs to filter by (empty = all)

    Returns:
        DataFrame with pitching ratings and defensive positions joined
    """
    cardset_params = [('cardset_id', cardset) for cardset in (cardset_ids or [])]
    ratings_params = [('team_id', 31), ('ts', 's37136685556r6135248705'), *cardset_params]

    requests = [
        ('pitchingcardratings', [('vs_hand', 'vL'), *ratings_params]),
        ('pitchingcardratings', [('vs_hand', 'vR'), *ratings_params]),
        ('cardpositions', [('position', 'P'), *cardset_params])
    ]

    timer = log_time('start', message='Pulling all pitching card ratings and positions')
    vl_response, vr_response, pos_response = await asyncio.gather(
        *[fetch_data(request) for request in requests]
    )
    ratings_pulled = vl_response["count"] + vr_response["count"]
    positions_pulled = pos_response["count"]
    log_time('end', f'Pulled {ratings_pulled} pitching card ratings and {positions_pulled} positions', start_time=timer)

    timer = log_time('start', message='Building base dataframes')

    # vs-left rows carry all card and player metadata, flattened to the top level.
    vl_rows = vl_response['ratings']
    for row in vl_rows:
        row.update(row['pitchingcard'])
        card = row['pitchingcard']
        player = card['player']
        row['player_id'] = player['player_id']
        row['player_name'] = player['p_name']
        row['rarity'] = player['rarity']['name']
        row['cardset_id'] = player['cardset']['id']
        row['cardset_name'] = player['cardset']['name']
        row['starter_rating'] = card['starter_rating']
        row['relief_rating'] = card['relief_rating']
        row['closer_rating'] = card['closer_rating']
        del row['pitchingcard']
        del row['player']

    # vs-right rows only need the join key.
    vr_rows = vr_response['ratings']
    for row in vr_rows:
        row['player_id'] = row['pitchingcard']['player']['player_id']
        del row['pitchingcard']

    left_df = pd.DataFrame(vl_rows)
    right_df = pd.DataFrame(vr_rows)

    merged = pd.merge(left_df, right_df, on='player_id', suffixes=('_vl', '_vr'))
    pit_df = merged.set_index('player_id', drop=False)

    log_time('end', 'Base dataframes are complete', start_time=timer)
    timer = log_time('start', message='Building defense series')

    positions = pos_response['positions']
    range_by_player = {entry['player']['player_id']: entry['range'] for entry in positions}
    error_by_player = {entry['player']['player_id']: entry['error'] for entry in positions}
    series_list = [
        pd.Series(range_by_player, name='Range P'),
        pd.Series(error_by_player, name='Error P'),
    ]
    log_time('end', f'Processed {len(positions)} defense series', start_time=timer)
    logger.info(f'series_list: {series_list}')

    return pit_df.join(series_list)
|
|
|
|
|
|
async def calc_pitching_basic(pitching_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Calculate basic pitching scouting metrics and save to CSV.

    Every metric is a raw per-player score converted to a 0-100 percentile
    rank across the dataframe. The metric columns ('Control L/R',
    'Stuff L/R', 'Fielding', 'Stamina', 'H/9', 'K/9', 'BB/9', 'HR/9',
    'Rating') are added to ``pitching_dfs`` in place, and a summary is written
    to ``<output_dir>/pitching-basic.csv``.

    Args:
        pitching_dfs: DataFrame with pitching ratings
        output_dir: Directory to save output CSV
    """
    raw_data = pitching_dfs

    def to_percentile(raw: pd.Series, ascending: bool = True) -> pd.Series:
        """Convert raw scores to rounded 0-100 percentile ranks."""
        return round(raw.rank(pct=True, ascending=ascending) * 100)

    def get_raw_leftcontrol(df_data):
        return ((1 - (df_data['obp_vl'] - df_data['avg_vl'])) * 100) + (1 - (df_data['wild_pitch'] / 20))

    start_time = log_time('start', 'Beginning Control L calcs')
    raw_data['Control L'] = to_percentile(raw_data.apply(get_raw_leftcontrol, axis=1))
    log_time('end', 'Done Control L calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Control R calcs')

    def get_raw_rightcontrol(df_data):
        return ((1 - (df_data['obp_vr'] - df_data['avg_vr'])) * 100) + (1 - (df_data['wild_pitch'] / 20))

    raw_data['Control R'] = to_percentile(raw_data.apply(get_raw_rightcontrol, axis=1))
    log_time('end', 'Done Control R calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stuff L calcs')

    def get_raw_leftstuff(df_data):
        # Slugging is deliberately counted twice relative to the home-run term.
        return 10 - (df_data['slg_vl'] + df_data['slg_vl'] + ((df_data['homerun_vl'] + df_data['bp_homerun_vl']) / 108))

    raw_data['Stuff L'] = to_percentile(raw_data.apply(get_raw_leftstuff, axis=1))
    log_time('end', 'Done Stuff L calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stuff R calcs')

    def get_raw_rightstuff(df_data):
        return 10 - (df_data['slg_vr'] + df_data['slg_vr'] + ((df_data['homerun_vr'] + df_data['bp_homerun_vr']) / 108))

    raw_data['Stuff R'] = to_percentile(raw_data.apply(get_raw_rightstuff, axis=1))
    log_time('end', 'Done Stuff R calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Fielding calcs')

    def get_raw_fielding(df_data):
        return ((6 - df_data['Range P']) * 10) + (50 - df_data['Error P'])

    raw_series = raw_data.apply(get_raw_fielding, axis=1)
    logger.info(f'max fld: {raw_series.max()} / min fld: {raw_series.min()}')
    raw_data['Fielding'] = to_percentile(raw_series)
    log_time('end', 'Done Fielding calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stamina calcs')

    def get_raw_stamina(df_data):
        # Use the rating when it is present and fall back to -1 when it is
        # missing. (The check was previously inverted, so the real rating was
        # never used and every pitcher scored from -1 or NaN.)
        spow = df_data['starter_rating'] if pd.notna(df_data['starter_rating']) else -1
        rpow = df_data['relief_rating'] if pd.notna(df_data['relief_rating']) else -1
        this_pow = spow if spow > rpow else rpow
        return (((this_pow * (df_data['obp_vr'] * (2 / 3))) + (this_pow * (df_data['obp_vl'] / 3))) * 4.5) + this_pow

    raw_data['Stamina'] = to_percentile(raw_data.apply(get_raw_stamina, axis=1))
    log_time('end', 'Done Stamina calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning H/9 calcs')

    def get_raw_hit(df_data):
        # 1 minus the weighted average allowed (2/3 vs. right, 1/3 vs. left).
        # The whole weighted sum must be subtracted, matching the HR/9 formula;
        # without the parentheses a higher average vs. left raised the score.
        return 1 - ((df_data['avg_vr'] * (2 / 3)) + (df_data['avg_vl'] / 3))

    raw_data['H/9'] = to_percentile(raw_data.apply(get_raw_hit, axis=1))
    log_time('end', 'Done H/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning K/9 calcs')

    def get_raw_k(df_data):
        return ((df_data['strikeout_vr'] / 108) * (2 / 3)) + ((df_data['strikeout_vl'] / 108) / 3)

    raw_data['K/9'] = to_percentile(raw_data.apply(get_raw_k, axis=1))
    log_time('end', 'Done K/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning BB/9 calcs')

    def get_raw_bb(df_data):
        return ((df_data['walk_vr'] / 108) * (2 / 3)) + ((df_data['walk_vl'] / 108) / 3)

    # Ranked descending: the fewest walks earn the highest percentile.
    raw_data['BB/9'] = to_percentile(raw_data.apply(get_raw_bb, axis=1), ascending=False)
    log_time('end', 'Done BB/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning HR/9 calcs')

    def get_raw_hr(df_data):
        return 1 - (
            (((df_data['homerun_vr'] + df_data['bp_homerun_vr']) / 108) * (2 / 3)) +
            (((df_data['homerun_vl'] + df_data['bp_homerun_vl']) / 108) / 3)
        )

    raw_data['HR/9'] = to_percentile(raw_data.apply(get_raw_hr, axis=1))
    log_time('end', 'Done HR/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Rating calcs')

    def get_raw_rating(df_data):
        # NOTE(review): this used to branch on starter vs. reliever, but both
        # branches applied exactly this formula, so the role never affected
        # the result. Reintroduce the branch if the weights should differ.
        return (
            ((df_data['H/9'] + df_data['K/9'] + df_data['BB/9'] + df_data['HR/9']) * 5) +
            (df_data['Fielding']) + (df_data['Stamina'] * 5) +
            (((df_data['Stuff L'] / 3) + (df_data['Stuff R'] * (2 / 3))) * 4) +
            (((df_data['Control L'] / 3) + (df_data['Control R'] * (2 / 3))) * 2)
        )

    raw_data['Rating'] = to_percentile(raw_data.apply(get_raw_rating, axis=1))

    output = raw_data[[
        'player_id', 'player_name', 'Rating', 'Control R', 'Control L', 'Stuff R', 'Stuff L', 'Stamina', 'Fielding',
        'H/9', 'K/9', 'BB/9', 'HR/9', 'hand', 'cardset_name'
    ]]

    log_time('end', 'Done Rating calcs', start_time=start_time)
    start_time = log_time('start', 'Beginning write csv')

    # Let pandas own the file handle: it writes UTF-8 and manages newlines
    # itself, which text-mode open() + a pre-rendered string does not.
    output.to_csv(output_dir / 'pitching-basic.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)
|
|
|
|
|
|
async def calc_pitching_ratings(pitching_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Filter pitching ratings and save to CSV.

    Identifying columns are moved to the front, the per-split id / vs_hand
    columns are dropped, and the result is written to
    ``<output_dir>/pitching-ratings.csv``. ``pitching_dfs`` is not modified.

    Args:
        pitching_dfs: DataFrame with pitching ratings
        output_dir: Directory to save output CSV
    """
    start_time = log_time('start', 'Beginning Ratings filtering')
    first = ['player_id', 'player_name', 'cardset_name', 'rarity', 'hand', 'variant']
    # Columns in `first` are excluded from the tail so they are not repeated.
    exclude = set(first) | {'id_vl', 'id_vr', 'vs_hand_vl', 'vs_hand_vr'}
    output = pitching_dfs[first + [col for col in pitching_dfs.columns if col not in exclude]]
    log_time('end', 'Done filtering ratings', start_time=start_time)

    start_time = log_time('start', 'Beginning write to file')
    # Let pandas own the file handle: it writes UTF-8 and manages newlines
    # itself, which text-mode open() + a pre-rendered string does not.
    output.to_csv(output_dir / 'pitching-ratings.csv', index=False)
    log_time('end', 'Done writing to file', start_time=start_time)
|