paper-dynasty-card-creation/pd_cards/core/scouting.py
Cal Corum 1de8b1db2f Add custom card profiles, S3 upload with timestamp cache-busting, and CLI enhancements
- Add Sippie Swartzel custom batter profile (0.820 OPS, SS/RF, no HR power)
- Update Kalin Young profile (0.891 OPS, All-Star rarity)
- Update Admiral Ball Traits profile with innings field
- Fix S3 cache-busting to include Unix timestamp for same-day updates
- Add pd_cards/core/upload.py and scouting.py modules
- Add custom card submission scripts and documentation
- Add uv.lock for dependency tracking
2026-01-25 21:57:35 -06:00

671 lines
28 KiB
Python

"""
Scouting report generation core logic.
Business logic for generating batting and pitching scouting reports.
"""
import asyncio
import datetime
from functools import partial
import multiprocessing
from pathlib import Path
from typing import Literal, Optional, List
import pandas as pd
# These imports are resolved at runtime when called from CLI
# since the CLI adds the parent directory to sys.path
from db_calls import db_get
from exceptions import logger, log_exception
# =============================================================================
# Shared Utilities
# =============================================================================
def log_time(
        which: Literal['start', 'end'],
        message: str = '',
        print_to_console: bool = True,
        start_time: datetime.datetime = None
) -> Optional[datetime.datetime]:
    """
    Start or finish a simple wall-clock timer, logging the event.

    Args:
        which: 'start' begins a timer; any other value is handled as the end of one
        message: Text to log and, optionally, print
        print_to_console: When True the message is also printed; an empty message is
            then reported through log_exception
        start_time: Value returned by the matching 'start' call; needed to end a timer

    Returns:
        The current datetime when starting a timer, otherwise None
    """
    if print_to_console and len(message) == 0:
        log_exception(KeyError, 'A message must be included when print_to_console equals True')

    # Starting a timer: hand the caller the timestamp to pass back later.
    if which == 'start':
        logger.info(f'starting timer - {message}')
        if print_to_console:
            print(message)
        return datetime.datetime.now()

    # Ending a timer is impossible without the timestamp from the start call.
    if start_time is None:
        log_exception(KeyError, 'start_time must be passed to log_time() when which equals \'end\'')
        return None

    logger.info(f'ending timer - {message}: {(datetime.datetime.now() - start_time).total_seconds():.2f}s\n')
    if print_to_console:
        print(f'{message}\n')
    return None
async def fetch_data(data: tuple) -> dict:
    """
    Run one API query and log how long it took.

    Args:
        data: Pair whose first item is the endpoint and whose second item is the query params

    Returns:
        Whatever db_get returns for that endpoint/params pair
    """
    timer = log_time('start', print_to_console=False)
    response = await db_get(endpoint=data[0], params=data[1])
    log_time('end', print_to_console=False, start_time=timer)
    return response
# =============================================================================
# Batting Scouting
# =============================================================================
def build_series(label: str, code: str, pos_code: str, all_positions: list) -> pd.Series:
    """
    Collect one field from every position record matching pos_code.

    Args:
        label: Prefix used in the log line and in the series name
        code: Key of the value to read from each position record
        pos_code: Only records whose 'position' equals this value are used
        all_positions: Position records; each is indexed by 'player', 'position' and code

    Returns:
        Series named '<label> <pos_code>', indexed by player_id
    """
    logger.info(f'Building {label} series for {pos_code}')
    values_by_player = {
        record['player']['player_id']: record[code]
        for record in all_positions
        if record['position'] == pos_code
    }
    return pd.Series(values_by_player, name=f'{label} {pos_code}')
def build_ranges(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'Range <pos_code>' series read from the 'range' field of matching records."""
    return build_series(label='Range', code='range', pos_code=pos_code, all_positions=all_positions)
def build_errors(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'Error <pos_code>' series read from the 'error' field, logging its contents."""
    error_series = build_series(label='Error', code='error', pos_code=pos_code, all_positions=all_positions)
    logger.info(f'error ratings:\n{error_series}')
    return error_series
def build_of_arms(all_positions: list, pos_code: str) -> pd.Series:
    """
    Collect the 'arm' value of every position record matching pos_code.

    Unlike the other builders the series is always named 'Arm OF', whichever
    position code is requested.
    """
    logger.info(f'Building OF series for {pos_code}')
    arm_by_player = {
        record['player']['player_id']: record['arm']
        for record in all_positions
        if record['position'] == pos_code
    }
    return pd.Series(arm_by_player, name='Arm OF')
def build_c_arms(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'Arm <pos_code>' series read from the 'arm' field, logging its contents."""
    arm_series = build_series(label='Arm', code='arm', pos_code=pos_code, all_positions=all_positions)
    logger.info(f'arm ratings:\n{arm_series}')
    return arm_series
def build_c_pb(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'PB <pos_code>' series read from the 'pb' field of matching records."""
    return build_series(label='PB', code='pb', pos_code=pos_code, all_positions=all_positions)
def build_c_throw(all_positions: list, pos_code: str) -> pd.Series:
    """Return the 'Throw <pos_code>' series read from the 'overthrow' field of matching records."""
    return build_series(label='Throw', code='overthrow', pos_code=pos_code, all_positions=all_positions)
async def get_batting_scouting_dfs(cardset_ids: Optional[List[int]] = None) -> pd.DataFrame:
    """
    Fetch and build batting scouting dataframes from API.

    Args:
        cardset_ids: List of cardset IDs to filter by (empty or None = no cardset filter)

    Returns:
        DataFrame of the vL and vR batting ratings merged on player_id (overlapping columns
        get '_vl' / '_vr' suffixes), indexed by player_id and joined with the defensive
        series: 'Range <pos>' and 'Error <pos>' for each position, 'Arm OF', 'Arm C',
        'PB C' and 'Throw C'
    """
    cardset_ids = cardset_ids or []
    cardset_params = [('cardset_id', x) for x in cardset_ids]
    # NOTE(review): team_id and ts are hard-coded query values - confirm they belong here
    ratings_params = [('team_id', 31), ('ts', 's37136685556r6135248705'), *cardset_params]
    API_CALLS = [
        ('battingcardratings', [('vs_hand', 'vL'), *ratings_params]),
        ('battingcardratings', [('vs_hand', 'vR'), *ratings_params]),
        ('cardpositions', cardset_params)
    ]

    start_time = log_time('start', message='Pulling all batting card ratings and positions')
    tasks = [fetch_data(params) for params in API_CALLS]
    api_data = await asyncio.gather(*tasks)
    log_time('end', f'Pulled {api_data[0]["count"] + api_data[1]["count"]} batting card ratings and {api_data[2]["count"]} positions', start_time=start_time)

    start_time = log_time('start', message='Building base dataframes')
    # Flatten the vL rows: lift the card fields and selected player fields to the top level,
    # then drop the nested dicts so each row becomes a flat record.
    vl_vals = api_data[0]['ratings']
    for x in vl_vals:
        x.update(x['battingcard'])
        x['player_id'] = x['battingcard']['player']['player_id']
        x['player_name'] = x['battingcard']['player']['p_name']
        x['rarity'] = x['battingcard']['player']['rarity']['name']
        x['cardset_id'] = x['battingcard']['player']['cardset']['id']
        x['cardset_name'] = x['battingcard']['player']['cardset']['name']
        del x['battingcard']
        del x['player']

    # The vR rows only need the join key; the card details already come from the vL rows.
    vr_vals = api_data[1]['ratings']
    for x in vr_vals:
        x['player_id'] = x['battingcard']['player']['player_id']
        del x['battingcard']

    vl = pd.DataFrame(vl_vals)
    vr = pd.DataFrame(vr_vals)
    log_time('end', 'Base dataframes are complete', start_time=start_time)

    start_time = log_time('start', message='Building combined dataframe')
    bat_df = pd.merge(vl, vr, on='player_id', suffixes=('_vl', '_vr')).set_index('player_id', drop=False)
    log_time('end', 'Combined dataframe is complete', start_time=start_time)

    POSITION_DATA = api_data[2]['positions']
    series_list = []
    POSITIONS = ['P', 'C', '1B', '2B', '3B', 'SS', 'LF', 'CF', 'RF']

    # The builders below are simple filters over POSITION_DATA, so they run in-process.
    # Spawning process pools for them pickled the whole position list for every task and
    # blocked the event loop while the pools started up.
    start_time = log_time('start', message='Building range series')
    ranges = [build_ranges(POSITION_DATA, pos) for pos in POSITIONS]
    series_list.extend(ranges)
    log_time('end', f'Processed {len(ranges)} position ranges', start_time=start_time)

    start_time = log_time('start', message='Building error series')
    errors = [build_errors(POSITION_DATA, pos) for pos in POSITIONS]
    series_list.extend(errors)
    log_time('end', f'Processed {len(errors)} position errors', start_time=start_time)

    start_time = log_time('start', message='Building OF arm series')
    lf_arms = build_of_arms(POSITION_DATA, 'LF')
    cf_arms = build_of_arms(POSITION_DATA, 'CF')
    rf_arms = build_of_arms(POSITION_DATA, 'RF')
    # Keep the highest arm value found across a player's OF positions. A position the
    # player does not have must never win the comparison: filling it with 0 (as before)
    # replaced every negative arm with 0 unless the player appeared at all three spots.
    missing_arm = float('-inf')
    combined_series = lf_arms.combine(cf_arms, max, fill_value=missing_arm)
    combined_series = combined_series.combine(rf_arms, max, fill_value=missing_arm)
    series_list.extend([combined_series])
    log_time('end', f'Processed {len(combined_series)} OF arms', start_time=start_time)

    start_time = log_time('start', message='Building C arm series')
    c_arms = build_c_arms(POSITION_DATA, 'C')
    series_list.extend([c_arms])
    log_time('end', f'Processed {len(c_arms)} catcher arms', start_time=start_time)

    start_time = log_time('start', message='Building C PB series')
    passed_ball = [build_c_pb(POSITION_DATA, 'C')]
    series_list.extend(passed_ball)
    log_time('end', f'Processed {len(passed_ball)} C PB series', start_time=start_time)

    start_time = log_time('start', message='Building C OT series')
    overthrows = [build_c_throw(POSITION_DATA, 'C')]
    series_list.extend(overthrows)
    log_time('end', f'Processed {len(overthrows)} C OT series', start_time=start_time)

    logger.info(f'series_list: {series_list}')
    # Each series is indexed by player_id, matching bat_df's index.
    return bat_df.join(series_list)
async def calc_batting_basic(batting_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Score every batter on a set of 0-100 scouting scales and write the summary CSV.

    Each scale is a raw per-row score turned into a percentile rank across all rows.
    The scale columns are added to batting_dfs itself, and the summary is written to
    'batting-basic.csv' inside output_dir.

    Args:
        batting_dfs: DataFrame with batting ratings and the joined defensive columns
        output_dir: Directory to save output CSV
    """
    infield_range_weights = (('Range 3B', 5), ('Range SS', 4), ('Range 2B', 3), ('Range 1B', 1))
    infield_error_weights = (('Error 3B', 2), ('Error SS', .75), ('Error 2B', 1.25), ('Error 1B', 2))
    outfield_error_weights = (('Error LF', 2), ('Error CF', .75), ('Error RF', 1.25))
    reaction_columns = (
        'Range C', 'Range 1B', 'Range 2B', 'Range 3B', 'Range SS', 'Range LF', 'Range CF', 'Range RF'
    )

    def as_percentile(raw_scores):
        # Percentile rank scaled to 0-100 and rounded.
        return round(raw_scores.rank(pct=True) * 100)

    # --- Speed ---------------------------------------------------------------
    def get_raw_speed(row):
        base = row['running'] / 20 + row['steal_jump']
        return base + 0.5 if row['steal_auto'] else base

    timer = log_time('start', 'Beginning Speed calcs')
    batting_dfs['Speed'] = as_percentile(batting_dfs.apply(get_raw_speed, axis=1))
    log_time('end', 'Done Speed calcs', start_time=timer)

    # --- Steal ---------------------------------------------------------------
    timer = log_time('start', 'Beginning Stealing calcs')

    def get_raw_steal(row):
        steal_window = (row['steal_high'] / 20) + (row['steal_low'] / 20)
        return steal_window * row['steal_jump']

    batting_dfs['Steal'] = as_percentile(batting_dfs.apply(get_raw_steal, axis=1))
    log_time('end', 'Done Stealing calcs', start_time=timer)

    # --- Reaction ------------------------------------------------------------
    timer = log_time('start', 'Beginning Reaction calcs')

    def get_raw_reaction(row):
        position_ranges = [row[column] for column in reaction_columns]
        # Each rated position contributes 10 ** (5 - range), so lower range values dominate.
        return sum(10 ** (5 - value) for value in position_ranges if pd.notna(value))

    batting_dfs['Reaction'] = as_percentile(batting_dfs.apply(get_raw_reaction, axis=1))
    log_time('end', 'Done Reaction calcs', start_time=timer)

    # --- Arm -----------------------------------------------------------------
    timer = log_time('start', 'Beginning Arm calcs')

    def catcher_arm(row):
        if pd.isna(row['Arm C']):
            return None
        if row['Arm C'] == -5:
            return 100
        combined = 20 + ((10 - row['Arm C']) * 3) + (20 - row['PB C']) + (20 - row['Throw C']) - \
            row['Error C']
        return min(100, combined)

    def outfield_arm(row):
        # Only the first rated spot, checked in RF, CF, LF order, shapes the score.
        spot = next(
            (column for column in ('Range RF', 'Range CF', 'Range LF') if pd.notna(row[column])),
            None
        )
        if spot is None:
            return None
        arm = row['Arm OF']
        score = arm * -10 if arm < 0 else (5 - arm)
        if spot == 'Range RF':
            return score * 1.5 + ((6 - row['Range RF']) * 4)
        if spot == 'Range CF':
            return score + ((6 - row['Range CF']) * 3)
        return score / 2 + ((6 - row['Range LF']) * 2)

    def infield_arm(row):
        rated = [(column, weight) for column, weight in infield_range_weights if pd.notna(row[column])]
        if not rated:
            return None
        range_totals = 0
        for column, weight in rated:
            range_totals += (6 - row[column]) * weight
        return 100 - (50 - range_totals)

    def get_raw_arm(row):
        # Catcher score wins over outfield, which wins over infield; 1 when nothing applies.
        for candidate in (catcher_arm(row), outfield_arm(row), infield_arm(row)):
            if candidate is not None:
                return candidate
        return 1

    batting_dfs['Arm'] = as_percentile(batting_dfs.apply(get_raw_arm, axis=1))
    log_time('end', 'Done Arm calcs', start_time=timer)

    # --- Fielding ------------------------------------------------------------
    timer = log_time('start', 'Beginning Fielding calcs')

    def group_error_score(row, weighted_columns):
        rated = [(column, weight) for column, weight in weighted_columns if pd.notna(row[column])]
        if not rated:
            return None
        remaining = 100
        for column, weight in rated:
            remaining -= (row[column] * weight)
        return max(1, remaining)

    def get_raw_fielding(row):
        catcher_score = None
        if pd.notna(row['Error C']):
            catcher_score = max(100 - (row['Error C'] * 5) - row['Throw C'] - row['PB C'], 1)
        group_scores = [
            group_error_score(row, infield_error_weights),
            group_error_score(row, outfield_error_weights),
            catcher_score
        ]
        # Average over the groups the player actually has; absent groups add 0.
        groups_present = sum(1 for score in group_scores if score is not None)
        return sum([0 if score is None else score for score in group_scores]) / max(groups_present, 1)

    batting_dfs['Fielding'] = as_percentile(batting_dfs.apply(get_raw_fielding, axis=1))
    log_time('end', 'Done Fielding calcs', start_time=timer)

    # --- Contact / Power: straight percentile of an existing column -----------
    direct_scales = (
        ('Beginning AVG vL calcs', 'Done AVG vL calcs', 'avg_vl', 'Contact L'),
        ('Beginning AVG vR calcs', 'Done AVG vR calcs', 'avg_vr', 'Contact R'),
        ('Beginning PWR vL calcs', 'Done PWR vL calcs', 'slg_vl', 'Power L'),
        ('Beginning PWR vR calcs', 'Done PWR vR calcs', 'slg_vr', 'Power R'),
    )
    for begin_message, done_message, source_column, scale_column in direct_scales:
        timer = log_time('start', begin_message)
        batting_dfs[scale_column] = as_percentile(batting_dfs[source_column])
        log_time('end', done_message, start_time=timer)

    # --- Vision --------------------------------------------------------------
    timer = log_time('start', 'Beginning Vision calcs')

    def get_raw_vision(row):
        blended_obp = (row['obp_vr'] * 0.67) + (row['obp_vl'] * 0.33)
        blended_avg = (row['avg_vr'] * 0.67) + (row['avg_vl'] * 0.33)
        strikeout_cost = ((row['strikeout_vl'] * 0.33) + (row['strikeout_vr'] * 0.67)) / 208
        return ((blended_obp - blended_avg) * 5) - strikeout_cost

    batting_dfs['Vision'] = as_percentile(batting_dfs.apply(get_raw_vision, axis=1))
    log_time('end', 'Done Vision calcs', start_time=timer)

    # --- Overall rating --------------------------------------------------------
    timer = log_time('start', 'Beginning Rating calcs')

    def get_raw_rating(row):
        defense = (row['Reaction'] + row['Arm'] + row['Fielding']) * 2
        baserunning = row['Speed'] + row['Steal']
        hitting = (
            ((row['Contact R'] + row['Power R']) * 0.67) +
            ((row['Contact L'] + row['Power L']) * 0.33) + row['Vision']
        ) * 6
        return defense + baserunning + hitting

    batting_dfs['Rating'] = as_percentile(batting_dfs.apply(get_raw_rating, axis=1))
    log_time('end', 'Done Rating calcs', start_time=timer)

    # --- CSV -------------------------------------------------------------------
    timer = log_time('start', 'Beginning write to file')
    summary_columns = [
        'player_id', 'player_name', 'Rating', 'Contact R', 'Contact L', 'Power R', 'Power L', 'Vision', 'Speed',
        'Steal', 'Reaction', 'Arm', 'Fielding', 'hand', 'cardset_name'
    ]
    csv_text = pd.DataFrame(batting_dfs[summary_columns]).to_csv(index=False)
    destination = output_dir / 'batting-basic.csv'
    with open(destination, 'w') as handle:
        handle.write(csv_text)
    log_time('end', 'Done writing to file', start_time=timer)
async def calc_batting_ratings(batting_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Reorder the batting ratings columns and write them to 'batting-ratings.csv'.

    Identity columns come first, the per-split id / vs_hand columns are dropped, and
    every other column keeps its existing order.

    Args:
        batting_dfs: DataFrame with batting ratings
        output_dir: Directory to save output CSV
    """
    timer = log_time('start', 'Beginning Ratings filtering')
    leading = ['player_id', 'player_name', 'cardset_name', 'rarity', 'hand', 'variant']
    skipped = set(leading) | {'id_vl', 'id_vr', 'vs_hand_vl', 'vs_hand_vr'}
    remaining = [column for column in batting_dfs.columns if column not in skipped]
    output = batting_dfs[leading + remaining]
    log_time('end', 'Done filtering ratings', start_time=timer)

    timer = log_time('start', 'Beginning write to file')
    csv_text = pd.DataFrame(output).to_csv(index=False)
    destination = output_dir / 'batting-ratings.csv'
    with open(destination, 'w') as handle:
        handle.write(csv_text)
    log_time('end', 'Done writing to file', start_time=timer)
# =============================================================================
# Pitching Scouting
# =============================================================================
async def get_pitching_scouting_dfs(cardset_ids: List[int] = None) -> pd.DataFrame:
    """
    Pull pitching ratings and pitcher defense from the API and combine them.

    Args:
        cardset_ids: List of cardset IDs to filter by (empty = all)

    Returns:
        DataFrame of the vL and vR pitching ratings merged on player_id, indexed by
        player_id and joined with the 'Range P' and 'Error P' series
    """
    cardset_ids = cardset_ids or []
    cardset_filter = [('cardset_id', cardset_id) for cardset_id in cardset_ids]
    shared_rating_params = [('team_id', 31), ('ts', 's37136685556r6135248705'), *cardset_filter]
    API_CALLS = [
        ('pitchingcardratings', [('vs_hand', 'vL'), *shared_rating_params]),
        ('pitchingcardratings', [('vs_hand', 'vR'), *shared_rating_params]),
        ('cardpositions', [('position', 'P'), *cardset_filter])
    ]

    timer = log_time('start', message='Pulling all pitching card ratings and positions')
    api_data = await asyncio.gather(*[fetch_data(call) for call in API_CALLS])
    vl_payload, vr_payload, position_payload = api_data[0], api_data[1], api_data[2]
    log_time('end', f'Pulled {vl_payload["count"] + vr_payload["count"]} pitching card ratings and {position_payload["count"]} positions', start_time=timer)

    timer = log_time('start', message='Building base dataframes')
    vl_rows = vl_payload['ratings']
    for row in vl_rows:
        # Lift the card's fields to the top level, then copy the nested values needed later.
        row.update(row['pitchingcard'])
        card = row['pitchingcard']
        player = card['player']
        row['player_id'] = player['player_id']
        row['player_name'] = player['p_name']
        row['rarity'] = player['rarity']['name']
        row['cardset_id'] = player['cardset']['id']
        row['cardset_name'] = player['cardset']['name']
        row['starter_rating'] = card['starter_rating']
        row['relief_rating'] = card['relief_rating']
        row['closer_rating'] = card['closer_rating']
        del row['pitchingcard'], row['player']

    vr_rows = vr_payload['ratings']
    for row in vr_rows:
        # Only the join key is kept from the card on the vR side.
        row['player_id'] = row['pitchingcard']['player']['player_id']
        del row['pitchingcard']

    vl = pd.DataFrame(vl_rows)
    vr = pd.DataFrame(vr_rows)
    merged = pd.merge(vl, vr, on='player_id', suffixes=('_vl', '_vr'))
    pit_df = merged.set_index('player_id', drop=False)
    log_time('end', 'Base dataframes are complete', start_time=timer)

    timer = log_time('start', message='Building defense series')
    positions = position_payload['positions']
    range_by_player = {entry['player']['player_id']: entry['range'] for entry in positions}
    error_by_player = {entry['player']['player_id']: entry['error'] for entry in positions}
    series_list = [
        pd.Series(range_by_player, name='Range P'),
        pd.Series(error_by_player, name='Error P')
    ]
    log_time('end', f'Processed {len(positions)} defense series', start_time=timer)

    logger.info(f'series_list: {series_list}')
    return pit_df.join(series_list)
async def calc_pitching_basic(pitching_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Calculate basic pitching scouting metrics and save to CSV.

    Each metric is a raw per-row score converted to a 0-100 percentile rank across all
    rows. The metric columns are added to pitching_dfs in place, and the summary is
    written to 'pitching-basic.csv' inside output_dir.

    Args:
        pitching_dfs: DataFrame with pitching ratings plus the 'Range P' / 'Error P' columns
        output_dir: Directory to save output CSV
    """
    raw_data = pitching_dfs

    def to_percentile(raw_series, ascending=True):
        # Percentile rank scaled to 0-100 and rounded; ascending=False ranks low raw values highest.
        return round(raw_series.rank(pct=True, ascending=ascending) * 100)

    def get_raw_leftcontrol(df_data):
        return ((1 - (df_data['obp_vl'] - df_data['avg_vl'])) * 100) + (1 - (df_data['wild_pitch'] / 20))

    start_time = log_time('start', 'Beginning Control L calcs')
    raw_series = raw_data.apply(get_raw_leftcontrol, axis=1)
    raw_data['Control L'] = to_percentile(raw_series)
    log_time('end', 'Done Control L calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Control R calcs')

    def get_raw_rightcontrol(df_data):
        return ((1 - (df_data['obp_vr'] - df_data['avg_vr'])) * 100) + (1 - (df_data['wild_pitch'] / 20))

    raw_series = raw_data.apply(get_raw_rightcontrol, axis=1)
    raw_data['Control R'] = to_percentile(raw_series)
    log_time('end', 'Done Control R calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stuff L calcs')

    def get_raw_leftstuff(df_data):
        # NOTE(review): slg_vl is added twice here - confirm this is an intended weighting
        return 10 - (df_data['slg_vl'] + df_data['slg_vl'] + ((df_data['homerun_vl'] + df_data['bp_homerun_vl']) / 108))

    raw_series = raw_data.apply(get_raw_leftstuff, axis=1)
    raw_data['Stuff L'] = to_percentile(raw_series)
    log_time('end', 'Done Stuff L calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stuff R calcs')

    def get_raw_rightstuff(df_data):
        # NOTE(review): slg_vr is added twice here - confirm this is an intended weighting
        return 10 - (df_data['slg_vr'] + df_data['slg_vr'] + ((df_data['homerun_vr'] + df_data['bp_homerun_vr']) / 108))

    raw_series = raw_data.apply(get_raw_rightstuff, axis=1)
    raw_data['Stuff R'] = to_percentile(raw_series)
    log_time('end', 'Done Stuff R calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Fielding calcs')

    def get_raw_fielding(df_data):
        return ((6 - df_data['Range P']) * 10) + (50 - df_data['Error P'])

    raw_series = raw_data.apply(get_raw_fielding, axis=1)
    logger.info(f'max fld: {raw_series.max()} / min fld: {raw_series.min()}')
    raw_data['Fielding'] = to_percentile(raw_series)
    log_time('end', 'Done Fielding calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Stamina calcs')

    def get_raw_stamina(df_data):
        # Use each role rating when it is present and -1 when it is missing. The previous
        # pd.isna test was inverted, so every real rating was replaced by -1 and Stamina
        # never reflected the starter/relief ratings.
        spow = df_data['starter_rating'] if pd.notna(df_data['starter_rating']) else -1
        rpow = df_data['relief_rating'] if pd.notna(df_data['relief_rating']) else -1
        this_pow = spow if spow > rpow else rpow
        return (((this_pow * (df_data['obp_vr'] * (2 / 3))) + (this_pow * (df_data['obp_vl'] / 3))) * 4.5) + this_pow

    raw_series = raw_data.apply(get_raw_stamina, axis=1)
    raw_data['Stamina'] = to_percentile(raw_series)
    log_time('end', 'Done Stamina calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning H/9 calcs')

    def get_raw_hit(df_data):
        # Subtract the whole weighted average; without the parentheses the vL term was
        # added, so a higher average against lefties raised the score.
        return 1 - ((df_data['avg_vr'] * (2 / 3)) + (df_data['avg_vl'] / 3))

    raw_series = raw_data.apply(get_raw_hit, axis=1)
    raw_data['H/9'] = to_percentile(raw_series)
    log_time('end', 'Done H/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning K/9 calcs')

    def get_raw_k(df_data):
        return ((df_data['strikeout_vr'] / 108) * (2 / 3)) + ((df_data['strikeout_vl'] / 108) / 3)

    raw_series = raw_data.apply(get_raw_k, axis=1)
    raw_data['K/9'] = to_percentile(raw_series)
    log_time('end', 'Done K/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning BB/9 calcs')

    def get_raw_bb(df_data):
        return ((df_data['walk_vr'] / 108) * (2 / 3)) + ((df_data['walk_vl'] / 108) / 3)

    raw_series = raw_data.apply(get_raw_bb, axis=1)
    # Ranked descending so that fewer walks give a higher score.
    raw_data['BB/9'] = to_percentile(raw_series, ascending=False)
    log_time('end', 'Done BB/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning HR/9 calcs')

    def get_raw_hr(df_data):
        return 1 - (
            (((df_data['homerun_vr'] + df_data['bp_homerun_vr']) / 108) * (2 / 3)) +
            (((df_data['homerun_vl'] + df_data['bp_homerun_vl']) / 108) / 3)
        )

    raw_series = raw_data.apply(get_raw_hr, axis=1)
    raw_data['HR/9'] = to_percentile(raw_series)
    log_time('end', 'Done HR/9 calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning Rating calcs')

    def get_raw_rating(df_data):
        # NOTE(review): the previous code branched on starter vs. reliever but both branches
        # held this same formula, so a single expression gives the same result. Confirm
        # whether relievers were meant to be weighted differently.
        return (
            ((df_data['H/9'] + df_data['K/9'] + df_data['BB/9'] + df_data['HR/9']) * 5) +
            (df_data['Fielding']) + (df_data['Stamina'] * 5) +
            (((df_data['Stuff L'] / 3) + (df_data['Stuff R'] * (2 / 3))) * 4) +
            (((df_data['Control L'] / 3) + (df_data['Control R'] * (2 / 3))) * 2)
        )

    raw_series = raw_data.apply(get_raw_rating, axis=1)
    raw_data['Rating'] = to_percentile(raw_series)
    output = raw_data[[
        'player_id', 'player_name', 'Rating', 'Control R', 'Control L', 'Stuff R', 'Stuff L', 'Stamina', 'Fielding',
        'H/9', 'K/9', 'BB/9', 'HR/9', 'hand', 'cardset_name'
    ]]
    log_time('end', 'Done Rating calcs', start_time=start_time)

    start_time = log_time('start', 'Beginning write csv')
    csv_file = pd.DataFrame(output).to_csv(index=False)
    output_file = output_dir / 'pitching-basic.csv'
    with open(output_file, 'w') as file:
        file.write(csv_file)
    log_time('end', 'Done writing to file', start_time=start_time)
async def calc_pitching_ratings(pitching_dfs: pd.DataFrame, output_dir: Path) -> None:
    """
    Reorder the pitching ratings columns and write them to 'pitching-ratings.csv'.

    Identity columns come first, the per-split id / vs_hand columns are dropped, and
    every other column keeps its existing order.

    Args:
        pitching_dfs: DataFrame with pitching ratings
        output_dir: Directory to save output CSV
    """
    timer = log_time('start', 'Beginning Ratings filtering')
    leading = ['player_id', 'player_name', 'cardset_name', 'rarity', 'hand', 'variant']
    skipped = set(leading) | {'id_vl', 'id_vr', 'vs_hand_vl', 'vs_hand_vr'}
    remaining = [column for column in pitching_dfs.columns if column not in skipped]
    output = pitching_dfs[leading + remaining]
    log_time('end', 'Done filtering ratings', start_time=timer)

    timer = log_time('start', 'Beginning write to file')
    csv_text = pd.DataFrame(output).to_csv(index=False)
    destination = output_dir / 'pitching-ratings.csv'
    with open(destination, 'w') as handle:
        handle.write(csv_text)
    log_time('end', 'Done writing to file', start_time=timer)