fix: derive SEASON_PCT from date range instead of hardcoding half-season (#9)

Closes #9

Previously SEASON_PCT was hardcoded to 81/162 (0.5) while END_DATE was
set to 20050731 (~65% through the season). Running retrosheet_data.py
directly (rather than through the CLI, which overrides SEASON_PCT at
runtime) would silently generate cards using half-season normalizations
on stats covering a larger portion of the season.

Fix: move START_DATE/END_DATE before SEASON_PCT and derive SEASON_PCT
from the date range using SEASON_END_DATE (2005 regular season end).
Now changing END_DATE automatically produces the correct SEASON_PCT.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-03-20 20:35:08 -05:00
parent 770f296938
commit 63a30bd434

View File

@ -60,10 +60,21 @@ MIN_TBF_VR = 40
CARDSET_ID = 27 # 27: 2005 Live, 28: 2005 Promos CARDSET_ID = 27 # 27: 2005 Live, 28: 2005 Promos
# Per-Update Parameters # Per-Update Parameters
SEASON_PCT = 81 / 162 # Through end of July (~half season)
START_DATE = 20050403  # YYYYMMDD format - 2005 Opening Day
# END_DATE = 20050531  # YYYYMMDD format - May PotM
END_DATE = 20050731  # End of July 2005
SEASON_END_DATE = 20051002  # 2005 regular season end date (used to derive SEASON_PCT)


def season_pct_from_dates(
    start_date: int, end_date: int, season_end_date: int
) -> float:
    """Return the fraction of the season covered by ``start_date``..``end_date``.

    All three arguments are dates in YYYYMMDD form (anything whose ``str()``
    parses with ``%Y%m%d``). The result is the number of days from
    ``start_date`` to ``end_date`` divided by the number of days from
    ``start_date`` to ``season_end_date``, capped at 1.0 so that an
    ``end_date`` past the end of the season never yields more than a full
    season.

    Raises:
        ValueError: if a date cannot be parsed, if ``season_end_date`` is not
            after ``start_date`` (the ratio would divide by zero or flip
            sign), or if ``end_date`` is before ``start_date`` (the ratio
            would be negative).
    """

    def _parse(value):
        return datetime.datetime.strptime(str(value), "%Y%m%d")

    opening_day = _parse(start_date)

    season_days = (_parse(season_end_date) - opening_day).days
    if season_days <= 0:
        raise ValueError(
            f"season_end_date ({season_end_date}) must be after "
            f"start_date ({start_date})"
        )

    elapsed_days = (_parse(end_date) - opening_day).days
    if elapsed_days < 0:
        raise ValueError(
            f"end_date ({end_date}) must not be before start_date ({start_date})"
        )

    return min(elapsed_days / season_days, 1.0)


# Derived rather than hardcoded so that changing END_DATE keeps the
# normalization in step with the portion of the season the stats cover.
SEASON_PCT = season_pct_from_dates(START_DATE, END_DATE, SEASON_END_DATE)
POST_DATA = True POST_DATA = True
LAST_WEEK_RATIO = 0.0 LAST_WEEK_RATIO = 0.0
LAST_TWOWEEKS_RATIO = 0.0 LAST_TWOWEEKS_RATIO = 0.0
@ -1427,7 +1438,7 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
"closer_rating": [ "closer_rating": [
cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"])) cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
], ],
"batting": [f'#1W{row["pitch_hand"].upper()}-C'], "batting": [f"#1W{row['pitch_hand'].upper()}-C"],
} }
) )
return y.loc[0] return y.loc[0]
@ -1596,7 +1607,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
]: ]:
if row["key_bbref"] in pos_df.index: if row["key_bbref"] in pos_df.index:
logger.info( logger.info(
f'Running {position} stats for {row["use_name"]} {row["last_name"]}' f"Running {position} stats for {row['use_name']} {row['last_name']}"
) )
try: try:
if "bis_runs_total" in pos_df.columns: if "bis_runs_total" in pos_df.columns:
@ -1863,8 +1874,8 @@ async def get_or_post_players(
def new_player_payload(row, ratings_df: pd.DataFrame): def new_player_payload(row, ratings_df: pd.DataFrame):
return { return {
"p_name": f'{row["use_name"]} {row["last_name"]}', "p_name": f"{row['use_name']} {row['last_name']}",
"cost": f'{ratings_df.loc[row['key_bbref']]["cost"]}', "cost": f"{ratings_df.loc[row['key_bbref']]['cost']}",
"image": "change-me", "image": "change-me",
"mlbclub": CLUB_LIST[row["Tm"]], "mlbclub": CLUB_LIST[row["Tm"]],
"franchise": FRANCHISE_LIST[row["Tm"]], "franchise": FRANCHISE_LIST[row["Tm"]],
@ -1914,11 +1925,11 @@ async def get_or_post_players(
# Update positions for existing players too # Update positions for existing players too
all_pos = get_player_record_pos(def_rat_df, row) all_pos = get_player_record_pos(def_rat_df, row)
patch_params = [ patch_params = [
("cost", f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'), ("cost", f"{bat_rat_df.loc[row['key_bbref']]['cost']}"),
("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])), ("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
( (
"image", "image",
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}', f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
), ),
] ]
# Add position updates - set all 8 slots to clear any old positions # Add position updates - set all 8 slots to clear any old positions
@ -1962,7 +1973,7 @@ async def get_or_post_players(
params=[ params=[
( (
"image", "image",
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}', f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
) )
], ],
) )
@ -2001,11 +2012,11 @@ async def get_or_post_players(
# Determine pitcher positions based on ratings # Determine pitcher positions based on ratings
patch_params = [ patch_params = [
("cost", f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'), ("cost", f"{pit_rat_df.loc[row['key_bbref']]['cost']}"),
("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])), ("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
( (
"image", "image",
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}', f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
), ),
] ]
@ -2079,7 +2090,7 @@ async def get_or_post_players(
params=[ params=[
( (
"image", "image",
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}', f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
) )
], ],
) )
@ -2103,10 +2114,10 @@ async def get_or_post_players(
raise KeyError("Could not get players - not enough stat DFs were supplied") raise KeyError("Could not get players - not enough stat DFs were supplied")
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv( pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv' f"{'batter' if bstat_df is not None else 'pitcher'}-deltas.csv"
) )
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv( pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv' f"new-{'batter' if bstat_df is not None else 'pitcher'}s.csv"
) )
players_df = pd.DataFrame(all_players).set_index("bbref_id") players_df = pd.DataFrame(all_players).set_index("bbref_id")
@ -2278,7 +2289,7 @@ async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
deleted_count += 1 deleted_count += 1
except Exception as e: except Exception as e:
logger.warning( logger.warning(
f'Failed to delete cardposition {pos["id"]}: {e}' f"Failed to delete cardposition {pos['id']}: {e}"
) )
logger.info(f"Deleted {deleted_count} positions for players in current run") logger.info(f"Deleted {deleted_count} positions for players in current run")