fix: derive SEASON_PCT from date range instead of hardcoding half-season (#9)

Closes #9

Previously SEASON_PCT was hardcoded to 81/162 (0.5) while END_DATE was
set to 20050731 (~65% through the season). Running retrosheet_data.py
directly (without the CLI, which overrides SEASON_PCT at runtime) would
silently generate cards using half-season normalizations on stats covering
a larger portion of the season.

Fix: move START_DATE/END_DATE before SEASON_PCT and derive SEASON_PCT
from the date range: days elapsed from START_DATE to END_DATE divided by
days from START_DATE to SEASON_END_DATE (2005 regular season end), capped
at 1.0. Changing END_DATE now automatically produces the correct SEASON_PCT.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Cal Corum 2026-03-20 20:35:08 -05:00
parent f1ca14791d
commit c568b6a024

View File

@ -62,10 +62,21 @@ CARDSET_ID = (
) # 27: 2005 Live, 28: 2005 Promos
# Per-Update Parameters
SEASON_PCT = 81 / 162 # Through end of July (~half season)
START_DATE = 20050403 # YYYYMMDD format - 2005 Opening Day
# END_DATE = 20050531 # YYYYMMDD format - May PotM
END_DATE = 20050731 # End of July 2005
SEASON_END_DATE = 20051002 # 2005 regular season end date (used to derive SEASON_PCT)
SEASON_PCT = min(
(
datetime.datetime.strptime(str(END_DATE), "%Y%m%d")
- datetime.datetime.strptime(str(START_DATE), "%Y%m%d")
).days
/ (
datetime.datetime.strptime(str(SEASON_END_DATE), "%Y%m%d")
- datetime.datetime.strptime(str(START_DATE), "%Y%m%d")
).days,
1.0,
)
POST_DATA = True
LAST_WEEK_RATIO = 0.0 if PLAYER_DESCRIPTION == "Live" else 0.0
LAST_TWOWEEKS_RATIO = 0.0
@ -1429,7 +1440,7 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
"closer_rating": [
cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
],
"batting": [f'#1W{row["pitch_hand"].upper()}-C'],
"batting": [f"#1W{row['pitch_hand'].upper()}-C"],
}
)
return y.loc[0]
@ -1598,7 +1609,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
]:
if row["key_bbref"] in pos_df.index:
logger.info(
f'Running {position} stats for {row["use_name"]} {row["last_name"]}'
f"Running {position} stats for {row['use_name']} {row['last_name']}"
)
try:
if "bis_runs_total" in pos_df.columns:
@ -1865,8 +1876,8 @@ async def get_or_post_players(
def new_player_payload(row, ratings_df: pd.DataFrame):
return {
"p_name": f'{row["use_name"]} {row["last_name"]}',
"cost": f'{ratings_df.loc[row['key_bbref']]["cost"]}',
"p_name": f"{row['use_name']} {row['last_name']}",
"cost": f"{ratings_df.loc[row['key_bbref']]['cost']}",
"image": "change-me",
"mlbclub": CLUB_LIST[row["Tm"]],
"franchise": FRANCHISE_LIST[row["Tm"]],
@ -1916,11 +1927,11 @@ async def get_or_post_players(
# Update positions for existing players too
all_pos = get_player_record_pos(def_rat_df, row)
patch_params = [
("cost", f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'),
("cost", f"{bat_rat_df.loc[row['key_bbref']]['cost']}"),
("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
(
"image",
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
),
]
# Add position updates - set all 8 slots to clear any old positions
@ -1964,7 +1975,7 @@ async def get_or_post_players(
params=[
(
"image",
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
)
],
)
@ -2003,11 +2014,11 @@ async def get_or_post_players(
# Determine pitcher positions based on ratings
patch_params = [
("cost", f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'),
("cost", f"{pit_rat_df.loc[row['key_bbref']]['cost']}"),
("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
(
"image",
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
),
]
@ -2081,7 +2092,7 @@ async def get_or_post_players(
params=[
(
"image",
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
)
],
)
@ -2105,10 +2116,10 @@ async def get_or_post_players(
raise KeyError("Could not get players - not enough stat DFs were supplied")
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv'
f"{'batter' if bstat_df is not None else 'pitcher'}-deltas.csv"
)
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv'
f"new-{'batter' if bstat_df is not None else 'pitcher'}s.csv"
)
players_df = pd.DataFrame(all_players).set_index("bbref_id")
@ -2280,7 +2291,7 @@ async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
deleted_count += 1
except Exception as e:
logger.warning(
f'Failed to delete cardposition {pos["id"]}: {e}'
f"Failed to delete cardposition {pos['id']}: {e}"
)
logger.info(f"Deleted {deleted_count} positions for players in current run")