fix: derive SEASON_PCT from date range instead of hardcoding half-season (#9)
Closes #9. Previously, SEASON_PCT was hardcoded to 81/162 (0.5) while END_DATE was set to 20050731 (~65% of the way through the season). Running retrosheet_data.py directly (without the CLI, which overrides SEASON_PCT at runtime) would silently generate cards using half-season normalizations on stats covering a larger portion of the season. Fix: move START_DATE/END_DATE before SEASON_PCT and derive SEASON_PCT from the date range using SEASON_END_DATE (the end of the 2005 regular season). Changing END_DATE now automatically produces the correct SEASON_PCT. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
770f296938
commit
63a30bd434
@ -60,10 +60,21 @@ MIN_TBF_VR = 40
|
||||
CARDSET_ID = 27 # 27: 2005 Live, 28: 2005 Promos
|
||||
|
||||
# Per-Update Parameters
|
||||
SEASON_PCT = 81 / 162 # Through end of July (~half season)
|
||||
START_DATE = 20050403  # YYYYMMDD format - 2005 Opening Day
# END_DATE = 20050531  # YYYYMMDD format - May PotM
END_DATE = 20050731  # End of July 2005
SEASON_END_DATE = 20051002  # 2005 regular season end date (used to derive SEASON_PCT)


def _parse_yyyymmdd(value: int) -> datetime.datetime:
    """Convert an integer date written as YYYYMMDD into a ``datetime``."""
    return datetime.datetime.strptime(str(value), "%Y%m%d")


def _derive_season_pct(start: int, end: int, season_end: int) -> float:
    """Return the share of the season covered by ``start``..``end``.

    The result is the number of days from ``start`` to ``end`` divided by
    the number of days from ``start`` to ``season_end``, capped at 1.0 so an
    ``end`` past the season's last day never exceeds a full season.

    Args:
        start: First day of the stat window, as a YYYYMMDD integer.
        end: Last day of the stat window, as a YYYYMMDD integer.
        season_end: Last day of the regular season, as a YYYYMMDD integer.

    Returns:
        A float in the range [0.0, 1.0].

    Raises:
        ValueError: If any date is malformed, if ``season_end`` is not after
            ``start``, or if ``end`` is before ``start``.
    """
    opening_day = _parse_yyyymmdd(start)  # parsed once, used for both spans
    elapsed_days = (_parse_yyyymmdd(end) - opening_day).days
    season_days = (_parse_yyyymmdd(season_end) - opening_day).days

    # Fail loudly on an inverted range instead of dividing by zero or
    # producing a negative percentage that would skew every normalization.
    if season_days <= 0:
        raise ValueError(
            f"SEASON_END_DATE ({season_end}) must be after START_DATE ({start})"
        )
    if elapsed_days < 0:
        raise ValueError(
            f"END_DATE ({end}) must not be before START_DATE ({start})"
        )

    return min(elapsed_days / season_days, 1.0)


# Derived from the date range above so that changing END_DATE keeps the
# normalization factor in sync with the stats window.
SEASON_PCT = _derive_season_pct(START_DATE, END_DATE, SEASON_END_DATE)
|
||||
POST_DATA = True
|
||||
LAST_WEEK_RATIO = 0.0
|
||||
LAST_TWOWEEKS_RATIO = 0.0
|
||||
@ -1427,7 +1438,7 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
|
||||
"closer_rating": [
|
||||
cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
|
||||
],
|
||||
"batting": [f'#1W{row["pitch_hand"].upper()}-C'],
|
||||
"batting": [f"#1W{row['pitch_hand'].upper()}-C"],
|
||||
}
|
||||
)
|
||||
return y.loc[0]
|
||||
@ -1596,7 +1607,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
||||
]:
|
||||
if row["key_bbref"] in pos_df.index:
|
||||
logger.info(
|
||||
f'Running {position} stats for {row["use_name"]} {row["last_name"]}'
|
||||
f"Running {position} stats for {row['use_name']} {row['last_name']}"
|
||||
)
|
||||
try:
|
||||
if "bis_runs_total" in pos_df.columns:
|
||||
@ -1863,8 +1874,8 @@ async def get_or_post_players(
|
||||
|
||||
def new_player_payload(row, ratings_df: pd.DataFrame):
|
||||
return {
|
||||
"p_name": f'{row["use_name"]} {row["last_name"]}',
|
||||
"cost": f'{ratings_df.loc[row['key_bbref']]["cost"]}',
|
||||
"p_name": f"{row['use_name']} {row['last_name']}",
|
||||
"cost": f"{ratings_df.loc[row['key_bbref']]['cost']}",
|
||||
"image": "change-me",
|
||||
"mlbclub": CLUB_LIST[row["Tm"]],
|
||||
"franchise": FRANCHISE_LIST[row["Tm"]],
|
||||
@ -1914,11 +1925,11 @@ async def get_or_post_players(
|
||||
# Update positions for existing players too
|
||||
all_pos = get_player_record_pos(def_rat_df, row)
|
||||
patch_params = [
|
||||
("cost", f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'),
|
||||
("cost", f"{bat_rat_df.loc[row['key_bbref']]['cost']}"),
|
||||
("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
||||
(
|
||||
"image",
|
||||
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
||||
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||
),
|
||||
]
|
||||
# Add position updates - set all 8 slots to clear any old positions
|
||||
@ -1962,7 +1973,7 @@ async def get_or_post_players(
|
||||
params=[
|
||||
(
|
||||
"image",
|
||||
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
||||
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||
)
|
||||
],
|
||||
)
|
||||
@ -2001,11 +2012,11 @@ async def get_or_post_players(
|
||||
|
||||
# Determine pitcher positions based on ratings
|
||||
patch_params = [
|
||||
("cost", f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'),
|
||||
("cost", f"{pit_rat_df.loc[row['key_bbref']]['cost']}"),
|
||||
("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
||||
(
|
||||
"image",
|
||||
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
||||
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||
),
|
||||
]
|
||||
|
||||
@ -2079,7 +2090,7 @@ async def get_or_post_players(
|
||||
params=[
|
||||
(
|
||||
"image",
|
||||
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
||||
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||
)
|
||||
],
|
||||
)
|
||||
@ -2103,10 +2114,10 @@ async def get_or_post_players(
|
||||
raise KeyError("Could not get players - not enough stat DFs were supplied")
|
||||
|
||||
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
|
||||
f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv'
|
||||
f"{'batter' if bstat_df is not None else 'pitcher'}-deltas.csv"
|
||||
)
|
||||
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
|
||||
f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv'
|
||||
f"new-{'batter' if bstat_df is not None else 'pitcher'}s.csv"
|
||||
)
|
||||
|
||||
players_df = pd.DataFrame(all_players).set_index("bbref_id")
|
||||
@ -2278,7 +2289,7 @@ async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
|
||||
deleted_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f'Failed to delete cardposition {pos["id"]}: {e}'
|
||||
f"Failed to delete cardposition {pos['id']}: {e}"
|
||||
)
|
||||
logger.info(f"Deleted {deleted_count} positions for players in current run")
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user