fix: derive SEASON_PCT from date range instead of hardcoding half-season (#9)
Closes #9. Previously, SEASON_PCT was hardcoded to 81/162 (0.5) while END_DATE was set to 20050731 (~65% of the way through the season). Running retrosheet_data.py directly (without the CLI, which overrides SEASON_PCT at runtime) would silently generate cards using half-season normalizations on stats covering a larger portion of the season. Fix: move START_DATE/END_DATE before SEASON_PCT and derive SEASON_PCT from the date range using SEASON_END_DATE (the 2005 regular-season end date), capped at 1.0. Now changing END_DATE automatically produces the correct SEASON_PCT. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
770f296938
commit
63a30bd434
@ -60,10 +60,21 @@ MIN_TBF_VR = 40
|
|||||||
CARDSET_ID = 27 # 27: 2005 Live, 28: 2005 Promos
|
CARDSET_ID = 27 # 27: 2005 Live, 28: 2005 Promos
|
||||||
|
|
||||||
# Per-Update Parameters
|
# Per-Update Parameters
|
||||||
SEASON_PCT = 81 / 162 # Through end of July (~half season)
|
|
||||||
START_DATE = 20050403 # YYYYMMDD format - 2005 Opening Day
|
START_DATE = 20050403 # YYYYMMDD format - 2005 Opening Day
|
||||||
# END_DATE = 20050531 # YYYYMMDD format - May PotM
|
# END_DATE = 20050531 # YYYYMMDD format - May PotM
|
||||||
END_DATE = 20050731 # End of July 2005
|
END_DATE = 20050731 # End of July 2005
|
||||||
|
SEASON_END_DATE = 20051002 # 2005 regular season end date (used to derive SEASON_PCT)
|
||||||
|
SEASON_PCT = min(
|
||||||
|
(
|
||||||
|
datetime.datetime.strptime(str(END_DATE), "%Y%m%d")
|
||||||
|
- datetime.datetime.strptime(str(START_DATE), "%Y%m%d")
|
||||||
|
).days
|
||||||
|
/ (
|
||||||
|
datetime.datetime.strptime(str(SEASON_END_DATE), "%Y%m%d")
|
||||||
|
- datetime.datetime.strptime(str(START_DATE), "%Y%m%d")
|
||||||
|
).days,
|
||||||
|
1.0,
|
||||||
|
)
|
||||||
POST_DATA = True
|
POST_DATA = True
|
||||||
LAST_WEEK_RATIO = 0.0
|
LAST_WEEK_RATIO = 0.0
|
||||||
LAST_TWOWEEKS_RATIO = 0.0
|
LAST_TWOWEEKS_RATIO = 0.0
|
||||||
@ -1427,7 +1438,7 @@ def calc_pitching_cards(ps: pd.DataFrame, season_pct: float) -> pd.DataFrame:
|
|||||||
"closer_rating": [
|
"closer_rating": [
|
||||||
cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
|
cpi.closer_rating(int(row["GF"]), int(row["SV"]), int(row["G"]))
|
||||||
],
|
],
|
||||||
"batting": [f'#1W{row["pitch_hand"].upper()}-C'],
|
"batting": [f"#1W{row['pitch_hand'].upper()}-C"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return y.loc[0]
|
return y.loc[0]
|
||||||
@ -1596,7 +1607,7 @@ def calc_positions(bs: pd.DataFrame) -> pd.DataFrame:
|
|||||||
]:
|
]:
|
||||||
if row["key_bbref"] in pos_df.index:
|
if row["key_bbref"] in pos_df.index:
|
||||||
logger.info(
|
logger.info(
|
||||||
f'Running {position} stats for {row["use_name"]} {row["last_name"]}'
|
f"Running {position} stats for {row['use_name']} {row['last_name']}"
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
if "bis_runs_total" in pos_df.columns:
|
if "bis_runs_total" in pos_df.columns:
|
||||||
@ -1863,8 +1874,8 @@ async def get_or_post_players(
|
|||||||
|
|
||||||
def new_player_payload(row, ratings_df: pd.DataFrame):
|
def new_player_payload(row, ratings_df: pd.DataFrame):
|
||||||
return {
|
return {
|
||||||
"p_name": f'{row["use_name"]} {row["last_name"]}',
|
"p_name": f"{row['use_name']} {row['last_name']}",
|
||||||
"cost": f'{ratings_df.loc[row['key_bbref']]["cost"]}',
|
"cost": f"{ratings_df.loc[row['key_bbref']]['cost']}",
|
||||||
"image": "change-me",
|
"image": "change-me",
|
||||||
"mlbclub": CLUB_LIST[row["Tm"]],
|
"mlbclub": CLUB_LIST[row["Tm"]],
|
||||||
"franchise": FRANCHISE_LIST[row["Tm"]],
|
"franchise": FRANCHISE_LIST[row["Tm"]],
|
||||||
@ -1914,11 +1925,11 @@ async def get_or_post_players(
|
|||||||
# Update positions for existing players too
|
# Update positions for existing players too
|
||||||
all_pos = get_player_record_pos(def_rat_df, row)
|
all_pos = get_player_record_pos(def_rat_df, row)
|
||||||
patch_params = [
|
patch_params = [
|
||||||
("cost", f'{bat_rat_df.loc[row['key_bbref']]["cost"]}'),
|
("cost", f"{bat_rat_df.loc[row['key_bbref']]['cost']}"),
|
||||||
("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
("rarity_id", int(bat_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
||||||
(
|
(
|
||||||
"image",
|
"image",
|
||||||
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
# Add position updates - set all 8 slots to clear any old positions
|
# Add position updates - set all 8 slots to clear any old positions
|
||||||
@ -1962,7 +1973,7 @@ async def get_or_post_players(
|
|||||||
params=[
|
params=[
|
||||||
(
|
(
|
||||||
"image",
|
"image",
|
||||||
f'{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
f"{CARD_BASE_URL}{player_id}/battingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -2001,11 +2012,11 @@ async def get_or_post_players(
|
|||||||
|
|
||||||
# Determine pitcher positions based on ratings
|
# Determine pitcher positions based on ratings
|
||||||
patch_params = [
|
patch_params = [
|
||||||
("cost", f'{pit_rat_df.loc[row['key_bbref']]["cost"]}'),
|
("cost", f"{pit_rat_df.loc[row['key_bbref']]['cost']}"),
|
||||||
("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
("rarity_id", int(pit_rat_df.loc[row["key_bbref"]]["rarity_id"])),
|
||||||
(
|
(
|
||||||
"image",
|
"image",
|
||||||
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -2079,7 +2090,7 @@ async def get_or_post_players(
|
|||||||
params=[
|
params=[
|
||||||
(
|
(
|
||||||
"image",
|
"image",
|
||||||
f'{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote("?d=")}{RELEASE_DIRECTORY}',
|
f"{CARD_BASE_URL}{player_id}/pitchingcard{urllib.parse.quote('?d=')}{RELEASE_DIRECTORY}",
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -2103,10 +2114,10 @@ async def get_or_post_players(
|
|||||||
raise KeyError("Could not get players - not enough stat DFs were supplied")
|
raise KeyError("Could not get players - not enough stat DFs were supplied")
|
||||||
|
|
||||||
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
|
pd.DataFrame(player_deltas[1:], columns=player_deltas[0]).to_csv(
|
||||||
f'{"batter" if bstat_df is not None else "pitcher"}-deltas.csv'
|
f"{'batter' if bstat_df is not None else 'pitcher'}-deltas.csv"
|
||||||
)
|
)
|
||||||
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
|
pd.DataFrame(new_players[1:], columns=new_players[0]).to_csv(
|
||||||
f'new-{"batter" if bstat_df is not None else "pitcher"}s.csv'
|
f"new-{'batter' if bstat_df is not None else 'pitcher'}s.csv"
|
||||||
)
|
)
|
||||||
|
|
||||||
players_df = pd.DataFrame(all_players).set_index("bbref_id")
|
players_df = pd.DataFrame(all_players).set_index("bbref_id")
|
||||||
@ -2278,7 +2289,7 @@ async def post_positions(pos_df: pd.DataFrame, delete_existing: bool = False):
|
|||||||
deleted_count += 1
|
deleted_count += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f'Failed to delete cardposition {pos["id"]}: {e}'
|
f"Failed to delete cardposition {pos['id']}: {e}"
|
||||||
)
|
)
|
||||||
logger.info(f"Deleted {deleted_count} positions for players in current run")
|
logger.info(f"Deleted {deleted_count} positions for players in current run")
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user