Standardize formatting with black and apply ruff auto-fixes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
470 lines
15 KiB
Python
470 lines
15 KiB
Python
"""
|
|
Script to analyze rarity distribution and card costs for a specific cardset.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import pandas as pd
|
|
from db_calls import db_get
|
|
from rarity_thresholds import get_pitcher_thresholds, get_batter_thresholds
|
|
|
|
# Set up a console (stream) logger for this module.
# NOTE: the original comment said "rotating logger", but no RotatingFileHandler
# is configured — output goes to stderr via StreamHandler.
logger = logging.getLogger(__name__)  # f-string wrapper around __name__ was redundant
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
|
|
|
|
|
|
async def analyze_cardset(cardset_id: int):
    """Analyze rarity distribution and card costs for a specific cardset.

    Fetches the cardset record, its players, and its batting/pitching
    cards, then logs rarity-distribution, cost-anomaly, and OPS-alignment
    reports via the sibling analyze_* functions.

    Args:
        cardset_id: Primary key of the cardset to analyze.

    Returns:
        None. Results are emitted through the module logger. Returns early
        if the cardset or its players cannot be found.
    """
    logger.info(f"Starting analysis for cardset {cardset_id}...\n")

    # Get cardset info to extract the season year
    c_query = await db_get("cardsets", object_id=cardset_id)
    if c_query is None:
        logger.error(f"Cardset {cardset_id} not found")
        return

    cardset_name = c_query.get("name", "")
    logger.info(f"Cardset: {cardset_name}")

    # Extract year from cardset name (e.g., "2005 Live" -> 2005)
    import re

    year_match = re.search(r"(\d{4})", cardset_name)
    if year_match:
        season = int(year_match.group(1))
        logger.info(f"Detected season: {season}")
    else:
        season = 2024  # Default fallback
        logger.warning(
            f"Could not detect season from cardset name, using default: {season}"
        )

    logger.info("")

    # Get all players in this cardset
    p_query = await db_get("players", params=[("cardset_id", cardset_id)])
    if p_query is None or p_query["count"] == 0:
        logger.error("No players found")
        return

    players_df = pd.DataFrame(p_query["players"])
    logger.info(f"Found {len(players_df)} total players")

    # Extract rarity ID if it's a dict (API may embed the rarity object)
    if isinstance(players_df["rarity"].iloc[0], dict):
        players_df["rarity_id"] = players_df["rarity"].apply(
            lambda x: x["id"] if isinstance(x, dict) else x
        )
    else:
        players_df["rarity_id"] = players_df["rarity"]

    # Get batting and pitching cards separately.
    # FIX: guard against db_get returning None before indexing "count",
    # mirroring the None check applied to p_query above.
    bc_query = await db_get("battingcards", params=[("cardset_id", cardset_id)])
    pc_query = await db_get("pitchingcards", params=[("cardset_id", cardset_id)])

    batting_cards_df = (
        pd.DataFrame(bc_query["cards"])
        if bc_query is not None and bc_query["count"] > 0
        else pd.DataFrame()
    )
    pitching_cards_df = (
        pd.DataFrame(pc_query["cards"])
        if pc_query is not None and pc_query["count"] > 0
        else pd.DataFrame()
    )

    logger.info(f"Found {len(batting_cards_df)} batting cards")
    logger.info(f"Found {len(pitching_cards_df)} pitching cards\n")

    # Get thresholds for the season
    pitcher_thresholds = get_pitcher_thresholds(season)
    batter_thresholds = get_batter_thresholds(season)

    # Analyze overall rarity distribution
    analyze_overall_rarity(players_df)

    # Analyze batting cards
    if len(batting_cards_df) > 0:
        analyze_batting_cards(batting_cards_df, players_df, batter_thresholds)

    # Analyze pitching cards
    if len(pitching_cards_df) > 0:
        analyze_pitching_cards(pitching_cards_df, players_df, pitcher_thresholds)
|
|
|
|
|
|
def analyze_overall_rarity(players_df: pd.DataFrame):
    """Log how the cardset's players are distributed across rarity tiers.

    Args:
        players_df: Player records; must contain a "rarity_id" column.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("OVERALL RARITY DISTRIBUTION")
    logger.info(banner)

    # Human-readable labels for each rarity tier ID.
    labels = {
        1: "Diamond",
        2: "Gold",
        3: "Silver",
        4: "Bronze",
        5: "Common",
        99: "Hall of Fame",
    }

    total = len(players_df)
    tally = players_df["rarity_id"].value_counts().sort_index()

    # One line per tier: label, ID, count, and percentage of all players.
    for rid, n in tally.items():
        label = labels.get(rid, f"Unknown ({rid})")
        share = (n / total) * 100
        logger.info(f"{label:15} ({rid:2}): {n:5} cards ({share:5.1f}%)")

    logger.info("-" * 60)
    logger.info(f"Total: {total} cards\n")
|
|
|
|
|
|
def analyze_batting_cards(
    batting_cards_df: pd.DataFrame, players_df: pd.DataFrame, thresholds
):
    """Analyze batting card rarities and costs.

    Logs three reports: the rarity distribution of batting cards, player
    cost values that deviate from the expected cost for their rarity, and
    cards whose total_OPS does not map to the assigned rarity under the
    season thresholds.

    Args:
        batting_cards_df: Batting cards for the cardset. NOTE: mutated in
            place — a "player_ref" column is added.
        players_df: Player records with "player_id", "p_name", "rarity_id"
            and "cost" columns.
        thresholds: Season threshold object exposing get_rarity(ops).
    """
    logger.info("=" * 60)
    logger.info("BATTING CARD ANALYSIS")
    logger.info("=" * 60)

    # Extract player ID from player reference (may be URL or dict).
    # The first row is used to sniff the representation for the whole column;
    # assumes the column is homogeneous — TODO confirm against the API.
    sample_player = batting_cards_df["player"].iloc[0]
    if isinstance(sample_player, dict):
        # The dict has 'player_id' not 'id'
        batting_cards_df["player_ref"] = batting_cards_df["player"].apply(
            lambda x: (
                int(x.get("player_id"))
                if isinstance(x, dict) and x.get("player_id")
                else None
            )
        )
    elif isinstance(sample_player, str):
        # Extract ID from URL like "/api/v2/players/123"
        batting_cards_df["player_ref"] = (
            batting_cards_df["player"].str.extract(r"/(\d+)$")[0].astype(int)
        )
    elif isinstance(sample_player, int):
        # Already a bare integer ID; use it directly.
        batting_cards_df["player_ref"] = batting_cards_df["player"]
    else:
        # Unknown representation: log it and fall back to the raw column,
        # which will simply fail to match in the merge below.
        logger.error(f"Unknown player reference type: {type(sample_player)}")
        batting_cards_df["player_ref"] = batting_cards_df["player"]

    # Merge with player data to get rarity (left join keeps cards whose
    # player record is missing; their rarity_id/p_name come back as NaN).
    batting_with_player = batting_cards_df.merge(
        players_df[["player_id", "p_name", "rarity_id"]],
        left_on="player_ref",
        right_on="player_id",
        how="left",
    )

    # Count rarities
    rarity_counts = batting_with_player["rarity_id"].value_counts().sort_index()

    # Human-readable labels for each rarity tier ID.
    rarity_names = {
        1: "Diamond",
        2: "Gold",
        3: "Silver",
        4: "Bronze",
        5: "Common",
        99: "Hall of Fame",
    }

    logger.info("\nBatting Card Rarity Distribution:")
    total = len(batting_with_player)
    for rarity_id, count in rarity_counts.items():
        rarity_name = rarity_names.get(rarity_id, f"Unknown ({rarity_id})")
        pct = (count / total) * 100
        logger.info(f"{rarity_name:15} ({rarity_id:2}): {count:5} cards ({pct:5.1f}%)")

    # Check for cost anomalies
    logger.info("\n" + "-" * 60)
    logger.info("BATTING CARD COST ANALYSIS")
    logger.info("-" * 60)

    # Expected cost doubles at each tier step up from Common.
    expected_costs = {
        5: 20,  # Common
        4: 40,  # Bronze
        3: 80,  # Silver
        2: 160,  # Gold
        1: 320,  # Diamond
        99: 640,  # Hall of Fame
    }

    # Note: cost is stored on the player record, not the card record
    # We'll check player costs instead
    player_costs = players_df[
        players_df["player_id"].isin(batting_with_player["player_ref"])
    ].copy()
    # players_df already carries "rarity_id", so the card-side column from
    # this merge lands as "rarity_id_card" via the suffixes argument.
    player_costs_with_rarity = player_costs.merge(
        batting_with_player[["player_ref", "rarity_id"]].drop_duplicates(),
        left_on="player_id",
        right_on="player_ref",
        how="left",
        suffixes=("", "_card"),
    )

    cost_issues = []
    for _, player in player_costs_with_rarity.iterrows():
        rarity = player.get("rarity_id_card")
        cost = player.get("cost")
        # Unknown rarity -> expected is None, which flags the row below.
        expected = expected_costs.get(rarity)

        if cost != expected:
            cost_issues.append(
                {
                    "player": player.get("p_name"),
                    "player_id": player.get("player_id"),
                    "rarity": rarity,
                    "actual_cost": cost,
                    "expected_cost": expected,
                }
            )

    if cost_issues:
        logger.warning(f"Found {len(cost_issues)} cost anomalies:")
        for issue in cost_issues[:20]:  # Show first 20
            logger.warning(
                f"  {issue['player']} (Player ID: {issue['player_id']}): "
                f"Rarity {issue['rarity']} has cost {issue['actual_cost']}, "
                f"expected {issue['expected_cost']}"
            )
        if len(cost_issues) > 20:
            logger.warning(f"  ... and {len(cost_issues) - 20} more")
    else:
        logger.info("✓ No cost anomalies found")

    # Check for OPS-rarity alignment
    logger.info("\n" + "-" * 60)
    logger.info("BATTING OPS-RARITY ALIGNMENT")
    logger.info("-" * 60)

    ops_mismatches = []
    for _, card in batting_with_player.iterrows():
        ops = card.get("total_OPS")
        rarity = card.get("rarity_id")

        # Skip cards with no OPS value (e.g. missing stats).
        if pd.isna(ops) or ops is None:
            continue

        expected_rarity = thresholds.get_rarity(ops)

        if expected_rarity != rarity:
            ops_mismatches.append(
                {
                    "player": card.get("p_name"),
                    "card_id": card.get("id"),
                    "ops": ops,
                    "actual_rarity": rarity,
                    "expected_rarity": expected_rarity,
                }
            )

    if ops_mismatches:
        logger.warning(f"Found {len(ops_mismatches)} OPS-rarity mismatches:")
        for issue in ops_mismatches[:20]:
            logger.warning(
                f"  {issue['player']} (Card ID: {issue['card_id']}): "
                f"OPS {issue['ops']:.3f} assigned rarity {issue['actual_rarity']}, "
                f"expected {issue['expected_rarity']}"
            )
        if len(ops_mismatches) > 20:
            logger.warning(f"  ... and {len(ops_mismatches) - 20} more")
    else:
        logger.info("✓ All OPS values align with rarity assignments")

    logger.info("")
|
|
|
|
|
|
def analyze_pitching_cards(
    pitching_cards_df: pd.DataFrame, players_df: pd.DataFrame, thresholds
):
    """Analyze pitching card rarities and costs.

    Logs the rarity distribution of pitching cards, the starter/reliever
    split, player cost values that deviate from the expected cost for their
    rarity, and cards whose total_OPS does not map to the assigned rarity
    under the season thresholds (starters and relievers use separate
    threshold tables).

    Args:
        pitching_cards_df: Pitching cards for the cardset. NOTE: mutated in
            place — "player_ref" and "is_starter" columns are added.
        players_df: Player records with "player_id", "p_name", "rarity_id"
            and "cost" columns.
        thresholds: Season threshold object exposing
            get_rarity_for_starter(ops) and get_rarity_for_reliever(ops).
    """
    logger.info("=" * 60)
    logger.info("PITCHING CARD ANALYSIS")
    logger.info("=" * 60)

    # Extract player ID from player reference (may be URL or dict).
    # The first row is used to sniff the representation for the whole column.
    sample_player = pitching_cards_df["player"].iloc[0]
    if isinstance(sample_player, dict):
        # The dict has 'player_id' not 'id'
        pitching_cards_df["player_ref"] = pitching_cards_df["player"].apply(
            lambda x: (
                int(x.get("player_id"))
                if isinstance(x, dict) and x.get("player_id")
                else None
            )
        )
    elif isinstance(sample_player, str):
        # Extract ID from URL like "/api/v2/players/123"
        pitching_cards_df["player_ref"] = (
            pitching_cards_df["player"].str.extract(r"/(\d+)$")[0].astype(int)
        )
    elif isinstance(sample_player, int):
        # Already a bare integer ID; use it directly.
        pitching_cards_df["player_ref"] = pitching_cards_df["player"]
    else:
        # FIX: log unknown reference types, matching analyze_batting_cards
        # (previously this fallback was silent).
        logger.error(f"Unknown player reference type: {type(sample_player)}")
        pitching_cards_df["player_ref"] = pitching_cards_df["player"]

    # Merge with player data to get rarity (left join keeps cards whose
    # player record is missing; their rarity_id/p_name come back as NaN).
    pitching_with_player = pitching_cards_df.merge(
        players_df[["player_id", "p_name", "rarity_id"]],
        left_on="player_ref",
        right_on="player_id",
        how="left",
    )

    # Count rarities
    rarity_counts = pitching_with_player["rarity_id"].value_counts().sort_index()

    # Human-readable labels for each rarity tier ID.
    rarity_names = {
        1: "Diamond",
        2: "Gold",
        3: "Silver",
        4: "Bronze",
        5: "Common",
        99: "Hall of Fame",
    }

    logger.info("\nPitching Card Rarity Distribution:")
    total = len(pitching_with_player)
    for rarity_id, count in rarity_counts.items():
        rarity_name = rarity_names.get(rarity_id, f"Unknown ({rarity_id})")
        pct = (count / total) * 100
        logger.info(f"{rarity_name:15} ({rarity_id:2}): {count:5} cards ({pct:5.1f}%)")

    # Separate starters and relievers: a starter_rating of 4+ marks a starter.
    pitching_with_player["is_starter"] = (
        pitching_with_player["starter_rating"].fillna(0) >= 4
    )
    starters = pitching_with_player[pitching_with_player["is_starter"]]
    relievers = pitching_with_player[~pitching_with_player["is_starter"]]

    logger.info(f"\nStarters: {len(starters)}, Relievers: {len(relievers)}")

    # Check for cost anomalies
    logger.info("\n" + "-" * 60)
    logger.info("PITCHING CARD COST ANALYSIS")
    logger.info("-" * 60)

    # Expected cost doubles at each tier step up from Common.
    expected_costs = {
        5: 20,  # Common
        4: 40,  # Bronze
        3: 80,  # Silver
        2: 160,  # Gold
        1: 320,  # Diamond
        99: 640,  # Hall of Fame
    }

    # Note: cost is stored on the player record, not the card record
    # We'll check player costs instead
    player_costs = players_df[
        players_df["player_id"].isin(pitching_with_player["player_ref"])
    ].copy()
    # players_df already carries "rarity_id", so the card-side column from
    # this merge lands as "rarity_id_card" via the suffixes argument.
    player_costs_with_rarity = player_costs.merge(
        pitching_with_player[["player_ref", "rarity_id"]].drop_duplicates(),
        left_on="player_id",
        right_on="player_ref",
        how="left",
        suffixes=("", "_card"),
    )

    cost_issues = []
    for _, player in player_costs_with_rarity.iterrows():
        rarity = player.get("rarity_id_card")
        cost = player.get("cost")
        # Unknown rarity -> expected is None, which flags the row below.
        expected = expected_costs.get(rarity)

        if cost != expected:
            cost_issues.append(
                {
                    "player": player.get("p_name"),
                    "player_id": player.get("player_id"),
                    "rarity": rarity,
                    "actual_cost": cost,
                    "expected_cost": expected,
                }
            )

    if cost_issues:
        logger.warning(f"Found {len(cost_issues)} cost anomalies:")
        for issue in cost_issues[:20]:
            logger.warning(
                f"  {issue['player']} (Player ID: {issue['player_id']}): "
                f"Rarity {issue['rarity']} has cost {issue['actual_cost']}, "
                f"expected {issue['expected_cost']}"
            )
        if len(cost_issues) > 20:
            logger.warning(f"  ... and {len(cost_issues) - 20} more")
    else:
        logger.info("✓ No cost anomalies found")

    # Check for OPS-rarity alignment
    logger.info("\n" + "-" * 60)
    logger.info("PITCHING OPS-RARITY ALIGNMENT")
    logger.info("-" * 60)

    ops_mismatches = []
    for _, card in pitching_with_player.iterrows():
        ops = card.get("total_OPS")
        rarity = card.get("rarity_id")
        is_starter = card.get("is_starter", False)

        # Skip cards with no OPS value; pd.isna already covers None.
        if pd.isna(ops):
            continue

        # Starters and relievers are graded against separate thresholds.
        if is_starter:
            expected_rarity = thresholds.get_rarity_for_starter(ops)
        else:
            expected_rarity = thresholds.get_rarity_for_reliever(ops)

        if expected_rarity != rarity:
            ops_mismatches.append(
                {
                    "player": card.get("p_name"),
                    "card_id": card.get("id"),
                    "ops": ops,
                    "is_starter": is_starter,
                    "actual_rarity": rarity,
                    "expected_rarity": expected_rarity,
                }
            )

    if ops_mismatches:
        logger.warning(f"Found {len(ops_mismatches)} OPS-rarity mismatches:")
        for issue in ops_mismatches[:20]:
            role = "SP" if issue["is_starter"] else "RP"
            logger.warning(
                f"  {issue['player']} ({role}, Card ID: {issue['card_id']}): "
                f"OPS {issue['ops']:.3f} assigned rarity {issue['actual_rarity']}, "
                f"expected {issue['expected_rarity']}"
            )
        if len(ops_mismatches) > 20:
            logger.warning(f"  ... and {len(ops_mismatches) - 20} more")
    else:
        logger.info("✓ All OPS values align with rarity assignments")

    logger.info("")
|
|
|
|
|
|
async def main():
    """Entry point: resolve the cardset ID from argv and run the analysis."""
    import sys

    # First CLI argument selects the cardset; otherwise fall back to 27.
    if len(sys.argv) < 2:
        CARDSET_ID = 27
        logger.info(f"Using default cardset_id: {CARDSET_ID}")
        logger.info("Usage: python check_cardset_rarity.py <cardset_id>\n")
    else:
        CARDSET_ID = int(sys.argv[1])

    await analyze_cardset(CARDSET_ID)

    banner = "=" * 60
    logger.info(banner)
    logger.info("Analysis complete")
    logger.info(banner)
|
|
|
|
|
|
# Run the async entry point only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
|