"""
Script to analyze rarity distribution and card costs for a specific cardset.
"""

import asyncio
import logging
import re
from collections import Counter

import pandas as pd

from db_calls import db_get
from rarity_thresholds import get_pitcher_thresholds, get_batter_thresholds

# Set up rotating logger
|
|
logger = logging.getLogger(f'{__name__}')
|
|
handler = logging.StreamHandler()
|
|
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
|
|
logger.addHandler(handler)
|
|
logger.setLevel(logging.INFO)
|
|
|
|
|
|
async def analyze_cardset(cardset_id: int):
    """Analyze rarity distribution and card costs for a specific cardset.

    Looks up the cardset, infers the season year from its name, loads all
    players plus their batting/pitching cards, then runs the overall,
    batting, and pitching analyses.

    Args:
        cardset_id: Primary key of the cardset to analyze.
    """
    logger.info(f'Starting analysis for cardset {cardset_id}...\n')

    # Get cardset info to extract the season year
    c_query = await db_get('cardsets', object_id=cardset_id)
    if c_query is None:
        logger.error(f'Cardset {cardset_id} not found')
        return

    cardset_name = c_query.get('name', '')
    logger.info(f'Cardset: {cardset_name}')

    # Extract year from cardset name (e.g., "2005 Live" -> 2005)
    year_match = re.search(r'(\d{4})', cardset_name)
    if year_match:
        season = int(year_match.group(1))
        logger.info(f'Detected season: {season}')
    else:
        season = 2024  # Default fallback
        logger.warning(f'Could not detect season from cardset name, using default: {season}')

    logger.info('')

    # Get all players in this cardset
    p_query = await db_get('players', params=[('cardset_id', cardset_id)])
    if p_query is None or p_query['count'] == 0:
        logger.error('No players found')
        return

    players_df = pd.DataFrame(p_query['players'])
    logger.info(f'Found {len(players_df)} total players')

    # Normalize rarity to a plain id column (the API may return a dict or a raw id)
    if isinstance(players_df['rarity'].iloc[0], dict):
        players_df['rarity_id'] = players_df['rarity'].apply(lambda x: x['id'] if isinstance(x, dict) else x)
    else:
        players_df['rarity_id'] = players_df['rarity']

    # Get batting and pitching cards separately
    bc_query = await db_get('battingcards', params=[('cardset_id', cardset_id)])
    pc_query = await db_get('pitchingcards', params=[('cardset_id', cardset_id)])

    # Guard against failed queries (None) as well as empty result sets;
    # the original only guarded the players query this way.
    batting_cards_df = pd.DataFrame(bc_query['cards']) if bc_query and bc_query['count'] > 0 else pd.DataFrame()
    pitching_cards_df = pd.DataFrame(pc_query['cards']) if pc_query and pc_query['count'] > 0 else pd.DataFrame()

    logger.info(f'Found {len(batting_cards_df)} batting cards')
    logger.info(f'Found {len(pitching_cards_df)} pitching cards\n')

    # Thresholds are season-specific
    pitcher_thresholds = get_pitcher_thresholds(season)
    batter_thresholds = get_batter_thresholds(season)

    # Analyze overall rarity distribution
    analyze_overall_rarity(players_df)

    # Analyze batting cards
    if len(batting_cards_df) > 0:
        analyze_batting_cards(batting_cards_df, players_df, batter_thresholds)

    # Analyze pitching cards
    if len(pitching_cards_df) > 0:
        analyze_pitching_cards(pitching_cards_df, players_df, pitcher_thresholds)
def analyze_overall_rarity(players_df: pd.DataFrame):
    """Log the rarity distribution across all players in the cardset.

    Args:
        players_df: Player records; must contain a 'rarity_id' column.
    """
    logger.info('=' * 60)
    logger.info('OVERALL RARITY DISTRIBUTION')
    logger.info('=' * 60)

    total = len(players_df)
    if total == 0:
        # Avoid division by zero when handed an empty frame
        logger.info('No players to analyze\n')
        return

    rarity_counts = players_df['rarity_id'].value_counts().sort_index()

    # Display names for the known rarity ids
    rarity_names = {
        1: 'Diamond',
        2: 'Gold',
        3: 'Silver',
        4: 'Bronze',
        5: 'Common',
        99: 'Hall of Fame'
    }

    for rarity_id, count in rarity_counts.items():
        rarity_name = rarity_names.get(rarity_id, f'Unknown ({rarity_id})')
        pct = (count / total) * 100
        logger.info(f'{rarity_name:15} ({rarity_id:2}): {count:5} cards ({pct:5.1f}%)')

    logger.info('-' * 60)
    logger.info(f'Total: {total} cards\n')
def analyze_batting_cards(batting_cards_df: pd.DataFrame, players_df: pd.DataFrame, thresholds):
    """Analyze batting card rarities, player costs, and OPS-rarity alignment.

    Args:
        batting_cards_df: Batting card records. The 'player' column may hold
            a dict, a URL string, or a raw id depending on the API serializer.
        players_df: Player records with 'player_id', 'p_name', 'rarity_id'
            and 'cost' columns.
        thresholds: Batter thresholds object exposing get_rarity(ops) --
            assumed to return a rarity id comparable to rarity_id; confirm
            against rarity_thresholds.
    """
    logger.info('=' * 60)
    logger.info('BATTING CARD ANALYSIS')
    logger.info('=' * 60)

    # Work on a copy so the caller's DataFrame is not mutated by the
    # player_ref column added below.
    batting_cards_df = batting_cards_df.copy()

    # Extract player ID from player reference (may be URL or dict)
    sample_player = batting_cards_df['player'].iloc[0]
    if isinstance(sample_player, dict):
        # The dict has 'player_id' not 'id'
        batting_cards_df['player_ref'] = batting_cards_df['player'].apply(
            lambda x: int(x.get('player_id')) if isinstance(x, dict) and x.get('player_id') else None
        )
    elif isinstance(sample_player, str):
        # Extract ID from URL like "/api/v2/players/123"
        batting_cards_df['player_ref'] = batting_cards_df['player'].str.extract(r'/(\d+)$')[0].astype(int)
    elif isinstance(sample_player, int):
        batting_cards_df['player_ref'] = batting_cards_df['player']
    else:
        logger.error(f'Unknown player reference type: {type(sample_player)}')
        batting_cards_df['player_ref'] = batting_cards_df['player']

    # Merge with player data to get rarity (left join keeps unmatched cards)
    batting_with_player = batting_cards_df.merge(
        players_df[['player_id', 'p_name', 'rarity_id']],
        left_on='player_ref',
        right_on='player_id',
        how='left'
    )

    # Count rarities
    rarity_counts = batting_with_player['rarity_id'].value_counts().sort_index()

    rarity_names = {
        1: 'Diamond',
        2: 'Gold',
        3: 'Silver',
        4: 'Bronze',
        5: 'Common',
        99: 'Hall of Fame'
    }

    logger.info('\nBatting Card Rarity Distribution:')
    total = len(batting_with_player)
    for rarity_id, count in rarity_counts.items():
        rarity_name = rarity_names.get(rarity_id, f'Unknown ({rarity_id})')
        pct = (count / total) * 100
        logger.info(f'{rarity_name:15} ({rarity_id:2}): {count:5} cards ({pct:5.1f}%)')

    # Check for cost anomalies
    logger.info('\n' + '-' * 60)
    logger.info('BATTING CARD COST ANALYSIS')
    logger.info('-' * 60)

    # Expected player cost per rarity id (doubles at each tier)
    expected_costs = {
        5: 20,    # Common
        4: 40,    # Bronze
        3: 80,    # Silver
        2: 160,   # Gold
        1: 320,   # Diamond
        99: 640   # Hall of Fame
    }

    # Note: cost is stored on the player record, not the card record,
    # so player costs are validated against the card's rarity.
    player_costs = players_df[players_df['player_id'].isin(batting_with_player['player_ref'])].copy()
    player_costs_with_rarity = player_costs.merge(
        batting_with_player[['player_ref', 'rarity_id']].drop_duplicates(),
        left_on='player_id',
        right_on='player_ref',
        how='left',
        suffixes=('', '_card')
    )

    cost_issues = []
    for _, player in player_costs_with_rarity.iterrows():
        rarity = player.get('rarity_id_card')
        cost = player.get('cost')
        # Unknown/missing rarities yield expected None, which flags the player below
        expected = expected_costs.get(rarity)

        if cost != expected:
            cost_issues.append({
                'player': player.get('p_name'),
                'player_id': player.get('player_id'),
                'rarity': rarity,
                'actual_cost': cost,
                'expected_cost': expected
            })

    if cost_issues:
        logger.warning(f'Found {len(cost_issues)} cost anomalies:')
        for issue in cost_issues[:20]:  # Show first 20
            logger.warning(f" {issue['player']} (Player ID: {issue['player_id']}): "
                           f"Rarity {issue['rarity']} has cost {issue['actual_cost']}, "
                           f"expected {issue['expected_cost']}")
        if len(cost_issues) > 20:
            logger.warning(f' ... and {len(cost_issues) - 20} more')
    else:
        logger.info('✓ No cost anomalies found')

    # Check for OPS-rarity alignment
    logger.info('\n' + '-' * 60)
    logger.info('BATTING OPS-RARITY ALIGNMENT')
    logger.info('-' * 60)

    ops_mismatches = []
    for _, card in batting_with_player.iterrows():
        ops = card.get('total_OPS')
        rarity = card.get('rarity_id')

        # Skip cards without an OPS value
        if pd.isna(ops) or ops is None:
            continue

        expected_rarity = thresholds.get_rarity(ops)

        if expected_rarity != rarity:
            ops_mismatches.append({
                'player': card.get('p_name'),
                'card_id': card.get('id'),
                'ops': ops,
                'actual_rarity': rarity,
                'expected_rarity': expected_rarity
            })

    if ops_mismatches:
        logger.warning(f'Found {len(ops_mismatches)} OPS-rarity mismatches:')
        for issue in ops_mismatches[:20]:
            logger.warning(f" {issue['player']} (Card ID: {issue['card_id']}): "
                           f"OPS {issue['ops']:.3f} assigned rarity {issue['actual_rarity']}, "
                           f"expected {issue['expected_rarity']}")
        if len(ops_mismatches) > 20:
            logger.warning(f' ... and {len(ops_mismatches) - 20} more')
    else:
        logger.info('✓ All OPS values align with rarity assignments')

    logger.info('')
def analyze_pitching_cards(pitching_cards_df: pd.DataFrame, players_df: pd.DataFrame, thresholds):
    """Analyze pitching card rarities, player costs, and OPS-rarity alignment.

    Starters and relievers are validated against separate thresholds.

    Args:
        pitching_cards_df: Pitching card records. The 'player' column may hold
            a dict, a URL string, or a raw id depending on the API serializer.
        players_df: Player records with 'player_id', 'p_name', 'rarity_id'
            and 'cost' columns.
        thresholds: Pitcher thresholds object exposing
            get_rarity_for_starter(ops) and get_rarity_for_reliever(ops).
    """
    logger.info('=' * 60)
    logger.info('PITCHING CARD ANALYSIS')
    logger.info('=' * 60)

    # Work on a copy so the caller's DataFrame is not mutated by the
    # columns added below.
    pitching_cards_df = pitching_cards_df.copy()

    # Extract player ID from player reference (may be URL or dict)
    sample_player = pitching_cards_df['player'].iloc[0]
    if isinstance(sample_player, dict):
        # The dict has 'player_id' not 'id'
        pitching_cards_df['player_ref'] = pitching_cards_df['player'].apply(
            lambda x: int(x.get('player_id')) if isinstance(x, dict) and x.get('player_id') else None
        )
    elif isinstance(sample_player, str):
        # Extract ID from URL like "/api/v2/players/123"
        pitching_cards_df['player_ref'] = pitching_cards_df['player'].str.extract(r'/(\d+)$')[0].astype(int)
    elif isinstance(sample_player, int):
        # Raw integer id -- branch added for consistency with analyze_batting_cards
        pitching_cards_df['player_ref'] = pitching_cards_df['player']
    else:
        logger.error(f'Unknown player reference type: {type(sample_player)}')
        pitching_cards_df['player_ref'] = pitching_cards_df['player']

    # Merge with player data to get rarity (left join keeps unmatched cards)
    pitching_with_player = pitching_cards_df.merge(
        players_df[['player_id', 'p_name', 'rarity_id']],
        left_on='player_ref',
        right_on='player_id',
        how='left'
    )

    # Count rarities
    rarity_counts = pitching_with_player['rarity_id'].value_counts().sort_index()

    rarity_names = {
        1: 'Diamond',
        2: 'Gold',
        3: 'Silver',
        4: 'Bronze',
        5: 'Common',
        99: 'Hall of Fame'
    }

    logger.info('\nPitching Card Rarity Distribution:')
    total = len(pitching_with_player)
    for rarity_id, count in rarity_counts.items():
        rarity_name = rarity_names.get(rarity_id, f'Unknown ({rarity_id})')
        pct = (count / total) * 100
        logger.info(f'{rarity_name:15} ({rarity_id:2}): {count:5} cards ({pct:5.1f}%)')

    # Separate starters and relievers: a starter_rating of 4+ counts as a starter
    pitching_with_player['is_starter'] = pitching_with_player['starter_rating'].fillna(0) >= 4
    starters = pitching_with_player[pitching_with_player['is_starter']]
    relievers = pitching_with_player[~pitching_with_player['is_starter']]

    logger.info(f'\nStarters: {len(starters)}, Relievers: {len(relievers)}')

    # Check for cost anomalies
    logger.info('\n' + '-' * 60)
    logger.info('PITCHING CARD COST ANALYSIS')
    logger.info('-' * 60)

    # Expected player cost per rarity id (doubles at each tier)
    expected_costs = {
        5: 20,    # Common
        4: 40,    # Bronze
        3: 80,    # Silver
        2: 160,   # Gold
        1: 320,   # Diamond
        99: 640   # Hall of Fame
    }

    # Note: cost is stored on the player record, not the card record,
    # so player costs are validated against the card's rarity.
    player_costs = players_df[players_df['player_id'].isin(pitching_with_player['player_ref'])].copy()
    player_costs_with_rarity = player_costs.merge(
        pitching_with_player[['player_ref', 'rarity_id']].drop_duplicates(),
        left_on='player_id',
        right_on='player_ref',
        how='left',
        suffixes=('', '_card')
    )

    cost_issues = []
    for _, player in player_costs_with_rarity.iterrows():
        rarity = player.get('rarity_id_card')
        cost = player.get('cost')
        # Unknown/missing rarities yield expected None, which flags the player below
        expected = expected_costs.get(rarity)

        if cost != expected:
            cost_issues.append({
                'player': player.get('p_name'),
                'player_id': player.get('player_id'),
                'rarity': rarity,
                'actual_cost': cost,
                'expected_cost': expected
            })

    if cost_issues:
        logger.warning(f'Found {len(cost_issues)} cost anomalies:')
        for issue in cost_issues[:20]:
            logger.warning(f" {issue['player']} (Player ID: {issue['player_id']}): "
                           f"Rarity {issue['rarity']} has cost {issue['actual_cost']}, "
                           f"expected {issue['expected_cost']}")
        if len(cost_issues) > 20:
            logger.warning(f' ... and {len(cost_issues) - 20} more')
    else:
        logger.info('✓ No cost anomalies found')

    # Check for OPS-rarity alignment
    logger.info('\n' + '-' * 60)
    logger.info('PITCHING OPS-RARITY ALIGNMENT')
    logger.info('-' * 60)

    ops_mismatches = []
    for _, card in pitching_with_player.iterrows():
        ops = card.get('total_OPS')
        rarity = card.get('rarity_id')
        is_starter = card.get('is_starter', False)

        # Skip cards without an OPS value
        if pd.isna(ops) or ops is None:
            continue

        # Starters and relievers use different OPS thresholds
        if is_starter:
            expected_rarity = thresholds.get_rarity_for_starter(ops)
        else:
            expected_rarity = thresholds.get_rarity_for_reliever(ops)

        if expected_rarity != rarity:
            ops_mismatches.append({
                'player': card.get('p_name'),
                'card_id': card.get('id'),
                'ops': ops,
                'is_starter': is_starter,
                'actual_rarity': rarity,
                'expected_rarity': expected_rarity
            })

    if ops_mismatches:
        logger.warning(f'Found {len(ops_mismatches)} OPS-rarity mismatches:')
        for issue in ops_mismatches[:20]:
            role = 'SP' if issue['is_starter'] else 'RP'
            logger.warning(f" {issue['player']} ({role}, Card ID: {issue['card_id']}): "
                           f"OPS {issue['ops']:.3f} assigned rarity {issue['actual_rarity']}, "
                           f"expected {issue['expected_rarity']}")
        if len(ops_mismatches) > 20:
            logger.warning(f' ... and {len(ops_mismatches) - 20} more')
    else:
        logger.info('✓ All OPS values align with rarity assignments')

    logger.info('')
async def main():
    """Main execution function: parse argv and run the cardset analysis."""
    import sys

    # Parse command-line arguments (PEP 8: locals are snake_case)
    if len(sys.argv) >= 2:
        try:
            cardset_id = int(sys.argv[1])
        except ValueError:
            # Report bad input cleanly instead of letting int() traceback
            logger.error(f'Invalid cardset_id {sys.argv[1]!r}: must be an integer')
            logger.info('Usage: python check_cardset_rarity.py <cardset_id>\n')
            return
    else:
        # Default value if no arguments provided
        cardset_id = 27
        logger.info(f'Using default cardset_id: {cardset_id}')
        logger.info('Usage: python check_cardset_rarity.py <cardset_id>\n')

    await analyze_cardset(cardset_id)

    logger.info('=' * 60)
    logger.info('Analysis complete')
    logger.info('=' * 60)


if __name__ == '__main__':
    asyncio.run(main())