paper-dynasty-card-creation/.claude/ops-rework/investigate_hof_batters.py
2025-11-08 16:57:35 -06:00

121 lines
5.4 KiB
Python

import asyncio
from db_calls import db_get
import pandas as pd
# Rarity id this investigation treats as HOF.
_HOF_RARITY_ID = 99
# Cost value that marks a new player whose cost has not been calculated yet.
_UNPRICED_COST = 99999


def _frame_from(query, key):
    """Return ``query[key]`` as a DataFrame, or an empty frame.

    An empty frame is returned when the query result is falsy (e.g. ``None``)
    or when it reports a missing/non-positive ``count``.
    """
    if not query or (query.get('count') or 0) <= 0:
        return pd.DataFrame()
    return pd.DataFrame(query[key])


def _ratings_params(cardset_id, vs_hand):
    """Build the ``battingcardratings`` query parameters for one pitcher hand."""
    return [
        ('cardset_id', cardset_id), ('vs_hand', vs_hand), ('short_output', True),
        # NOTE(review): team_id and ts are fixed values sent with every ratings
        # request; they look like access credentials -- confirm whether they
        # belong in configuration rather than in source.
        ('team_id', 31), ('ts', 's37136685556r6135248705'),
    ]


async def main(cardset_name='2025 Season'):
    """Investigate why so many batters have HOF rarity without ratings.

    Fetches the cardset, its players, batting cards, batting card ratings
    (vs L and vs R) and pitching cards through ``db_get``, then prints a
    report on the HOF players that own a batting card but have no vs-L
    rating attached to it.

    Args:
        cardset_name: Name of the cardset to inspect. Defaults to
            ``'2025 Season'``.

    Returns:
        None. All results are printed. The function returns early, after
        printing a notice, when the cardset or its players cannot be found.
    """
    # Get cardset
    c_query = await db_get('cardsets', params=[('name', cardset_name)])
    cardsets = (c_query or {}).get('cardsets') or []
    if not cardsets:
        print(f'No cardset named {cardset_name!r} was found.')
        return
    cardset_id = cardsets[0]['id']
    print(f'Investigating HOF batters in: {cardset_name} (ID: {cardset_id})\n')

    # Get all players
    p_query = await db_get('players', params=[('cardset_id', cardset_id)])
    players_df = pd.DataFrame((p_query or {}).get('players') or [])
    if players_df.empty or not {'player_id', 'rarity'}.issubset(players_df.columns):
        print(f'No usable player rows were returned for cardset {cardset_id}.')
        return

    # 'rarity' may be a nested object or a bare id; normalise every row rather
    # than trusting the shape of the first one.
    players_df['rarity_id'] = players_df['rarity'].apply(
        lambda rarity: rarity['id'] if isinstance(rarity, dict) else rarity
    )

    # Get HOF players
    hof_players = players_df[players_df['rarity_id'] == _HOF_RARITY_ID]
    # Zero-row frame that keeps the player columns, so later column lookups
    # stay valid even when a selection is empty.
    no_players = hof_players.iloc[0:0]

    # Get batting cards
    bc_query = await db_get(
        'battingcards',
        params=[('cardset_id', cardset_id), ('short_output', True)],
    )
    batting_cards = _frame_from(bc_query, 'cards')

    # Get batting card ratings
    vl_query = await db_get(
        'battingcardratings', params=_ratings_params(cardset_id, 'L'), timeout=30
    )
    vr_query = await db_get(
        'battingcardratings', params=_ratings_params(cardset_id, 'R'), timeout=30
    )
    vl_ratings = _frame_from(vl_query, 'ratings')
    vr_ratings = _frame_from(vr_query, 'ratings')

    print(f"Total HOF players: {len(hof_players)}")
    print(f"Total batting cards: {len(batting_cards)}")
    print(f"Total batting ratings (vL): {len(vl_ratings)}")
    print(f"Total batting ratings (vR): {len(vr_ratings)}")
    print()

    # Find HOF batters
    if batting_cards.empty:
        hof_batters = no_players
    else:
        hof_batters = hof_players[
            hof_players['player_id'].isin(batting_cards['player'])
        ]
    print(f"HOF players with batting cards: {len(hof_batters)}")

    # Find HOF batters with ratings (only the vs-L ratings are consulted)
    if batting_cards.empty or vl_ratings.empty:
        hof_batters_with_ratings = no_players
    else:
        rated_cards = batting_cards[
            batting_cards['id'].isin(vl_ratings['battingcard'])
        ]
        hof_batters_with_ratings = hof_players[
            hof_players['player_id'].isin(rated_cards['player'])
        ]
    print(f"HOF players with ratings: {len(hof_batters_with_ratings)}")

    # Find HOF batters WITHOUT ratings (the problem!)
    # isin() against an empty selection is all-False, so this one expression
    # covers the "nobody has ratings" and "no batters at all" cases too.
    hof_batters_no_ratings = hof_batters[
        ~hof_batters['player_id'].isin(hof_batters_with_ratings['player_id'])
    ]
    print(f"HOF batters WITHOUT ratings: {len(hof_batters_no_ratings)}")
    print()

    # Sample of HOF batters without ratings
    if not hof_batters_no_ratings.empty:
        print("Sample of HOF batters WITHOUT ratings (first 20):")
        print(f"{'Player Name':<30} {'Player ID':<12} {'Cost':<10} {'Description':<20}")
        print("-" * 80)
        for _, row in hof_batters_no_ratings.head(20).iterrows():
            # !s keeps the alignment spec valid when a field is None.
            print(
                f"{row['p_name']!s:<30} {row['player_id']!s:<12} "
                f"{row['cost']!s:<10} {row['description']!s:<20}"
            )
        print()
        print("Checking if these have batting cards but no ratings...")
        if not batting_cards.empty:
            # These rows were selected from hof_batters, which is already
            # filtered to players present in batting_cards.
            has_cards = hof_batters_no_ratings['player_id'].isin(batting_cards['player'])
            print(f" - Have batting cards: {has_cards.sum()}")
            print(f" - No batting cards: {(~has_cards).sum()}")

    # Check pitching cards for HOF players
    pc_query = await db_get(
        'pitchingcards',
        params=[('cardset_id', cardset_id), ('short_output', True)],
    )
    pitching_cards = _frame_from(pc_query, 'cards')
    print()
    print("Checking if HOF 'batters' are actually pitchers...")
    if not pitching_cards.empty and not hof_batters_no_ratings.empty:
        hof_are_pitchers = hof_batters_no_ratings['player_id'].isin(pitching_cards['player'])
        pitcher_count = hof_are_pitchers.sum()
        print(f" - HOF 'batters' who are actually pitchers: {pitcher_count}")
        if pitcher_count > 0:
            print()
            print("These HOF players are pitchers, not batters:")
            pitcher_hofs = hof_batters_no_ratings[hof_are_pitchers]
            for _, row in pitcher_hofs.head(10).iterrows():
                print(f" - {row['p_name']} (ID: {row['player_id']})")

    # Check the cost field - if cost is 99999, it means new player without calculated cost
    if not hof_batters_no_ratings.empty:
        print()
        print(f"Checking if these are new players (cost = {_UNPRICED_COST}):")
        is_new = hof_batters_no_ratings['cost'] == _UNPRICED_COST
        print(f" - New players (cost={_UNPRICED_COST}): {is_new.sum()}")
        print(f" - Existing players: {(~is_new).sum()}")
if __name__ == "__main__":
    # Only start the event loop when executed as a script, never on import.
    asyncio.run(main())