paper-dynasty-card-creation/.claude/ops-rework/check_missing_ratings.py
2025-11-08 16:57:35 -06:00

111 lines
4.9 KiB
Python

import asyncio
from db_calls import db_get
import pandas as pd
def _ratings_frame(query) -> pd.DataFrame:
    """Return the ``'ratings'`` rows of a response as a DataFrame, or an empty frame."""
    # The response may be falsy or report a zero count; both mean "no rows".
    if query and query['count'] > 0:
        return pd.DataFrame(query['ratings'])
    return pd.DataFrame()


def _rated_card_ids(ratings: pd.DataFrame) -> set:
    """Return the set of batting-card IDs referenced by a ratings frame.

    Each ``battingcard`` value is either a dict (the ID is read from its
    ``'id'`` key) or the bare ID itself; the check is made per row. An empty
    frame yields an empty set, so callers never index a missing column.
    """
    if ratings.empty:
        return set()
    return {
        ref['id'] if isinstance(ref, dict) else ref
        for ref in ratings['battingcard']
    }


async def main(cardset_name: str = '2025 Season') -> None:
    """Report batting cards in a cardset that lack complete (vL and vR) ratings.

    Looks up the cardset by name, fetches its batting cards and the batting
    ratings against each hand, prints summary counts, and then lists the
    players whose cards are missing at least one of the two rating sets.

    Args:
        cardset_name: Name of the cardset to check. Defaults to the
            '2025 Season' cardset.

    Raises:
        LookupError: If no cardset with ``cardset_name`` is returned.
    """
    c_query = await db_get('cardsets', params=[('name', cardset_name)])
    cardsets = (c_query or {}).get('cardsets') or []
    if not cardsets:
        raise LookupError(f'No cardset named {cardset_name!r} was found')
    cardset_id = cardsets[0]['id']
    print(f'Checking cardset: {cardset_name} (ID: {cardset_id})\n')

    # Get batting cards
    bc_query = await db_get('battingcards', params=[('cardset_id', cardset_id)])
    batting_cards = pd.DataFrame(bc_query['cards'])
    print(f"Total batting cards: {len(batting_cards)}")
    if batting_cards.empty:
        # An empty frame has no 'id'/'player' columns, so nothing below can run.
        print("No batting cards found; nothing to check.")
        return

    # Get batting ratings for each hand.
    # NOTE(review): team_id and ts are hard-coded query filters carried over
    # unchanged -- confirm they are still the intended values.
    shared_filters = [('team_id', 31), ('ts', 's37136685556r6135248705')]
    vl_query = await db_get(
        'battingcardratings',
        params=[('cardset_id', cardset_id), ('vs_hand', 'L'), *shared_filters],
        timeout=30,
    )
    vr_query = await db_get(
        'battingcardratings',
        params=[('cardset_id', cardset_id), ('vs_hand', 'R'), *shared_filters],
        timeout=30,
    )
    vl_ratings = _ratings_frame(vl_query)
    vr_ratings = _ratings_frame(vr_query)
    print(f"Total batting ratings (vL): {len(vl_ratings)}")
    print(f"Total batting ratings (vR): {len(vr_ratings)}")

    # Debug: print column names and sample data
    if not vl_ratings.empty:
        print(f"\nvL Ratings columns: {vl_ratings.columns.tolist()}")
        print(f"Sample vL rating battingcard values: {vl_ratings['battingcard'].head().tolist()}")
        print(f"Sample vL rating battingcard types: {type(vl_ratings['battingcard'].iloc[0])}")
        print(f"Batting cards 'id' column type: {batting_cards['id'].dtype}")
        print(f"Sample batting card IDs: {batting_cards['id'].head().tolist()}")

    # Each hand is resolved independently, so one empty side cannot break the
    # other (previously vR was indexed whenever vL was non-empty).
    has_vl = batting_cards['id'].isin(_rated_card_ids(vl_ratings))
    has_vr = batting_cards['id'].isin(_rated_card_ids(vr_ratings))

    # Cards should have BOTH vL and vR ratings
    has_both = has_vl & has_vr
    # Copy so the helper column added below never writes into batting_cards.
    cards_without_ratings = batting_cards[~has_both].copy()

    print(f"\nCards with vL ratings: {int(has_vl.sum())}")
    print(f"Cards with vR ratings: {int(has_vr.sum())}")
    print(f"Cards with BOTH ratings: {int(has_both.sum())}")
    print(f"Cards WITHOUT complete ratings: {len(cards_without_ratings)}")
    if cards_without_ratings.empty:
        print("\nEvery batting card has both vL and vR ratings.")
        return

    # Get player info
    p_query = await db_get('players', params=[('cardset_id', cardset_id)])
    players_df = pd.DataFrame(p_query['players'])

    # The card's 'player' value is either a dict (ID under 'player_id') or the
    # bare ID; normalise it per row so the merge key is always the plain ID.
    cards_without_ratings['player_id_extract'] = cards_without_ratings['player'].apply(
        lambda ref: ref['player_id'] if isinstance(ref, dict) else ref
    )

    # Merge to get player names. This is pandas' default inner join, so a card
    # whose player ID has no match in players_df is absent from the listing.
    cards_without = pd.merge(
        cards_without_ratings,
        players_df[['player_id', 'p_name', 'cost', 'description']],
        left_on='player_id_extract',
        right_on='player_id',
    )

    print("\nSample of players without complete ratings (first 20):")
    print(f"{'Player Name':<30} {'Batting Card ID':<15} {'Cost':<10} {'Description'}")
    print("-" * 90)
    for _, row in cards_without.head(20).iterrows():
        print(f"{row['p_name']:<30} {row['id']:<15} {row['cost']:<10} {row['description']}")

    # Check if these are mostly new players (cost=99999)
    is_new = cards_without['cost'] == 99999
    new_players = cards_without[is_new]
    old_players = cards_without[~is_new]
    print(f"\nNew players (cost=99999): {len(new_players)}")
    print(f"Existing players: {len(old_players)}")
    if not old_players.empty:
        print("\nExisting players WITHOUT ratings:")
        for _, row in old_players.head(10).iterrows():
            print(f"  - {row['p_name']} (Card ID: {row['id']}, Cost: {row['cost']})")
# Script entry point: run the async report to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())