# paper-dynasty-card-creation/.claude/ops-rework/check_prod_missing_ratings.py
# 2025-11-08 16:57:35 -06:00
#
# 184 lines
# 7.7 KiB
# Python

import asyncio
import aiohttp
import pandas as pd
# Default headers attached to every production API request.
# NOTE(review): this looks like a real bearer token committed to source —
# consider loading it from the environment and rotating it; confirm with owner.
AUTH_TOKEN = {
    'Authorization': 'Bearer Tp3aO3jhYve5NJF1IqOmJTmk',
}

# Base URL of the production API; request paths are appended after `/v2/`.
PROD_URL = 'https://pd.manticorum.com/api'
async def db_get_prod(endpoint: str, params: list = None, timeout: int = 30):
    """Fetch JSON from a ``/v2/`` endpoint of the production API.

    Args:
        endpoint: Path segment appended to ``{PROD_URL}/v2/``.
        params: Optional list of ``(key, value)`` pairs sent as the query
            string. Values are converted with ``str()``, so booleans are
            still transmitted as ``True``/``False``.
        timeout: Total request timeout in seconds.

    Returns:
        The decoded JSON body when the server answers HTTP 200; otherwise
        ``None``, after printing the status code and the response text.

    Raises:
        Connection and timeout errors raised by aiohttp are not caught here
        and propagate to the caller.
    """
    req_url = f'{PROD_URL}/v2/{endpoint}'

    # Hand the pairs to aiohttp instead of joining them with '&'/'=' by hand:
    # the library percent-encodes reserved characters (&, =, #, +, ...) that
    # would otherwise corrupt the query string. aiohttp rejects bool values,
    # hence the explicit str() conversion.
    query = [(str(key), str(value)) for key, value in params] if params else None

    async with aiohttp.ClientSession(headers=AUTH_TOKEN) as session:
        async with session.get(
            req_url,
            params=query,
            timeout=aiohttp.ClientTimeout(total=timeout),
        ) as r:
            if r.status == 200:
                return await r.json()
            print(f"Error {r.status}: {await r.text()}")
            return None
def _records_frame(response, key):
    """Return ``response[key]`` as a DataFrame, or an empty frame when the
    request failed (``None``) or the API reported a ``count`` of zero."""
    if response and response['count'] > 0:
        return pd.DataFrame(response[key])
    return pd.DataFrame()


def _unwrap_id(value, key='id'):
    """Return ``value[key]`` when the API nested the record as a dict,
    otherwise return ``value`` unchanged (it is already a bare id)."""
    return value[key] if isinstance(value, dict) else value


def _rated_card_ids(ratings, column):
    """Return the set of card ids referenced by ``ratings[column]``.

    A frame without that column (e.g. an empty result) yields an empty set
    instead of raising ``KeyError``.
    """
    if column not in ratings.columns:
        return set()
    return set(ratings[column].map(_unwrap_id))


def _split_by_coverage(cards, vs_left, vs_right, column):
    """Split ``cards`` into (rated vs both hands, missing at least one hand)."""
    if cards.empty:
        # No cards at all: nothing to look up, and there is no 'id' column.
        return cards, cards
    rated_vs_both = _rated_card_ids(vs_left, column) & _rated_card_ids(vs_right, column)
    has_both = cards['id'].isin(rated_vs_both)
    return cards[has_both], cards[~has_both]


async def _fetch_ratings(endpoint, cardset_id, vs_hand, extra_params=()):
    """Fetch one hand's ratings for a cardset and return them as a DataFrame."""
    params = [('cardset_id', cardset_id), ('vs_hand', vs_hand), *extra_params]
    response = await db_get_prod(endpoint, params=params, timeout=60)
    return _records_frame(response, 'ratings')


async def _report_unrated_batters(cards_without_ratings, cardset_id):
    """Print player details, new/existing counts and rarity distribution for
    batting cards that lack complete ratings."""
    p_query = await db_get_prod('players', params=[('cardset_id', cardset_id)])
    players_df = pd.DataFrame(p_query['players']) if p_query else pd.DataFrame()

    detail_columns = ['player_id', 'p_name', 'cost', 'description']
    missing = [col for col in detail_columns if col not in players_df.columns]
    if missing:
        # Failed or empty players request: the merge below cannot work.
        print(f"\nPlayer details unavailable (missing columns: {', '.join(missing)})")
        return

    # assign() returns a new frame, so the caller's (possibly sliced) frame
    # is never written to.
    keyed_cards = cards_without_ratings.assign(
        player_id_extract=cards_without_ratings['player'].map(
            lambda value: _unwrap_id(value, 'player_id')
        )
    )
    cards_without = pd.merge(
        keyed_cards,
        players_df[detail_columns],
        left_on='player_id_extract',
        right_on='player_id',
        how='left',
    )

    print("\nSample of players without ratings (first 20):")
    print(f"{'Player Name':<30} {'Card ID':<10} {'Cost':<10} {'Description'}")
    print("-" * 80)
    for _, row in cards_without.head(20).iterrows():
        name = row.get('p_name', 'Unknown')
        card_id = row.get('id', 'N/A')
        cost = row.get('cost', 'N/A')
        desc = row.get('description', 'N/A')
        # !s keeps the alignment spec valid for values such as None.
        print(f"{name!s:<30} {card_id!s:<10} {cost!s:<10} {desc}")

    # Cards with no matching player row have a NaN cost and therefore land
    # in the "existing" bucket (NaN != 99999).
    new_players = cards_without[cards_without['cost'] == 99999]
    old_players = cards_without[cards_without['cost'] != 99999]
    print(f"\nNew players (cost=99999): {len(new_players)}")
    print(f"Existing players: {len(old_players)}")

    if 'rarity' not in players_df.columns:
        return

    # Match on the join key rather than the merged 'player_id' column, which
    # is renamed with suffixes if the card frame already carries one.
    players_without_ratings = players_df[
        players_df['player_id'].isin(cards_without['player_id_extract'])
    ]
    if players_without_ratings.empty:
        return

    rarity_ids = players_without_ratings['rarity'].map(_unwrap_id)
    rarity_counts = rarity_ids.value_counts().sort_index()
    rarity_names = {99: 'Hall of Fame', 1: 'Diamond', 2: 'Gold', 3: 'Silver', 4: 'Bronze', 5: 'Common'}
    print("\nRarity distribution for players WITHOUT ratings:")
    print(f" {'Rarity':<15} {'Count':<10}")
    print(" " + "-" * 30)
    for rarity_id, count in rarity_counts.items():
        name = rarity_names.get(rarity_id, f'Unknown ({rarity_id})')
        print(f" {name:<15} {count:<10}")


async def main():
    """Check the production database for cards missing ratings.

    Looks up the "2025 Season" cardset, then for batting and pitching cards
    reports how many have ratings versus both left- and right-handed
    opponents and how many do not. For batting cards without complete
    ratings it also prints a sample of the affected players, a new/existing
    split and a rarity distribution. All output goes to stdout; returns
    ``None``.
    """
    print("Checking PRODUCTION database")
    print("=" * 80)

    # Get cardset
    c_query = await db_get_prod('cardsets', params=[('name', '2025 Season')])
    if not c_query or c_query['count'] == 0:
        print("2025 Season cardset not found in production")
        return
    cardset_id = c_query['cardsets'][0]['id']
    print(f'Cardset: 2025 Season (ID: {cardset_id})\n')

    # Get batting cards
    bc_query = await db_get_prod('battingcards', params=[('cardset_id', cardset_id)])
    batting_cards = _records_frame(bc_query, 'cards')
    print(f"Total batting cards: {len(batting_cards)}")

    # Get batting ratings.
    # NOTE(review): team_id/ts are sent only on the batting ratings requests;
    # presumably the endpoint requires them — confirm against the API.
    batting_extra = [('team_id', 31), ('ts', 's37136685556r6135248705')]
    vl_ratings = await _fetch_ratings('battingcardratings', cardset_id, 'L', batting_extra)
    vr_ratings = await _fetch_ratings('battingcardratings', cardset_id, 'R', batting_extra)
    print(f"Total batting ratings (vL): {len(vl_ratings)}")
    print(f"Total batting ratings (vR): {len(vr_ratings)}")

    cards_with_both, cards_without_ratings = _split_by_coverage(
        batting_cards, vl_ratings, vr_ratings, 'battingcard'
    )
    print(f"\nCards with BOTH ratings: {len(cards_with_both)}")
    print(f"Cards WITHOUT complete ratings: {len(cards_without_ratings)}")

    # Get player info for cards without ratings
    if len(cards_without_ratings) > 0:
        await _report_unrated_batters(cards_without_ratings, cardset_id)

    # Check pitching cards too
    print("\n" + "=" * 80)
    print("Checking PITCHING cards:")
    print("=" * 80)
    pc_query = await db_get_prod('pitchingcards', params=[('cardset_id', cardset_id)])
    pitching_cards = _records_frame(pc_query, 'cards')
    print(f"\nTotal pitching cards: {len(pitching_cards)}")

    pitching_extra = [('short_output', True)]
    pvl_ratings = await _fetch_ratings('pitchingcardratings', cardset_id, 'L', pitching_extra)
    pvr_ratings = await _fetch_ratings('pitchingcardratings', cardset_id, 'R', pitching_extra)
    print(f"Total pitching ratings (vL): {len(pvl_ratings)}")
    print(f"Total pitching ratings (vR): {len(pvr_ratings)}")

    # Always report the split: zero ratings means every pitcher is missing
    # them, which is precisely what this check is meant to surface.
    pitchers_with_both, pitchers_without = _split_by_coverage(
        pitching_cards, pvl_ratings, pvr_ratings, 'pitchingcard'
    )
    print(f"\nPitchers with BOTH ratings: {len(pitchers_with_both)}")
    print(f"Pitchers WITHOUT complete ratings: {len(pitchers_without)}")
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())