paper-dynasty-card-creation/.claude/ops-rework/analyze_all_ops_distribution.py

import asyncio
from db_calls import db_get
import pandas as pd
import numpy as np


async def main():
    """Analyze OPS distribution for ALL pitchers to find proper thresholds"""

    # Get cardset
    c_query = await db_get('cardsets', params=[('name', '2025 Season')])
    cardset_id = c_query['cardsets'][0]['id']
    print(f'Analyzing all pitchers in: 2025 Season (ID: {cardset_id})\n')

    # Get pitching cards
    pc_query = await db_get('pitchingcards', params=[('cardset_id', cardset_id), ('short_output', True)])
    pitching_cards = pd.DataFrame(pc_query['cards'])

    vl_query = await db_get('pitchingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'L'), ('short_output', True)], timeout=30)
    vr_query = await db_get('pitchingcardratings', params=[('cardset_id', cardset_id), ('vs_hand', 'R'), ('short_output', True)], timeout=30)

    vl = pd.DataFrame(vl_query['ratings'])
    vr = pd.DataFrame(vr_query['ratings'])

    # Calculate OPS for each hand
    vl['ops_vL'] = vl['obp'] + vl['slg']
    vr['ops_vR'] = vr['obp'] + vr['slg']

    # Merge
    pit_ratings = pd.merge(vl[['pitchingcard', 'ops_vL']], vr[['pitchingcard', 'ops_vR']], on='pitchingcard')
    pit_ratings = pd.merge(pitching_cards[['id', 'player', 'starter_rating']], pit_ratings, left_on='id', right_on='pitchingcard')

    # Calculate total_OPS using max (as in the code)
    pit_ratings['total_OPS'] = (pit_ratings['ops_vR'] + pit_ratings['ops_vL'] + pit_ratings[['ops_vL', 'ops_vR']].max(axis=1)) / 3

    # Split by starter/reliever
    starters = pit_ratings[pit_ratings['starter_rating'] > 3]
    relievers = pit_ratings[pit_ratings['starter_rating'] <= 3]

    print("=" * 80)
    print(f"STARTERS (n={len(starters)})")
    print("=" * 80)
    print(f"\nOPS-Against Distribution:")
    print(f"  Min:    {starters['total_OPS'].min():.3f}")
    print(f"  5th %:  {starters['total_OPS'].quantile(0.05):.3f}")
    print(f"  10th %: {starters['total_OPS'].quantile(0.10):.3f}")
    print(f"  25th %: {starters['total_OPS'].quantile(0.25):.3f}")
    print(f"  Median: {starters['total_OPS'].quantile(0.50):.3f}")
    print(f"  75th %: {starters['total_OPS'].quantile(0.75):.3f}")
    print(f"  90th %: {starters['total_OPS'].quantile(0.90):.3f}")
    print(f"  95th %: {starters['total_OPS'].quantile(0.95):.3f}")
    print(f"  Max:    {starters['total_OPS'].max():.3f}")

    print(f"\n\nCurrent Thresholds vs Reality:")
    print(f"  {'Rarity':<15} {'Threshold':<12} {'Count at/below':<20} {'% of Total':<12}")
    print("-" * 80)

    thresholds = [
        ('Hall of Fame', 0.4),
        ('Diamond', 0.475),
        ('Gold', 0.53),
        ('Silver', 0.6),
        ('Bronze', 0.675)
    ]

    for name, thresh in thresholds:
        count = len(starters[starters['total_OPS'] <= thresh])
        pct = (count / len(starters)) * 100
        print(f"  {name:<15} <= {thresh:<9.3f} {count:<20} {pct:>5.1f}%")

    print(f"\n\nSuggested Thresholds (using percentiles):")
    print(f"  Hall of Fame (top 5%):   <= {starters['total_OPS'].quantile(0.05):.3f}")
    print(f"  Diamond (top 15%):       <= {starters['total_OPS'].quantile(0.15):.3f}")
    print(f"  Gold (top 30%):          <= {starters['total_OPS'].quantile(0.30):.3f}")
    print(f"  Silver (top 50%):        <= {starters['total_OPS'].quantile(0.50):.3f}")
    print(f"  Bronze (top 70%):        <= {starters['total_OPS'].quantile(0.70):.3f}")

    print("\n" + "=" * 80)
    print(f"RELIEVERS (n={len(relievers)})")
    print("=" * 80)
    print(f"\nOPS-Against Distribution:")
    print(f"  Min:    {relievers['total_OPS'].min():.3f}")
    print(f"  5th %:  {relievers['total_OPS'].quantile(0.05):.3f}")
    print(f"  10th %: {relievers['total_OPS'].quantile(0.10):.3f}")
    print(f"  25th %: {relievers['total_OPS'].quantile(0.25):.3f}")
    print(f"  Median: {relievers['total_OPS'].quantile(0.50):.3f}")
    print(f"  75th %: {relievers['total_OPS'].quantile(0.75):.3f}")
    print(f"  90th %: {relievers['total_OPS'].quantile(0.90):.3f}")
    print(f"  95th %: {relievers['total_OPS'].quantile(0.95):.3f}")
    print(f"  Max:    {relievers['total_OPS'].max():.3f}")

    print(f"\n\nCurrent Thresholds vs Reality:")
    print(f"  {'Rarity':<15} {'Threshold':<12} {'Count at/below':<20} {'% of Total':<12}")
    print("-" * 80)

    thresholds = [
        ('Hall of Fame', 0.325),
        ('Diamond', 0.4),
        ('Gold', 0.475),
        ('Silver', 0.55),
        ('Bronze', 0.625)
    ]

    for name, thresh in thresholds:
        count = len(relievers[relievers['total_OPS'] <= thresh])
        pct = (count / len(relievers)) * 100
        print(f"  {name:<15} <= {thresh:<9.3f} {count:<20} {pct:>5.1f}%")

    print(f"\n\nSuggested Thresholds (using percentiles):")
    print(f"  Hall of Fame (top 5%):   <= {relievers['total_OPS'].quantile(0.05):.3f}")
    print(f"  Diamond (top 15%):       <= {relievers['total_OPS'].quantile(0.15):.3f}")
    print(f"  Gold (top 30%):          <= {relievers['total_OPS'].quantile(0.30):.3f}")
    print(f"  Silver (top 50%):        <= {relievers['total_OPS'].quantile(0.50):.3f}")
    print(f"  Bronze (top 70%):        <= {relievers['total_OPS'].quantile(0.70):.3f}")


if __name__ == '__main__':
    asyncio.run(main())