#!/usr/bin/env python3
"""Debug the 508 emergency players to understand why they weren't matched"""
import csv
import json
from collections import Counter

from comprehensive_player_matching import normalize_name, create_name_variants


def _load_emergency_players(csv_path='player_sbaplayer_assignments.csv'):
    """Return the assignment rows whose ``assignment_source`` is 'emergency_new'.

    Each returned dict has the keys ``player_id`` (int), ``name``, ``season``
    (int) and ``bbref_id``, taken from the CSV columns ``player_id``,
    ``player_name``, ``season`` and ``bbref_id``.

    Raises:
        FileNotFoundError: if ``csv_path`` does not exist.
        KeyError: if an expected column is missing.
        ValueError: if ``player_id`` or ``season`` is not an integer.
    """
    # newline='' is what the csv module requires so that quoted fields with
    # embedded newlines are parsed correctly; the explicit encoding keeps
    # non-ASCII player names intact regardless of the platform locale.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        return [
            {
                'player_id': int(row['player_id']),
                'name': row['player_name'],
                'season': int(row['season']),
                'bbref_id': row['bbref_id'],
            }
            for row in csv.DictReader(f)
            if row['assignment_source'] == 'emergency_new'
        ]


def _build_sbaplayer_maps(sbaplayers):
    """Build ``(name_map, bbref_map)`` lookup dicts over SbaPlayer records.

    ``name_map`` is keyed by ``normalize_name("<first_name> <last_name>")`` and
    ``bbref_map`` by the record's ``key_bbref`` (records with a falsy
    ``key_bbref`` are skipped). When two records share a key, the later one
    in ``sbaplayers`` wins.
    """
    name_map = {}
    bbref_map = {}
    for sba in sbaplayers:
        full_name = f"{sba['first_name']} {sba['last_name']}"
        name_map[normalize_name(full_name)] = sba
        if sba.get('key_bbref'):
            bbref_map[sba['key_bbref']] = sba
    return name_map, bbref_map


def _explain_emergency(emergency, name_map, bbref_map):
    """Print why one emergency player did or did not match an SbaPlayer."""
    print(f"\nPlayer: {emergency['name']} (Season {emergency['season']})")
    print(f" bbref_id: {emergency['bbref_id']}")

    # Check bbref_id match
    bbref_id = emergency['bbref_id']
    if bbref_id and bbref_id in bbref_map:
        sba = bbref_map[bbref_id]
        print(f" 🔍 BBREF MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
        print(" ❌ This should have been Tier 1 matched!")
    elif bbref_id:
        print(f" ❌ bbref_id '{bbref_id}' not found in SbaPlayer records")

    # Check name match
    normalized = normalize_name(emergency['name'])
    if normalized in name_map:
        sba = name_map[normalized]
        print(f" 🔍 NAME MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
        print(" ❌ This should have been Tier 2 matched!")
        return

    print(f" ❌ Normalized name '{normalized}' not found in SbaPlayer records")

    # Check for similar names (substring containment in either direction).
    # An empty string is a substring of everything, so empty names on either
    # side are excluded to avoid reporting arbitrary names as "similar".
    needle = emergency['name'].lower()
    similar_names = [
        name for name in name_map
        if needle and name
        and (needle in name.lower() or name.lower() in needle)
    ]
    if similar_names[:3]:
        print(f" Similar names: {similar_names[:3]}")


def main():
    """Report on the 'emergency_new' assignments and why they went unmatched.

    Reads ``/tmp/sbaplayers.json`` and ``player_sbaplayer_assignments.csv``,
    prints a per-player diagnosis for the first 10 emergency players, the
    per-season distribution of all of them, and whether ``matching_report.txt``
    (optional) contains a "SAMPLE UNMATCHED PLAYERS" section.
    """
    # Load data
    with open('/tmp/sbaplayers.json', 'r', encoding='utf-8') as f:
        sbaplayers = json.load(f)

    # Read the assignment file to identify emergency players
    emergency_players = _load_emergency_players()
    print(f"Found {len(emergency_players)} emergency assignments")

    # Check some samples to see why they weren't matched
    sample_emergencies = emergency_players[:10]

    # Create SbaPlayer lookup maps
    sbaplayer_name_map, sbaplayer_bbref_map = _build_sbaplayer_maps(sbaplayers)

    print("\nAnalyzing sample emergency players:")
    print("-" * 50)
    for emergency in sample_emergencies:
        _explain_emergency(emergency, sbaplayer_name_map, sbaplayer_bbref_map)

    # Check season distribution of emergency players
    season_counts = Counter(ep['season'] for ep in emergency_players)
    print("\nSeason distribution of emergency players:")
    for season in sorted(season_counts):
        print(f" Season {season}: {season_counts[season]} players")

    # Check if these players exist in the original data but were somehow missed
    print("\nChecking if emergency players were in original comprehensive matching scope...")

    # Try to load from matching report if it exists
    try:
        with open('matching_report.txt', 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print("No matching report found")
    else:
        # This is rough parsing - just to get an idea
        if "SAMPLE UNMATCHED PLAYERS" in content:
            print("Found original unmatched players list in matching report")


if __name__ == "__main__":
    main()