major-domo-database/.claude/sqlite-to-postgres/player-to-sbaplayer-matching/debug_emergency_players.py
Cal Corum 7130a1fd43 Postgres Migration
Migration documentation and scripts
2025-08-25 07:18:31 -05:00

105 lines
4.2 KiB
Python

#!/usr/bin/env python3
"""Debug the 508 emergency players to understand why they weren't matched"""
import csv
import json
from collections import Counter

from comprehensive_player_matching import normalize_name, create_name_variants
def _load_json(path):
    """Return the parsed contents of the JSON file at *path*."""
    # JSON interchange files are UTF-8; do not depend on the locale default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _load_emergency_players(assignments_path):
    """Return the assignment rows whose ``assignment_source`` is ``emergency_new``.

    Each matching row is reduced to the four fields the report uses;
    ``player_id`` and ``season`` are converted to ``int``.
    """
    # newline="" is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    # NOTE(review): assumes the assignment CSV was written as UTF-8 - confirm
    # against the script that produced it.
    with open(assignments_path, "r", encoding="utf-8", newline="") as f:
        return [
            {
                "player_id": int(row["player_id"]),
                "name": row["player_name"],
                "season": int(row["season"]),
                "bbref_id": row["bbref_id"],
            }
            for row in csv.DictReader(f)
            if row["assignment_source"] == "emergency_new"
        ]


def _build_sbaplayer_maps(sbaplayers):
    """Index SbaPlayer records by normalized full name and by ``key_bbref``.

    Returns a ``(name_map, bbref_map)`` tuple. When several records share a
    key, the last one in *sbaplayers* wins. Records with a falsy
    ``key_bbref`` are left out of ``bbref_map``.
    """
    name_map = {}
    bbref_map = {}
    for sba in sbaplayers:
        full_name = f"{sba['first_name']} {sba['last_name']}"
        name_map[normalize_name(full_name)] = sba
        if sba.get("key_bbref"):
            bbref_map[sba["key_bbref"]] = sba
    return name_map, bbref_map


def _explain_emergency_player(emergency, name_map, bbref_map):
    """Print whether *emergency* could have matched by bbref_id or by name."""
    print(f"\nPlayer: {emergency['name']} (Season {emergency['season']})")
    print(f" bbref_id: {emergency['bbref_id']}")

    # Check bbref_id match (skipped entirely when the player has no bbref_id).
    bbref_id = emergency["bbref_id"]
    if bbref_id:
        if bbref_id in bbref_map:
            sba = bbref_map[bbref_id]
            print(f" 🔍 BBREF MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
            print(" ❌ This should have been Tier 1 matched!")
        else:
            print(f" ❌ bbref_id '{bbref_id}' not found in SbaPlayer records")

    # Check name match.
    normalized = normalize_name(emergency["name"])
    if normalized in name_map:
        sba = name_map[normalized]
        print(f" 🔍 NAME MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
        print(" ❌ This should have been Tier 2 matched!")
        return

    print(f" ❌ Normalized name '{normalized}' not found in SbaPlayer records")

    # Fall back to a substring comparison in either direction. The player's
    # lower-cased name is computed once rather than once per candidate.
    lowered = emergency["name"].lower()
    similar_names = [
        name
        for name in name_map
        if lowered in name.lower() or name.lower() in lowered
    ]
    if similar_names:
        print(f" Similar names: {similar_names[:3]}")


def main(
    *,
    sbaplayers_path="/tmp/sbaplayers.json",
    assignments_path="player_sbaplayer_assignments.csv",
    report_path="matching_report.txt",
    sample_size=10,
):
    """Report why the ``emergency_new`` player assignments were not matched.

    Reads the SbaPlayer dump and the assignment CSV, then prints:

    * how many emergency assignments the CSV contains,
    * for the first *sample_size* of them, whether a bbref_id or
      normalized-name lookup against the SbaPlayer records would have
      succeeded,
    * the number of emergency players per season,
    * whether the matching report, if present, contains a
      "SAMPLE UNMATCHED PLAYERS" section.

    The per-season ``/tmp/players_season_*.json`` dumps are intentionally not
    loaded: nothing in this report reads them, and loading them made the
    script fail whenever those dumps were absent.

    Args:
        sbaplayers_path: JSON file holding the list of SbaPlayer records.
        assignments_path: CSV with ``assignment_source``, ``player_id``,
            ``player_name``, ``season`` and ``bbref_id`` columns.
        report_path: Optional text report from the earlier matching run.
        sample_size: How many emergency players to analyse in detail.

    Raises:
        FileNotFoundError: If *sbaplayers_path* or *assignments_path* is
            missing. A missing *report_path* is reported, not raised.
    """
    sbaplayers = _load_json(sbaplayers_path)

    # Read the assignment file to identify emergency players.
    emergency_players = _load_emergency_players(assignments_path)
    print(f"Found {len(emergency_players)} emergency assignments")

    name_map, bbref_map = _build_sbaplayer_maps(sbaplayers)

    # Check some samples to see why they weren't matched.
    print("\nAnalyzing sample emergency players:")
    print("-" * 50)
    for emergency in emergency_players[:sample_size]:
        _explain_emergency_player(emergency, name_map, bbref_map)

    # Check season distribution of emergency players.
    season_counts = Counter(ep["season"] for ep in emergency_players)
    print("\nSeason distribution of emergency players:")
    for season in sorted(season_counts):
        print(f" Season {season}: {season_counts[season]} players")

    # Rough check only: look for the unmatched-players section heading in
    # the report produced by the original matching run.
    print("\nChecking if emergency players were in original comprehensive matching scope...")
    try:
        with open(report_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print("No matching report found")
    else:
        if "SAMPLE UNMATCHED PLAYERS" in content:
            print("Found original unmatched players list in matching report")
# Run the diagnostic only when this file is executed as a script, so that
# importing the module does not trigger any file reads or output.
if __name__ == "__main__":
    main()