105 lines
4.2 KiB
Python
105 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Debug the 508 emergency players to understand why they weren't matched"""
|
|
|
|
import csv
import json
from collections import Counter

from comprehensive_player_matching import normalize_name, create_name_variants
|
|
|
|
def _load_json(path):
    """Return the parsed contents of the UTF-8 JSON file at *path*."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _load_emergency_players(path):
    """Return the assignment-CSV rows whose source is 'emergency_new'.

    Each returned dict carries ``player_id`` and ``season`` converted to
    int, and ``name`` / ``bbref_id`` as the strings read from the file.

    Raises:
        FileNotFoundError: if *path* does not exist.
        KeyError: if an expected column is missing.
        ValueError: if ``player_id`` or ``season`` is not an integer.
    """
    # newline='' is what the csv module asks for, so that line breaks
    # embedded in quoted fields are parsed correctly.
    with open(path, newline="", encoding="utf-8") as f:
        return [
            {
                'player_id': int(row['player_id']),
                'name': row['player_name'],
                'season': int(row['season']),
                'bbref_id': row['bbref_id'],
            }
            for row in csv.DictReader(f)
            if row['assignment_source'] == 'emergency_new'
        ]


def _build_sbaplayer_maps(sbaplayers):
    """Index SbaPlayer records by normalized full name and by bbref key.

    Returns a ``(name_map, bbref_map)`` tuple. When two records share a
    key, the later record in *sbaplayers* replaces the earlier one.
    """
    name_map = {}
    bbref_map = {}
    for sba in sbaplayers:
        full_name = f"{sba['first_name']} {sba['last_name']}"
        name_map[normalize_name(full_name)] = sba

        # Records with a missing or empty key_bbref are reachable by name only.
        if sba.get('key_bbref'):
            bbref_map[sba['key_bbref']] = sba
    return name_map, bbref_map


def _report_emergency_player(emergency, name_map, bbref_map):
    """Print why one emergency player did or did not match an SbaPlayer."""
    name = emergency['name']
    bbref_id = emergency['bbref_id']

    print(f"\nPlayer: {name} (Season {emergency['season']})")
    print(f" bbref_id: {bbref_id}")

    # Check bbref_id match; a player with an empty bbref_id prints nothing here.
    if bbref_id:
        if bbref_id in bbref_map:
            sba = bbref_map[bbref_id]
            print(f" 🔍 BBREF MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
            print(" ❌ This should have been Tier 1 matched!")
        else:
            print(f" ❌ bbref_id '{bbref_id}' not found in SbaPlayer records")

    # Check name match
    normalized = normalize_name(name)
    if normalized in name_map:
        sba = name_map[normalized]
        print(f" 🔍 NAME MATCH FOUND: {sba['first_name']} {sba['last_name']} (ID {sba['id']})")
        print(" ❌ This should have been Tier 2 matched!")
        return

    print(f" ❌ Normalized name '{normalized}' not found in SbaPlayer records")

    # Check for similar names: substring containment in either direction.
    # NOTE(review): this compares the raw lower-cased name against the
    # normalized map keys; confirm that is intended rather than `normalized`.
    lowered = name.lower()
    similar_names = [
        candidate for candidate in name_map
        if lowered in candidate.lower() or candidate.lower() in lowered
    ]
    if similar_names:
        print(f" Similar names: {similar_names[:3]}")


def main(sample_size=10):
    """Print a diagnostic report on 'emergency_new' player assignments.

    Reads ``/tmp/sbaplayers.json`` and ``player_sbaplayer_assignments.csv``,
    then prints: the number of emergency assignments, a per-player match
    analysis for the first *sample_size* of them, the per-season count of
    emergency players, and whether ``matching_report.txt`` contains a
    "SAMPLE UNMATCHED PLAYERS" section.

    Args:
        sample_size: how many emergency players to analyze individually.

    Raises:
        FileNotFoundError: if the SbaPlayer JSON or the assignment CSV is
            missing. A missing ``matching_report.txt`` is only reported.
    """
    # Load data
    sbaplayers = _load_json('/tmp/sbaplayers.json')

    # Read the assignment file to identify emergency players
    emergency_players = _load_emergency_players('player_sbaplayer_assignments.csv')
    print(f"Found {len(emergency_players)} emergency assignments")

    # Create SbaPlayer lookup maps
    name_map, bbref_map = _build_sbaplayer_maps(sbaplayers)

    # Check some samples to see why they weren't matched
    print("\nAnalyzing sample emergency players:")
    print("-" * 50)
    for emergency in emergency_players[:sample_size]:
        _report_emergency_player(emergency, name_map, bbref_map)

    # Check season distribution of emergency players
    season_counts = Counter(ep['season'] for ep in emergency_players)
    print("\nSeason distribution of emergency players:")
    for season in sorted(season_counts):
        print(f" Season {season}: {season_counts[season]} players")

    # Check if these players exist in the original data but were somehow missed
    print("\nChecking if emergency players were in original comprehensive matching scope...")

    # Try to load from matching report if it exists.
    try:
        with open('matching_report.txt', encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        print("No matching report found")
    else:
        # This is rough parsing - just to get an idea
        if "SAMPLE UNMATCHED PLAYERS" in content:
            print("Found original unmatched players list in matching report")
|
|
|
|
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not when imported.
    main()