252 lines
9.3 KiB
Python
252 lines
9.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script to analyze player data for sbaplayer matching strategy
|
|
Uses hierarchical matching: bbref_id (Player) -> key_bbref (SbaPlayer) -> name fallback
|
|
"""
|
|
import requests
|
|
import json
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass, asdict
|
|
from typing import Dict, List, Set, Optional
|
|
from collections import defaultdict
|
|
|
|
# Set up logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
handlers=[
|
|
logging.FileHandler('/tmp/player_analysis.log'),
|
|
logging.StreamHandler()
|
|
]
|
|
)
|
|
logger = logging.getLogger('PlayerAnalysis')
|
|
|
|
@dataclass
|
|
class PlayerRecord:
|
|
id: int
|
|
name: str
|
|
season: int
|
|
bbref_id: Optional[str] = None
|
|
sbaplayer_id: Optional[int] = None
|
|
|
|
@dataclass
|
|
class SbaPlayerRecord:
|
|
id: int
|
|
first_name: str
|
|
last_name: str
|
|
key_bbref: Optional[str] = None
|
|
key_fangraphs: Optional[int] = None
|
|
key_mlbam: Optional[int] = None
|
|
key_retro: Optional[str] = None
|
|
|
|
def fetch_players_for_season(season: int, use_cache: bool = True) -> List[PlayerRecord]:
|
|
"""Fetch all players for a given season, with caching"""
|
|
cache_file = f"/tmp/players_season_{season}.json"
|
|
|
|
# Try to load from cache first
|
|
if use_cache and os.path.exists(cache_file):
|
|
try:
|
|
with open(cache_file, 'r') as f:
|
|
cached_data = json.load(f)
|
|
|
|
players = []
|
|
for player_data in cached_data:
|
|
player = PlayerRecord(**player_data)
|
|
players.append(player)
|
|
|
|
logger.info(f"Season {season}: {len(players)} players loaded from cache")
|
|
return players
|
|
except Exception as e:
|
|
logger.warning(f"Failed to load cache for season {season}: {e}")
|
|
|
|
# Fetch from API
|
|
try:
|
|
url = f"https://api.sba.manticorum.com/players?season={season}"
|
|
response = requests.get(url, timeout=30)
|
|
response.raise_for_status()
|
|
data = response.json()
|
|
|
|
players = []
|
|
for player_data in data.get('players', []):
|
|
player = PlayerRecord(
|
|
id=player_data['id'],
|
|
name=player_data['name'],
|
|
season=season,
|
|
bbref_id=player_data.get('bbref_id') if player_data.get('bbref_id') else None,
|
|
sbaplayer_id=player_data.get('sbaplayer_id')
|
|
)
|
|
players.append(player)
|
|
|
|
# Save to cache
|
|
try:
|
|
with open(cache_file, 'w') as f:
|
|
json.dump([asdict(p) for p in players], f, indent=2)
|
|
logger.info(f"Season {season}: {len(players)} players fetched and cached")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to cache season {season} data: {e}")
|
|
|
|
return players
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error fetching season {season}: {e}")
|
|
return []
|
|
|
|
def fetch_sbaplayers(use_cache: bool = True) -> List[SbaPlayerRecord]:
|
|
"""Fetch all SBA players, with caching"""
|
|
cache_file = "/tmp/sbaplayers.json"
|
|
|
|
# Try to load from cache first
|
|
if use_cache and os.path.exists(cache_file):
|
|
try:
|
|
with open(cache_file, 'r') as f:
|
|
cached_data = json.load(f)
|
|
|
|
sbaplayers = []
|
|
for sba_data in cached_data:
|
|
sbaplayer = SbaPlayerRecord(**sba_data)
|
|
sbaplayers.append(sbaplayer)
|
|
|
|
logger.info(f"SbaPlayers: {len(sbaplayers)} records loaded from cache")
|
|
return sbaplayers
|
|
except Exception as e:
|
|
logger.warning(f"Failed to load sbaplayers cache: {e}")
|
|
|
|
# Fetch from API
|
|
try:
|
|
url = "https://api.sba.manticorum.com/sbaplayers"
|
|
response = requests.get(url, timeout=30)
|
|
response.raise_for_status()
|
|
data = response.json()
|
|
|
|
sbaplayers = []
|
|
for sba_data in data.get('players', []):
|
|
sbaplayer = SbaPlayerRecord(
|
|
id=sba_data['id'],
|
|
first_name=sba_data['first_name'],
|
|
last_name=sba_data['last_name'],
|
|
key_bbref=sba_data.get('key_bbref') if sba_data.get('key_bbref') else None,
|
|
key_fangraphs=sba_data.get('key_fangraphs'),
|
|
key_mlbam=sba_data.get('key_mlbam'),
|
|
key_retro=sba_data.get('key_retro') if sba_data.get('key_retro') else None
|
|
)
|
|
sbaplayers.append(sbaplayer)
|
|
|
|
# Save to cache
|
|
try:
|
|
with open(cache_file, 'w') as f:
|
|
json.dump([asdict(s) for s in sbaplayers], f, indent=2)
|
|
logger.info(f"SbaPlayers: {len(sbaplayers)} records fetched and cached")
|
|
except Exception as e:
|
|
logger.warning(f"Failed to cache sbaplayers data: {e}")
|
|
|
|
return sbaplayers
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error fetching sbaplayers: {e}")
|
|
return []
|
|
|
|
def analyze_matching_potential():
|
|
"""Analyze potential matches between players and sbaplayers"""
|
|
|
|
# Fetch all data
|
|
logger.info("Fetching SbaPlayer records...")
|
|
sbaplayers = fetch_sbaplayers()
|
|
|
|
logger.info("Fetching Player records from all seasons...")
|
|
all_players: List[PlayerRecord] = []
|
|
for season in range(1, 13):
|
|
players = fetch_players_for_season(season)
|
|
all_players.extend(players)
|
|
|
|
logger.info(f"Total player-season records: {len(all_players)}")
|
|
|
|
# Create lookup maps
|
|
sbaplayer_by_bbref = {}
|
|
sbaplayer_by_name = {}
|
|
|
|
for sba in sbaplayers:
|
|
if sba.key_bbref:
|
|
sbaplayer_by_bbref[sba.key_bbref] = sba
|
|
full_name = f"{sba.first_name} {sba.last_name}"
|
|
sbaplayer_by_name[full_name] = sba
|
|
|
|
logger.info(f"SbaPlayers with key_bbref: {len(sbaplayer_by_bbref)}")
|
|
logger.info(f"Total unique SbaPlayers: {len(sbaplayers)}")
|
|
|
|
# Analysis of Player records
|
|
players_with_bbref = [p for p in all_players if p.bbref_id]
|
|
players_without_bbref = [p for p in all_players if not p.bbref_id]
|
|
|
|
logger.info(f"Player records with bbref_id: {len(players_with_bbref)} ({len(players_with_bbref)/len(all_players)*100:.1f}%)")
|
|
logger.info(f"Player records without bbref_id: {len(players_without_bbref)} ({len(players_without_bbref)/len(all_players)*100:.1f}%)")
|
|
|
|
# Try matching by bbref_id
|
|
bbref_matches = 0
|
|
bbref_no_matches = 0
|
|
|
|
for player in players_with_bbref:
|
|
if player.bbref_id in sbaplayer_by_bbref:
|
|
bbref_matches += 1
|
|
else:
|
|
bbref_no_matches += 1
|
|
|
|
logger.info(f"Player records that can match by bbref_id: {bbref_matches}")
|
|
logger.info(f"Player records with bbref_id but no SbaPlayer match: {bbref_no_matches}")
|
|
|
|
# Group remaining players by unique identifiers
|
|
unique_bbref_players = defaultdict(list) # players grouped by bbref_id
|
|
unique_name_players = defaultdict(list) # players without bbref_id, grouped by name
|
|
|
|
for player in all_players:
|
|
if player.bbref_id:
|
|
unique_bbref_players[player.bbref_id].append(player)
|
|
else:
|
|
unique_name_players[player.name].append(player)
|
|
|
|
logger.info(f"Unique players identifiable by bbref_id: {len(unique_bbref_players)}")
|
|
logger.info(f"Unique players identifiable by name only: {len(unique_name_players)}")
|
|
|
|
# Show some examples
|
|
logger.info("\nExample bbref_id matches:")
|
|
count = 0
|
|
for bbref_id, players in unique_bbref_players.items():
|
|
if bbref_id in sbaplayer_by_bbref and count < 5:
|
|
seasons = sorted([p.season for p in players])
|
|
sba = sbaplayer_by_bbref[bbref_id]
|
|
logger.info(f" {bbref_id}: {players[0].name} -> {sba.first_name} {sba.last_name} (seasons {seasons})")
|
|
count += 1
|
|
|
|
logger.info("\nExample name-only players (need new SbaPlayer records):")
|
|
count = 0
|
|
for name, players in unique_name_players.items():
|
|
if count < 5:
|
|
seasons = sorted([p.season for p in players])
|
|
logger.info(f" {name} (seasons {seasons})")
|
|
count += 1
|
|
|
|
# Summary statistics
|
|
total_unique_players = len(unique_bbref_players) + len(unique_name_players)
|
|
matchable_by_bbref = sum(1 for bbref in unique_bbref_players.keys() if bbref in sbaplayer_by_bbref)
|
|
need_new_sbaplayers = len(unique_bbref_players) - matchable_by_bbref + len(unique_name_players)
|
|
|
|
logger.info(f"\n=== SUMMARY ===")
|
|
logger.info(f"Total unique players across all seasons: {total_unique_players}")
|
|
logger.info(f"Can match to existing SbaPlayers by bbref_id: {matchable_by_bbref}")
|
|
logger.info(f"Need new SbaPlayer records: {need_new_sbaplayers}")
|
|
logger.info(f"Total player-season records to update: {len(all_players)}")
|
|
|
|
return {
|
|
'total_player_records': len(all_players),
|
|
'total_unique_players': total_unique_players,
|
|
'matchable_by_bbref': matchable_by_bbref,
|
|
'need_new_sbaplayers': need_new_sbaplayers,
|
|
'unique_bbref_players': dict(unique_bbref_players),
|
|
'unique_name_players': dict(unique_name_players),
|
|
'sbaplayer_lookup': sbaplayer_by_bbref
|
|
}
|
|
|
|
if __name__ == "__main__":
|
|
logger.info("Starting player matching analysis...")
|
|
results = analyze_matching_potential()
|
|
logger.info("Analysis complete!") |