#!/usr/bin/env python3 """ Generate complete player assignments for ALL players: 1. Automatic bbref_id matches 2. Manual review decisions 3. Create new SbaPlayers for remaining unmatched players """ import csv import json import logging from dataclasses import dataclass from typing import Dict, List, Set, Optional from collections import defaultdict logging.basicConfig(level=logging.INFO) logger = logging.getLogger('CompleteAssignments') @dataclass class PlayerRecord: id: int name: str season: int bbref_id: Optional[str] = None sbaplayer_id: Optional[int] = None @dataclass class SbaPlayerRecord: id: int first_name: str last_name: str key_bbref: Optional[str] = None key_fangraphs: Optional[int] = None key_mlbam: Optional[int] = None key_retro: Optional[str] = None @dataclass class NewSbaPlayer: first_name: str last_name: str key_bbref: Optional[str] = None temp_id: int = 0 def load_cached_data(): """Load cached player and sbaplayer data""" logger.info("Loading cached data...") # Load SbaPlayers with open('/tmp/sbaplayers.json', 'r') as f: sbaplayer_data = json.load(f) sbaplayers = [] for data in sbaplayer_data: sbaplayers.append(SbaPlayerRecord(**data)) # Load all players all_players = [] for season in range(1, 13): with open(f"/tmp/players_season_{season}.json", 'r') as f: season_data = json.load(f) for data in season_data: all_players.append(PlayerRecord(**data)) logger.info(f"Loaded {len(sbaplayers)} SbaPlayers and {len(all_players)} player records") return all_players, sbaplayers def parse_manual_decisions(): """Parse manual decisions from reviewed CSV files""" logger.info("Parsing manual decisions...") manual_assignments = {} # player_id -> sbaplayer_id # Parse unmatched decisions try: with open('/mnt/NV2/Development/major-domo/database/unmatched_players_for_review_updated.csv', 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: resolution = row.get('resolution', '').strip() if not resolution or resolution == 'SKIP': continue # Get column names dynamically player_id_key = next((k for k in row.keys() if 'player' in k.lower() and 'id' in k.lower()), 'player_id') bbref_id_key = next((k for k in row.keys() if 'bbref' in k.lower() and 'id' in k.lower()), 'bbref_id') suggested_id_key = next((k for k in row.keys() if 'suggested' in k.lower() and 'sbaplayer' in k.lower() and 'id' in k.lower()), 'suggested_sbaplayer_id') player_id = int(row[player_id_key]) player_name = row['name'] bbref_id = row[bbref_id_key] if row[bbref_id_key] else None if resolution == 'ACCEPT': suggested_id = row[suggested_id_key] if suggested_id and not suggested_id.startswith('PARTIAL:'): manual_assignments[player_id] = int(suggested_id) elif resolution.startswith('USE_SBA_'): sbaplayer_id = int(resolution.replace('USE_SBA_', '')) manual_assignments[player_id] = sbaplayer_id # Note: CREATE_NEW cases will be handled by creating new SbaPlayers except FileNotFoundError: logger.warning("Unmatched decisions file not found") # Parse high-risk decisions try: with open('/mnt/NV2/Development/major-domo/database/high_risk_player_matches_updated.csv', 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: resolution = row.get('resolution', '').strip() if not resolution or resolution == 'SKIP': continue # Get column names dynamically player_id_key = next((k for k in row.keys() if 'player' in k.lower() and 'id' in k.lower()), 'player_id') if resolution.startswith('USE_SBA_') and row[player_id_key]: player_id = int(row[player_id_key]) sbaplayer_id = int(resolution.replace('USE_SBA_', '')) manual_assignments[player_id] = sbaplayer_id except FileNotFoundError: logger.warning("High-risk decisions file not found") logger.info(f"Found {len(manual_assignments)} manual player assignments") return manual_assignments def generate_complete_assignments(all_players, sbaplayers, manual_assignments): """Generate assignments for ALL players""" # Create lookup maps sbaplayers_by_bbref = {} for sba in sbaplayers: if sba.key_bbref: sbaplayers_by_bbref[sba.key_bbref] = sba # Track assignments and new SbaPlayers needed all_assignments = [] # (player_id, sbaplayer_id, assignment_type) new_sbaplayers = [] players_needing_new_sba = defaultdict(list) # group by unique identifier temp_id_counter = 90000 logger.info("Processing all player records...") for player in all_players: assignment_type = "" sbaplayer_id = None # 1. Check manual assignments first if player.id in manual_assignments: sbaplayer_id = manual_assignments[player.id] assignment_type = "manual_decision" # 2. Try automatic bbref_id matching elif player.bbref_id and player.bbref_id in sbaplayers_by_bbref: sba = sbaplayers_by_bbref[player.bbref_id] sbaplayer_id = sba.id assignment_type = "automatic_bbref_match" # 3. Player needs new SbaPlayer record else: # Group players who need new SbaPlayer records if player.bbref_id: # Group by bbref_id key = f"bbref:{player.bbref_id}" unique_name = player.name # Use name from any player with this bbref_id unique_bbref = player.bbref_id else: # Group by name key = f"name:{player.name}" unique_name = player.name unique_bbref = None players_needing_new_sba[key].append(player) continue # Will process these after creating new SbaPlayers # Add assignment if sbaplayer_id: all_assignments.append((player.id, sbaplayer_id, assignment_type)) logger.info(f"Direct assignments: {len(all_assignments)}") logger.info(f"Player groups needing new SbaPlayers: {len(players_needing_new_sba)}") # Create new SbaPlayer records for remaining players for key, players in players_needing_new_sba.items(): # Use the first player as representative representative = players[0] # Parse name name_parts = representative.name.strip().split() if len(name_parts) >= 2: first_name = name_parts[0] last_name = ' '.join(name_parts[1:]) else: first_name = representative.name last_name = "" # Create new SbaPlayer new_sba = NewSbaPlayer( first_name=first_name, last_name=last_name, key_bbref=representative.bbref_id, temp_id=temp_id_counter ) new_sbaplayers.append(new_sba) # Assign all players in this group to the new SbaPlayer for player in players: all_assignments.append((player.id, temp_id_counter, "new_sbaplayer")) temp_id_counter += 1 logger.info(f"Total assignments: {len(all_assignments)}") logger.info(f"New SbaPlayers to create: {len(new_sbaplayers)}") return all_assignments, new_sbaplayers def generate_csv_files(all_assignments, new_sbaplayers, all_players, sbaplayers): """Generate the final CSV files""" players_by_id = {p.id: p for p in all_players} sbaplayers_by_id = {sba.id: sba for sba in sbaplayers} # Generate new SbaPlayers CSV logger.info("Generating new SbaPlayers CSV...") with open('/mnt/NV2/Development/major-domo/database/new_sbaplayers_to_insert_complete.csv', 'w', newline='') as csvfile: writer = csv.writer(csvfile) writer.writerow(['temp_id', 'first_name', 'last_name', 'key_bbref', 'key_fangraphs', 'key_mlbam', 'key_retro']) for sba in new_sbaplayers: writer.writerow([ sba.temp_id, sba.first_name, sba.last_name, sba.key_bbref or '', '', # key_fangraphs '', # key_mlbam '' # key_retro ]) # Generate complete player assignments CSV logger.info("Generating complete player assignments CSV...") with open('/mnt/NV2/Development/major-domo/database/player_sbaplayer_assignments_complete.csv', 'w', newline='') as csvfile: writer = csv.writer(csvfile) writer.writerow([ 'player_id', 'player_name', 'player_season', 'player_bbref_id', 'sbaplayer_id', 'sbaplayer_name', 'sbaplayer_bbref', 'assignment_type' ]) for player_id, sbaplayer_id, assignment_type in all_assignments: player = players_by_id[player_id] # Get SbaPlayer info if sbaplayer_id >= 90000: # New SbaPlayer (temp ID) sbaplayer_name = f"NEW_TEMP_{sbaplayer_id}" sbaplayer_bbref = player.bbref_id or '' else: # Existing SbaPlayer if sbaplayer_id in sbaplayers_by_id: sba = sbaplayers_by_id[sbaplayer_id] sbaplayer_name = f"{sba.first_name} {sba.last_name}" sbaplayer_bbref = sba.key_bbref or '' else: sbaplayer_name = f"UNKNOWN_SBA_{sbaplayer_id}" sbaplayer_bbref = '' writer.writerow([ player.id, player.name, player.season, player.bbref_id or '', sbaplayer_id, sbaplayer_name, sbaplayer_bbref, assignment_type ]) # Generate summary with open('/mnt/NV2/Development/major-domo/database/complete_assignment_summary.txt', 'w') as f: f.write("COMPLETE PLAYER ASSIGNMENT SUMMARY\n") f.write("=" * 50 + "\n\n") # Count by assignment type type_counts = defaultdict(int) for _, _, assignment_type in all_assignments: type_counts[assignment_type] += 1 f.write("ASSIGNMENT BREAKDOWN:\n") for assignment_type, count in type_counts.items(): f.write(f" {assignment_type}: {count:,} players\n") f.write(f"\nTOTAL ASSIGNMENTS: {len(all_assignments):,}\n") f.write(f"NEW SBAPLAYERS TO CREATE: {len(new_sbaplayers):,}\n\n") f.write("FILES GENERATED:\n") f.write("1. new_sbaplayers_to_insert_complete.csv - New SbaPlayer records\n") f.write("2. player_sbaplayer_assignments_complete.csv - ALL player assignments\n") def main(): """Generate complete assignments for all players""" logger.info("Generating complete player assignments...") # Load data all_players, sbaplayers = load_cached_data() # Parse manual decisions manual_assignments = parse_manual_decisions() # Generate complete assignments all_assignments, new_sbaplayers = generate_complete_assignments( all_players, sbaplayers, manual_assignments ) # Generate CSV files generate_csv_files(all_assignments, new_sbaplayers, all_players, sbaplayers) logger.info("\n=== COMPLETE ASSIGNMENT GENERATION COMPLETE ===") logger.info(f"Total player assignments: {len(all_assignments):,}") logger.info(f"New SbaPlayers needed: {len(new_sbaplayers):,}") if __name__ == "__main__": main()