325 lines
12 KiB
Python
325 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate complete player assignments for ALL players:
|
|
1. Automatic bbref_id matches
|
|
2. Manual review decisions
|
|
3. Create new SbaPlayers for remaining unmatched players
|
|
"""
|
|
import csv
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List, Set, Optional
|
|
from collections import defaultdict
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger('CompleteAssignments')
|
|
|
|
@dataclass
|
|
class PlayerRecord:
|
|
id: int
|
|
name: str
|
|
season: int
|
|
bbref_id: Optional[str] = None
|
|
sbaplayer_id: Optional[int] = None
|
|
|
|
@dataclass
|
|
class SbaPlayerRecord:
|
|
id: int
|
|
first_name: str
|
|
last_name: str
|
|
key_bbref: Optional[str] = None
|
|
key_fangraphs: Optional[int] = None
|
|
key_mlbam: Optional[int] = None
|
|
key_retro: Optional[str] = None
|
|
|
|
@dataclass
|
|
class NewSbaPlayer:
|
|
first_name: str
|
|
last_name: str
|
|
key_bbref: Optional[str] = None
|
|
temp_id: int = 0
|
|
|
|
def load_cached_data():
|
|
"""Load cached player and sbaplayer data"""
|
|
logger.info("Loading cached data...")
|
|
|
|
# Load SbaPlayers
|
|
with open('/tmp/sbaplayers.json', 'r') as f:
|
|
sbaplayer_data = json.load(f)
|
|
|
|
sbaplayers = []
|
|
for data in sbaplayer_data:
|
|
sbaplayers.append(SbaPlayerRecord(**data))
|
|
|
|
# Load all players
|
|
all_players = []
|
|
for season in range(1, 13):
|
|
with open(f"/tmp/players_season_{season}.json", 'r') as f:
|
|
season_data = json.load(f)
|
|
|
|
for data in season_data:
|
|
all_players.append(PlayerRecord(**data))
|
|
|
|
logger.info(f"Loaded {len(sbaplayers)} SbaPlayers and {len(all_players)} player records")
|
|
return all_players, sbaplayers
|
|
|
|
def parse_manual_decisions():
|
|
"""Parse manual decisions from reviewed CSV files"""
|
|
logger.info("Parsing manual decisions...")
|
|
|
|
manual_assignments = {} # player_id -> sbaplayer_id
|
|
|
|
# Parse unmatched decisions
|
|
try:
|
|
with open('/mnt/NV2/Development/major-domo/database/unmatched_players_for_review_updated.csv', 'r', encoding='utf-8') as f:
|
|
reader = csv.DictReader(f)
|
|
|
|
for row in reader:
|
|
resolution = row.get('resolution', '').strip()
|
|
if not resolution or resolution == 'SKIP':
|
|
continue
|
|
|
|
# Get column names dynamically
|
|
player_id_key = next((k for k in row.keys() if 'player' in k.lower() and 'id' in k.lower()), 'player_id')
|
|
bbref_id_key = next((k for k in row.keys() if 'bbref' in k.lower() and 'id' in k.lower()), 'bbref_id')
|
|
suggested_id_key = next((k for k in row.keys() if 'suggested' in k.lower() and 'sbaplayer' in k.lower() and 'id' in k.lower()), 'suggested_sbaplayer_id')
|
|
|
|
player_id = int(row[player_id_key])
|
|
player_name = row['name']
|
|
bbref_id = row[bbref_id_key] if row[bbref_id_key] else None
|
|
|
|
if resolution == 'ACCEPT':
|
|
suggested_id = row[suggested_id_key]
|
|
if suggested_id and not suggested_id.startswith('PARTIAL:'):
|
|
manual_assignments[player_id] = int(suggested_id)
|
|
|
|
elif resolution.startswith('USE_SBA_'):
|
|
sbaplayer_id = int(resolution.replace('USE_SBA_', ''))
|
|
manual_assignments[player_id] = sbaplayer_id
|
|
|
|
# Note: CREATE_NEW cases will be handled by creating new SbaPlayers
|
|
|
|
except FileNotFoundError:
|
|
logger.warning("Unmatched decisions file not found")
|
|
|
|
# Parse high-risk decisions
|
|
try:
|
|
with open('/mnt/NV2/Development/major-domo/database/high_risk_player_matches_updated.csv', 'r', encoding='utf-8') as f:
|
|
reader = csv.DictReader(f)
|
|
|
|
for row in reader:
|
|
resolution = row.get('resolution', '').strip()
|
|
if not resolution or resolution == 'SKIP':
|
|
continue
|
|
|
|
# Get column names dynamically
|
|
player_id_key = next((k for k in row.keys() if 'player' in k.lower() and 'id' in k.lower()), 'player_id')
|
|
|
|
if resolution.startswith('USE_SBA_') and row[player_id_key]:
|
|
player_id = int(row[player_id_key])
|
|
sbaplayer_id = int(resolution.replace('USE_SBA_', ''))
|
|
manual_assignments[player_id] = sbaplayer_id
|
|
|
|
except FileNotFoundError:
|
|
logger.warning("High-risk decisions file not found")
|
|
|
|
logger.info(f"Found {len(manual_assignments)} manual player assignments")
|
|
return manual_assignments
|
|
|
|
def generate_complete_assignments(all_players, sbaplayers, manual_assignments):
|
|
"""Generate assignments for ALL players"""
|
|
|
|
# Create lookup maps
|
|
sbaplayers_by_bbref = {}
|
|
for sba in sbaplayers:
|
|
if sba.key_bbref:
|
|
sbaplayers_by_bbref[sba.key_bbref] = sba
|
|
|
|
# Track assignments and new SbaPlayers needed
|
|
all_assignments = [] # (player_id, sbaplayer_id, assignment_type)
|
|
new_sbaplayers = []
|
|
players_needing_new_sba = defaultdict(list) # group by unique identifier
|
|
temp_id_counter = 90000
|
|
|
|
logger.info("Processing all player records...")
|
|
|
|
for player in all_players:
|
|
assignment_type = ""
|
|
sbaplayer_id = None
|
|
|
|
# 1. Check manual assignments first
|
|
if player.id in manual_assignments:
|
|
sbaplayer_id = manual_assignments[player.id]
|
|
assignment_type = "manual_decision"
|
|
|
|
# 2. Try automatic bbref_id matching
|
|
elif player.bbref_id and player.bbref_id in sbaplayers_by_bbref:
|
|
sba = sbaplayers_by_bbref[player.bbref_id]
|
|
sbaplayer_id = sba.id
|
|
assignment_type = "automatic_bbref_match"
|
|
|
|
# 3. Player needs new SbaPlayer record
|
|
else:
|
|
# Group players who need new SbaPlayer records
|
|
if player.bbref_id:
|
|
# Group by bbref_id
|
|
key = f"bbref:{player.bbref_id}"
|
|
unique_name = player.name # Use name from any player with this bbref_id
|
|
unique_bbref = player.bbref_id
|
|
else:
|
|
# Group by name
|
|
key = f"name:{player.name}"
|
|
unique_name = player.name
|
|
unique_bbref = None
|
|
|
|
players_needing_new_sba[key].append(player)
|
|
continue # Will process these after creating new SbaPlayers
|
|
|
|
# Add assignment
|
|
if sbaplayer_id:
|
|
all_assignments.append((player.id, sbaplayer_id, assignment_type))
|
|
|
|
logger.info(f"Direct assignments: {len(all_assignments)}")
|
|
logger.info(f"Player groups needing new SbaPlayers: {len(players_needing_new_sba)}")
|
|
|
|
# Create new SbaPlayer records for remaining players
|
|
for key, players in players_needing_new_sba.items():
|
|
# Use the first player as representative
|
|
representative = players[0]
|
|
|
|
# Parse name
|
|
name_parts = representative.name.strip().split()
|
|
if len(name_parts) >= 2:
|
|
first_name = name_parts[0]
|
|
last_name = ' '.join(name_parts[1:])
|
|
else:
|
|
first_name = representative.name
|
|
last_name = ""
|
|
|
|
# Create new SbaPlayer
|
|
new_sba = NewSbaPlayer(
|
|
first_name=first_name,
|
|
last_name=last_name,
|
|
key_bbref=representative.bbref_id,
|
|
temp_id=temp_id_counter
|
|
)
|
|
new_sbaplayers.append(new_sba)
|
|
|
|
# Assign all players in this group to the new SbaPlayer
|
|
for player in players:
|
|
all_assignments.append((player.id, temp_id_counter, "new_sbaplayer"))
|
|
|
|
temp_id_counter += 1
|
|
|
|
logger.info(f"Total assignments: {len(all_assignments)}")
|
|
logger.info(f"New SbaPlayers to create: {len(new_sbaplayers)}")
|
|
|
|
return all_assignments, new_sbaplayers
|
|
|
|
def generate_csv_files(all_assignments, new_sbaplayers, all_players, sbaplayers):
|
|
"""Generate the final CSV files"""
|
|
|
|
players_by_id = {p.id: p for p in all_players}
|
|
sbaplayers_by_id = {sba.id: sba for sba in sbaplayers}
|
|
|
|
# Generate new SbaPlayers CSV
|
|
logger.info("Generating new SbaPlayers CSV...")
|
|
with open('/mnt/NV2/Development/major-domo/database/new_sbaplayers_to_insert_complete.csv', 'w', newline='') as csvfile:
|
|
writer = csv.writer(csvfile)
|
|
writer.writerow(['temp_id', 'first_name', 'last_name', 'key_bbref', 'key_fangraphs', 'key_mlbam', 'key_retro'])
|
|
|
|
for sba in new_sbaplayers:
|
|
writer.writerow([
|
|
sba.temp_id,
|
|
sba.first_name,
|
|
sba.last_name,
|
|
sba.key_bbref or '',
|
|
'', # key_fangraphs
|
|
'', # key_mlbam
|
|
'' # key_retro
|
|
])
|
|
|
|
# Generate complete player assignments CSV
|
|
logger.info("Generating complete player assignments CSV...")
|
|
with open('/mnt/NV2/Development/major-domo/database/player_sbaplayer_assignments_complete.csv', 'w', newline='') as csvfile:
|
|
writer = csv.writer(csvfile)
|
|
writer.writerow([
|
|
'player_id', 'player_name', 'player_season', 'player_bbref_id',
|
|
'sbaplayer_id', 'sbaplayer_name', 'sbaplayer_bbref', 'assignment_type'
|
|
])
|
|
|
|
for player_id, sbaplayer_id, assignment_type in all_assignments:
|
|
player = players_by_id[player_id]
|
|
|
|
# Get SbaPlayer info
|
|
if sbaplayer_id >= 90000:
|
|
# New SbaPlayer (temp ID)
|
|
sbaplayer_name = f"NEW_TEMP_{sbaplayer_id}"
|
|
sbaplayer_bbref = player.bbref_id or ''
|
|
else:
|
|
# Existing SbaPlayer
|
|
if sbaplayer_id in sbaplayers_by_id:
|
|
sba = sbaplayers_by_id[sbaplayer_id]
|
|
sbaplayer_name = f"{sba.first_name} {sba.last_name}"
|
|
sbaplayer_bbref = sba.key_bbref or ''
|
|
else:
|
|
sbaplayer_name = f"UNKNOWN_SBA_{sbaplayer_id}"
|
|
sbaplayer_bbref = ''
|
|
|
|
writer.writerow([
|
|
player.id,
|
|
player.name,
|
|
player.season,
|
|
player.bbref_id or '',
|
|
sbaplayer_id,
|
|
sbaplayer_name,
|
|
sbaplayer_bbref,
|
|
assignment_type
|
|
])
|
|
|
|
# Generate summary
|
|
with open('/mnt/NV2/Development/major-domo/database/complete_assignment_summary.txt', 'w') as f:
|
|
f.write("COMPLETE PLAYER ASSIGNMENT SUMMARY\n")
|
|
f.write("=" * 50 + "\n\n")
|
|
|
|
# Count by assignment type
|
|
type_counts = defaultdict(int)
|
|
for _, _, assignment_type in all_assignments:
|
|
type_counts[assignment_type] += 1
|
|
|
|
f.write("ASSIGNMENT BREAKDOWN:\n")
|
|
for assignment_type, count in type_counts.items():
|
|
f.write(f" {assignment_type}: {count:,} players\n")
|
|
|
|
f.write(f"\nTOTAL ASSIGNMENTS: {len(all_assignments):,}\n")
|
|
f.write(f"NEW SBAPLAYERS TO CREATE: {len(new_sbaplayers):,}\n\n")
|
|
|
|
f.write("FILES GENERATED:\n")
|
|
f.write("1. new_sbaplayers_to_insert_complete.csv - New SbaPlayer records\n")
|
|
f.write("2. player_sbaplayer_assignments_complete.csv - ALL player assignments\n")
|
|
|
|
def main():
|
|
"""Generate complete assignments for all players"""
|
|
logger.info("Generating complete player assignments...")
|
|
|
|
# Load data
|
|
all_players, sbaplayers = load_cached_data()
|
|
|
|
# Parse manual decisions
|
|
manual_assignments = parse_manual_decisions()
|
|
|
|
# Generate complete assignments
|
|
all_assignments, new_sbaplayers = generate_complete_assignments(
|
|
all_players, sbaplayers, manual_assignments
|
|
)
|
|
|
|
# Generate CSV files
|
|
generate_csv_files(all_assignments, new_sbaplayers, all_players, sbaplayers)
|
|
|
|
logger.info("\n=== COMPLETE ASSIGNMENT GENERATION COMPLETE ===")
|
|
logger.info(f"Total player assignments: {len(all_assignments):,}")
|
|
logger.info(f"New SbaPlayers needed: {len(new_sbaplayers):,}")
|
|
|
|
if __name__ == "__main__":
|
|
main() |