#!/usr/bin/env python3
"""
Generate raw SQL UPDATE statements for bulk player sbaplayer_id assignments
Output can be copied and pasted into Adminer or run via docker exec
"""
import csv
import logging
from collections import defaultdict

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('sql_generation.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger('generate_update_sql')


def load_assignments():
    """Load player assignments from CSV file.

    Reads ``player_sbaplayer_assignments.csv`` from the current directory.

    Returns:
        list[dict]: one dict per CSV row with keys ``player_id`` (int),
        ``assigned_sbaplayer_id`` (int) and ``assignment_source`` (str).

    Raises:
        FileNotFoundError: if the CSV file is missing.
        KeyError / ValueError: if expected columns are absent or non-numeric.
    """
    assignments = []
    logger.info("Loading player assignments from player_sbaplayer_assignments.csv...")
    # newline='' is the documented way to open files for the csv module
    with open('player_sbaplayer_assignments.csv', 'r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            assignments.append({
                'player_id': int(row['player_id']),
                'assigned_sbaplayer_id': int(row['assigned_sbaplayer_id']),
                'assignment_source': row['assignment_source']
            })
    logger.info(f"Loaded {len(assignments)} player assignments")
    return assignments


def generate_sql_statements(assignments, max_ids_per_statement=1000):
    """Generate SQL UPDATE statements grouped by sbaplayer_id.

    Args:
        assignments: list of dicts as produced by :func:`load_assignments`.
        max_ids_per_statement: cap on how many player ids go into one
            ``WHERE id IN (...)`` list, to keep statements a manageable size.

    Returns:
        list[str]: one ``UPDATE`` statement per (sbaplayer_id, chunk) pair.
    """
    # Group assignments by sbaplayer_id for efficiency — one UPDATE can
    # cover every player mapped to the same sbaplayer_id
    by_sbaplayer = defaultdict(list)
    for assignment in assignments:
        sbaplayer_id = assignment['assigned_sbaplayer_id']
        by_sbaplayer[sbaplayer_id].append(assignment['player_id'])

    logger.info(f"Grouped assignments into {len(by_sbaplayer)} unique sbaplayer_id groups")

    sql_statements = []
    total_players = 0
    for sbaplayer_id, player_ids in by_sbaplayer.items():
        # Split large groups into chunks to avoid overly long SQL statements
        for i in range(0, len(player_ids), max_ids_per_statement):
            chunk = player_ids[i:i + max_ids_per_statement]
            id_list = ','.join(map(str, chunk))
            sql = f"UPDATE player SET sbaplayer_id = {sbaplayer_id} WHERE id IN ({id_list});"
            sql_statements.append(sql)
            total_players += len(chunk)

    logger.info(f"Generated {len(sql_statements)} SQL statements covering {total_players} players")
    return sql_statements


def write_sql_file(sql_statements, filename='player_sbaplayer_updates.sql'):
    """Write SQL statements to file, wrapped in a BEGIN/COMMIT transaction.

    Args:
        sql_statements: list of SQL strings from :func:`generate_sql_statements`.
        filename: destination path for the generated .sql file.
    """
    with open(filename, 'w') as f:
        f.write("-- Bulk update player.sbaplayer_id assignments\n")
        f.write("-- Generated from player-to-sbaplayer matching project\n")
        f.write(f"-- Total statements: {len(sql_statements)}\n")
        f.write("-- \n")
        f.write("-- IMPORTANT: Run these in a transaction for safety:\n")
        f.write("-- BEGIN;\n")
        f.write("-- \n")
        f.write("-- COMMIT;\n")
        f.write("\n")
        f.write("BEGIN;\n\n")
        for i, sql in enumerate(sql_statements, 1):
            f.write(f"-- Statement {i}\n")
            f.write(sql)
            f.write("\n\n")
        f.write("COMMIT;\n")
    # BUG FIX: original logged the literal text "(unknown)" instead of
    # interpolating the output filename
    logger.info(f"SQL statements written to {filename}")


def generate_verification_sql(assignments, sample_size=100):
    """Generate SQL to verify updates were applied correctly.

    Writes per-player spot checks for up to 10 sampled assignments plus
    aggregate sanity queries to ``verify_updates.sql``.

    Args:
        assignments: list of dicts as produced by :func:`load_assignments`.
        sample_size: how many assignments to consider for sampling
            (only the first 10 of the sample get individual checks).
    """
    # Take a sample for verification
    sample_assignments = assignments[:sample_size]

    verification_sql = """-- Verification queries
-- Check sample assignments were applied correctly
"""
    for assignment in sample_assignments[:10]:
        player_id = assignment['player_id']
        expected_sbaplayer_id = assignment['assigned_sbaplayer_id']
        verification_sql += f"""-- Check player {player_id}
SELECT id, name, sbaplayer_id FROM player WHERE id = {player_id} AND sbaplayer_id = {expected_sbaplayer_id};
"""

    # No placeholders below, so a plain (non-f) string is used
    verification_sql += """
-- Summary check: Count of players with non-null sbaplayer_id
SELECT COUNT(*) as players_with_sbaplayer_id FROM player WHERE sbaplayer_id IS NOT NULL;
-- Expected result: should be close to 12,232

-- Check distribution by assignment source type
SELECT CASE WHEN sbaplayer_id < 3000 THEN 'existing_sbaplayer' ELSE 'new_sbaplayer' END as assignment_type, COUNT(*) as player_count FROM player WHERE sbaplayer_id IS NOT NULL GROUP BY assignment_type;
"""

    with open('verify_updates.sql', 'w') as f:
        f.write(verification_sql)
    logger.info("Verification SQL written to verify_updates.sql")


def show_summary_stats(assignments):
    """Log summary statistics about the loaded assignments.

    Args:
        assignments: list of dicts as produced by :func:`load_assignments`.
    """
    by_source = defaultdict(int)
    by_sbaplayer = defaultdict(int)
    for assignment in assignments:
        by_source[assignment['assignment_source']] += 1
        by_sbaplayer[assignment['assigned_sbaplayer_id']] += 1

    logger.info("=== Assignment Summary ===")
    logger.info(f"Total players to update: {len(assignments):,}")
    logger.info("By assignment source:")
    for source, count in sorted(by_source.items()):
        logger.info(f"  {source}: {count:,} players")
    logger.info(f"Unique SbaPlayer IDs: {len(by_sbaplayer):,}")

    # Show new vs existing SbaPlayers — ids >= 3000 are treated as newly
    # created SbaPlayer rows by convention in this project
    new_sbaplayers = sum(1 for sbaplayer_id in by_sbaplayer.keys() if sbaplayer_id >= 3000)
    existing_sbaplayers = len(by_sbaplayer) - new_sbaplayers
    logger.info(f"  Existing SbaPlayers: {existing_sbaplayers}")
    logger.info(f"  New SbaPlayers: {new_sbaplayers}")


def main():
    """Main execution function: load, summarize, generate, and verify."""
    logger.info("Starting SQL generation for player sbaplayer_id updates...")
    try:
        # Load assignments
        assignments = load_assignments()
        if not assignments:
            logger.error("No assignments loaded. Exiting.")
            return

        # Show summary statistics
        show_summary_stats(assignments)

        # Generate SQL statements
        sql_statements = generate_sql_statements(assignments)

        # Write to file
        write_sql_file(sql_statements)

        # Generate verification SQL
        generate_verification_sql(assignments)

        logger.info("=== SUCCESS ===")
        logger.info("Generated files:")
        logger.info("  📄 player_sbaplayer_updates.sql - Main update statements")
        logger.info("  🔍 verify_updates.sql - Verification queries")
        logger.info("")
        logger.info("Next steps:")
        logger.info("1. Copy player_sbaplayer_updates.sql to your server")
        logger.info("2. Run via Adminer or docker exec:")
        logger.info("   docker exec -i postgres_container psql -U username -d database < player_sbaplayer_updates.sql")
        logger.info("3. Run verify_updates.sql to confirm success")

    except Exception as e:
        logger.error(f"Error generating SQL: {e}")
        raise


if __name__ == "__main__":
    main()