major-domo-database/.claude/sqlite-to-postgres/player-to-sbaplayer-matching/generate_update_sql.py
Cal Corum 7130a1fd43 Postgres Migration
Migration documentation and scripts
2025-08-25 07:18:31 -05:00

203 lines
6.8 KiB
Python

#!/usr/bin/env python3
"""
Generate raw SQL UPDATE statements for bulk player sbaplayer_id assignments
Output can be copied and pasted into Adminer or run via docker exec
"""
import csv
import logging
from collections import defaultdict
# Set up logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('sql_generation.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger('generate_update_sql')
def load_assignments():
    """Load player assignment rows from player_sbaplayer_assignments.csv.

    Returns:
        list[dict]: one dict per CSV row with keys 'player_id' (int),
        'assigned_sbaplayer_id' (int) and 'assignment_source' (str).

    Raises:
        FileNotFoundError: if the CSV is not in the working directory.
        ValueError: if an id column contains a non-integer value.
    """
    assignments = []
    logger.info("Loading player assignments from player_sbaplayer_assignments.csv...")
    # newline='' is required when handing a file to the csv module so that
    # quoted fields containing embedded line breaks are parsed correctly.
    with open('player_sbaplayer_assignments.csv', 'r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            assignments.append({
                'player_id': int(row['player_id']),
                'assigned_sbaplayer_id': int(row['assigned_sbaplayer_id']),
                'assignment_source': row['assignment_source'],
            })
    logger.info(f"Loaded {len(assignments)} player assignments")
    return assignments
def generate_sql_statements(assignments, max_ids_per_statement=1000):
    """Build UPDATE statements, one IN-list of player ids per sbaplayer_id.

    Grouping by target sbaplayer_id lets a single statement update many
    player rows at once; any group larger than max_ids_per_statement is
    split so no statement's IN-list grows unmanageably long.

    Returns:
        list[str]: the SQL statements, each terminated with ';'.
    """
    grouped = defaultdict(list)
    for entry in assignments:
        grouped[entry['assigned_sbaplayer_id']].append(entry['player_id'])
    logger.info(f"Grouped assignments into {len(grouped)} unique sbaplayer_id groups")

    statements = []
    covered = 0
    for target_id, player_ids in grouped.items():
        start = 0
        while start < len(player_ids):
            batch = player_ids[start:start + max_ids_per_statement]
            start += max_ids_per_statement
            id_list = ','.join(str(pid) for pid in batch)
            statements.append(
                f"""UPDATE player
SET sbaplayer_id = {target_id}
WHERE id IN ({id_list});"""
            )
            covered += len(batch)
    logger.info(f"Generated {len(statements)} SQL statements covering {covered} players")
    return statements
def write_sql_file(sql_statements, filename='player_sbaplayer_updates.sql'):
    """Write the UPDATE statements to *filename*, wrapped in BEGIN/COMMIT.

    The file gets a comment header plus an explicit transaction so that a
    partially-failed bulk update rolls back cleanly.

    Args:
        sql_statements: list of SQL statement strings to emit.
        filename: output path for the generated .sql file.
    """
    with open(filename, 'w') as f:
        f.write("-- Bulk update player.sbaplayer_id assignments\n")
        f.write("-- Generated from player-to-sbaplayer matching project\n")
        f.write(f"-- Total statements: {len(sql_statements)}\n")
        f.write("-- \n")
        f.write("-- IMPORTANT: Run these in a transaction for safety:\n")
        f.write("-- BEGIN;\n")
        f.write("-- <paste statements here>\n")
        f.write("-- COMMIT;\n")
        f.write("\n")
        f.write("BEGIN;\n\n")
        for i, sql in enumerate(sql_statements, 1):
            f.write(f"-- Statement {i}\n")
            f.write(sql)
            f.write("\n\n")
        f.write("COMMIT;\n")
    # BUG FIX: the log message previously contained no placeholder, so the
    # actual output path was never reported; interpolate the filename.
    logger.info(f"SQL statements written to {filename}")
def generate_verification_sql(assignments, sample_size=100):
    """Write verify_updates.sql with spot checks plus summary queries.

    Emits a per-player SELECT for the first 10 sampled assignments, then
    aggregate counts to sanity-check the overall update.

    NOTE(review): sample_size trims the candidate pool, but only the first
    10 entries ever produce a spot check — confirm whether more checks
    were intended.
    """
    sample = assignments[:sample_size]

    pieces = [
        "-- Verification queries\n"
        "-- Check sample assignments were applied correctly\n"
    ]
    for check in sample[:10]:
        pid = check['player_id']
        expected = check['assigned_sbaplayer_id']
        pieces.append(
            f"-- Check player {pid}\n"
            f"SELECT id, name, sbaplayer_id\n"
            f"FROM player\n"
            f"WHERE id = {pid} AND sbaplayer_id = {expected};\n"
        )
    pieces.append(
        "\n"
        "-- Summary check: Count of players with non-null sbaplayer_id\n"
        "SELECT COUNT(*) as players_with_sbaplayer_id\n"
        "FROM player\n"
        "WHERE sbaplayer_id IS NOT NULL;\n"
        "-- Expected result: should be close to 12,232\n"
        "-- Check distribution by assignment source type\n"
        "SELECT\n"
        "CASE\n"
        "WHEN sbaplayer_id < 3000 THEN 'existing_sbaplayer'\n"
        "ELSE 'new_sbaplayer'\n"
        "END as assignment_type,\n"
        "COUNT(*) as player_count\n"
        "FROM player\n"
        "WHERE sbaplayer_id IS NOT NULL\n"
        "GROUP BY assignment_type;\n"
    )

    with open('verify_updates.sql', 'w') as f:
        f.write("".join(pieces))
    logger.info("Verification SQL written to verify_updates.sql")
def show_summary_stats(assignments):
    """Log aggregate counts describing the pending assignments."""
    source_counts = defaultdict(int)
    sbaplayer_counts = defaultdict(int)
    for entry in assignments:
        source_counts[entry['assignment_source']] += 1
        sbaplayer_counts[entry['assigned_sbaplayer_id']] += 1

    logger.info("=== Assignment Summary ===")
    logger.info(f"Total players to update: {len(assignments):,}")
    logger.info("By assignment source:")
    for source in sorted(source_counts):
        logger.info(f" {source}: {source_counts[source]:,} players")
    logger.info(f"Unique SbaPlayer IDs: {len(sbaplayer_counts):,}")
    # The script treats sbaplayer_id >= 3000 as newly created SbaPlayers,
    # everything below as pre-existing rows.
    new_count = sum(1 for key in sbaplayer_counts if key >= 3000)
    logger.info(f" Existing SbaPlayers: {len(sbaplayer_counts) - new_count}")
    logger.info(f" New SbaPlayers: {new_count}")
def main():
    """Drive the full pipeline: load, summarize, then emit both SQL files."""
    logger.info("Starting SQL generation for player sbaplayer_id updates...")
    try:
        assignments = load_assignments()
        if not assignments:
            logger.error("No assignments loaded. Exiting.")
            return

        show_summary_stats(assignments)
        statements = generate_sql_statements(assignments)
        write_sql_file(statements)
        generate_verification_sql(assignments)

        # Operator-facing run summary and follow-up instructions.
        for line in (
            "=== SUCCESS ===",
            "Generated files:",
            " 📄 player_sbaplayer_updates.sql - Main update statements",
            " 🔍 verify_updates.sql - Verification queries",
            "",
            "Next steps:",
            "1. Copy player_sbaplayer_updates.sql to your server",
            "2. Run via Adminer or docker exec:",
            " docker exec -i postgres_container psql -U username -d database < player_sbaplayer_updates.sql",
            "3. Run verify_updates.sql to confirm success",
        ):
            logger.info(line)
    except Exception as e:
        logger.error(f"Error generating SQL: {e}")
        raise


if __name__ == "__main__":
    main()