# major-domo-database/migrate_to_postgres.py
# Captured 2025-08-20 09:52:46 -05:00 (344 lines, 13 KiB, Python)
#!/usr/bin/env python3
import logging
import os
import sys
from datetime import datetime

from peewee import PostgresqlDatabase, SqliteDatabase
from playhouse.shortcuts import model_to_dict
logger = logging.getLogger(f'{__name__}.migrate_to_postgres')
def setup_databases():
    """Build the SQLite source and PostgreSQL target database handles.

    PostgreSQL connection parameters come from the POSTGRES_* environment
    variables, with development defaults when unset.

    Returns:
        tuple: (sqlite_db, postgres_db) peewee database instances.
    """
    source = SqliteDatabase(
        'storage/sba_master.db',
        pragmas={
            'journal_mode': 'wal',
            'cache_size': -1 * 64000,
            'synchronous': 0,
        },
    )
    target = PostgresqlDatabase(
        os.environ.get('POSTGRES_DB', 'sba_master'),
        user=os.environ.get('POSTGRES_USER', 'sba_admin'),
        password=os.environ.get('POSTGRES_PASSWORD', 'sba_dev_password_2024'),
        host=os.environ.get('POSTGRES_HOST', 'localhost'),
        port=int(os.environ.get('POSTGRES_PORT', '5432')),
    )
    return source, target
def get_all_models():
    """Get all models in dependency order for migration.

    Forces DATABASE_TYPE to 'sqlite' before importing app.db_engine
    (presumably the import binds the models to a database chosen by that
    variable — confirm against app.db_engine). The returned list order is
    load-bearing: every table appears before any table that holds a
    foreign key into it, so migrating in this order never references a
    row that has not been copied yet.

    Returns:
        list: model classes, parents before children.
    """
    # Set temporary environment to load models
    os.environ['DATABASE_TYPE'] = 'sqlite'
    from app.db_engine import (
        Current, Manager, Division, SbaPlayer,  # No dependencies
        Team,  # Depends on Manager, Division
        Player,  # Depends on Team, SbaPlayer
        Result, Schedule, Transaction,  # Depend on Team, Player
        BattingStat, PitchingStat,  # Depend on Player, Team
        Standings,  # Depends on Team
        BattingCareer, PitchingCareer, FieldingCareer,  # No dependencies
        BattingSeason, PitchingSeason, FieldingSeason,  # Depend on Player, Career tables
        DraftPick, DraftData, DraftList,  # Depend on Team, Player
        Award,  # Depends on Manager, Player, Team
        DiceRoll,  # Depends on Team
        Keeper, Injury,  # Depend on Team, Player
        StratGame,  # Depends on Team, Manager
        StratPlay, Decision,  # Depend on StratGame, Player, Team
        CustomCommandCreator, CustomCommand  # CustomCommand depends on Creator
    )
    # Return in dependency order
    # NOTE(review): DiceRoll is imported above but absent from this list —
    # confirm whether it is intentionally excluded from migration.
    return [
        # Base tables (no dependencies)
        Current, Manager, Division, SbaPlayer,
        BattingCareer, PitchingCareer, FieldingCareer,
        CustomCommandCreator,
        # First level dependencies
        Team, DraftData,
        # Second level dependencies
        Player, CustomCommand,
        # Third level dependencies
        Result, Schedule, Transaction, BattingStat, PitchingStat,
        Standings, DraftPick, DraftList, Award,
        Keeper, Injury, StratGame,
        # Fourth level dependencies
        BattingSeason, PitchingSeason, FieldingSeason,
        StratPlay, Decision
    ]
def get_fa_team_id_for_season(season, postgres_db):
    """Return the Free Agents team ID for *season* from PostgreSQL.

    Temporarily rebinds the Team model to *postgres_db* and restores the
    original binding afterwards. Falls back to any FA team when no
    season-specific one exists; returns None when no FA team can be
    found or the lookup raises.
    """
    from app.db_engine import Team

    saved_db = Team._meta.database
    Team._meta.database = postgres_db
    try:
        season_match = Team.select().where(
            (Team.abbrev == 'FA') & (Team.season == season)
        ).first()
        if season_match:
            return season_match.id
        # Fallback: any FA team, regardless of season
        any_fa = Team.select().where(Team.abbrev == 'FA').first()
        if any_fa:
            logger.warning(f" Using fallback FA team ID {any_fa.id} for season {season}")
            return any_fa.id
        logger.error(f" No FA team found for season {season}")
        return None
    except Exception as e:
        logger.error(f" Error finding FA team for season {season}: {e}")
        return None
    finally:
        # Always restore the model's previous database binding.
        Team._meta.database = saved_db
def fix_decision_foreign_keys(record_data, season, postgres_db):
    """Repair a dangling team_id reference on a Decision row.

    When *record_data* carries a team_id that does not exist in the
    PostgreSQL Team table, substitute the season's Free Agents team ID,
    or None when no FA team exists (the column is nullable). Mutates
    *record_data* in place.

    Returns:
        bool: True when record_data was modified.
    """
    from app.db_engine import Team, Player, StratGame

    changed = False
    team_id = record_data.get('team_id')
    if team_id is not None:
        saved_db = Team._meta.database
        Team._meta.database = postgres_db
        try:
            if not Team.select().where(Team.id == team_id).exists():
                replacement = get_fa_team_id_for_season(season, postgres_db)
                if replacement:
                    logger.warning(f" Replacing missing team_id {record_data['team_id']} with FA team {replacement} for season {season}")
                    record_data['team_id'] = replacement
                else:
                    # No FA team available: drop the reference (nullable field).
                    record_data['team_id'] = None
                changed = True
        except Exception as e:
            logger.error(f" Error checking team existence: {e}")
        finally:
            Team._meta.database = saved_db
    return changed
def migrate_table_data(model_class, sqlite_db, postgres_db, batch_size=1000):
    """Copy every row of *model_class* from SQLite into PostgreSQL.

    Rows are read in batches of *batch_size* with primary keys preserved
    so that foreign-key relationships survive the move. A batch that hits
    a foreign-key violation is retried row-by-row; 'decision' rows with
    dangling team references are repaired via fix_decision_foreign_keys,
    and unrepairable rows are skipped with a warning. After a successful
    copy the table's id sequence is advanced past the migrated maximum.

    Args:
        model_class: peewee model to migrate. Its _meta.database binding
            is mutated during the run (pointed at each side in turn).
        sqlite_db: source SqliteDatabase.
        postgres_db: target PostgresqlDatabase.
        batch_size: rows per select/insert batch.

    Returns:
        bool: True when the table migrated (or was empty/absent in the
        source), False on any unrecoverable error.
    """
    table_name = model_class._meta.table_name
    logger.info(f"Migrating table: {table_name}")
    try:
        # Count source rows first; an absent source table counts as success.
        model_class._meta.database = sqlite_db
        sqlite_db.connect()
        try:
            total_records = model_class.select().count()
        except Exception as e:
            if "no such table" in str(e).lower():
                logger.warning(f" Table {table_name} doesn't exist in SQLite source, skipping")
                sqlite_db.close()
                return True
            else:
                raise  # Re-raise if it's a different error
        if total_records == 0:
            logger.info(f" No records in {table_name}, skipping")
            sqlite_db.close()
            return True
        logger.info(f" Found {total_records} records")
        sqlite_db.close()
        # Ensure the target table exists before inserting.
        model_class._meta.database = postgres_db
        postgres_db.connect()
        model_class.create_table(safe=True)
        # Migrate data in batches, flipping the model's database binding
        # between the SQLite read side and the PostgreSQL write side.
        migrated = 0
        sqlite_db.connect()
        for batch_start in range(0, total_records, batch_size):
            # Get batch from SQLite
            model_class._meta.database = sqlite_db
            batch = list(model_class.select().offset(batch_start).limit(batch_size))
            if not batch:
                break
            # Convert to dicts and prepare for PostgreSQL
            batch_data = []
            for record in batch:
                data = model_to_dict(record, recurse=False)
                # CRITICAL: Preserve original IDs to maintain foreign key relationships
                # DO NOT remove IDs - they must be preserved from SQLite source
                batch_data.append(data)
            # Insert into PostgreSQL with foreign key error handling
            model_class._meta.database = postgres_db
            if batch_data:
                try:
                    # Fast path: one bulk insert per batch.
                    model_class.insert_many(batch_data).execute()
                    migrated += len(batch_data)
                except Exception as batch_error:
                    error_msg = str(batch_error).lower()
                    if 'foreign key constraint' in error_msg or 'violates foreign key' in error_msg:
                        # Slow path: insert row-by-row so only offending rows fail.
                        successful_inserts = 0
                        for record_data in batch_data:
                            try:
                                model_class.insert(record_data).execute()
                                successful_inserts += 1
                            except Exception as insert_error:
                                individual_error_msg = str(insert_error).lower()
                                if 'foreign key constraint' in individual_error_msg or 'violates foreign key' in individual_error_msg:
                                    # Special handling for Decision table - fix foreign keys using FA team
                                    if table_name == 'decision':
                                        season = record_data.get('season', 0)
                                        if fix_decision_foreign_keys(record_data, season, postgres_db):
                                            # Retry the insert after fixing foreign keys
                                            try:
                                                model_class.insert(record_data).execute()
                                                successful_inserts += 1
                                                continue
                                            except Exception as retry_error:
                                                logger.error(f" Failed to insert decision record even after fixing foreign keys: {retry_error}")
                                    # For other tables or if foreign key fix failed, skip the record
                                    continue
                                else:
                                    # Anything other than an FK violation is fatal.
                                    raise insert_error
                        migrated += successful_inserts
                        if successful_inserts < len(batch_data):
                            skipped = len(batch_data) - successful_inserts
                            logger.warning(f" Skipped {skipped} records with foreign key violations")
                    else:
                        # Re-raise other types of batch errors
                        raise batch_error
        logger.info(f" Migrated {migrated}/{total_records} records")
        # Advance the serial sequence so future inserts don't collide with
        # the preserved primary keys.
        if migrated > 0 and hasattr(model_class, 'id'):
            try:
                sequence_name = f"{table_name}_id_seq"
                reset_query = f"SELECT setval('{sequence_name}', (SELECT MAX(id) FROM {table_name}));"
                postgres_db.execute_sql(reset_query)
                logger.info(f" Reset sequence {sequence_name} to max ID")
            except Exception as seq_error:
                logger.warning(f" Could not reset sequence for {table_name}: {seq_error}")
        sqlite_db.close()
        postgres_db.close()
        logger.info(f"✓ Successfully migrated {table_name}: {migrated} records")
        return True
    except Exception as e:
        logger.error(f"✗ Failed to migrate {table_name}: {e}")
        # Best-effort cleanup. Fixed: the original used bare `except:`,
        # which also swallows KeyboardInterrupt/SystemExit.
        try:
            sqlite_db.close()
        except Exception:
            pass
        try:
            postgres_db.close()
        except Exception:
            pass
        return False
def migrate_all_data():
    """Migrate every table from the SQLite source to PostgreSQL.

    Verifies both connections up front, then migrates each model in the
    dependency order supplied by get_all_models so foreign-key targets
    exist before their referrers.

    Returns:
        bool: True when every table migrated successfully.
    """
    logger.info("Starting full data migration from SQLite to PostgreSQL...")
    # Setup databases
    sqlite_db, postgres_db = setup_databases()
    # Fail fast if the source is unreachable.
    try:
        sqlite_db.connect()
        sqlite_db.execute_sql("SELECT 1").fetchone()
        sqlite_db.close()
        logger.info("✓ SQLite source database connection OK")
    except Exception as e:
        logger.error(f"✗ SQLite connection failed: {e}")
        return False
    # Fail fast if the target is unreachable.
    try:
        postgres_db.connect()
        postgres_db.execute_sql("SELECT 1").fetchone()
        postgres_db.close()
        logger.info("✓ PostgreSQL target database connection OK")
    except Exception as e:
        logger.error(f"✗ PostgreSQL connection failed: {e}")
        return False
    # Get models in dependency order
    all_models = get_all_models()
    logger.info(f"Found {len(all_models)} models to migrate")
    # Migrate each table
    successful_migrations = 0
    failed_migrations = []
    for model in all_models:
        if migrate_table_data(model, sqlite_db, postgres_db):
            successful_migrations += 1
        else:
            failed_migrations.append(model._meta.table_name)
    # Report results (was an f-string with no placeholders; fixed)
    logger.info("\nMigration completed:")
    logger.info(f"✓ Successful: {successful_migrations}/{len(all_models)} tables")
    if failed_migrations:
        logger.error(f"✗ Failed: {len(failed_migrations)} tables")
        for table in failed_migrations:
            logger.error(f" - {table}")
        return False
    logger.info("🎉 All tables migrated successfully!")
    return True
def main():
    """Script entry point: configure logging, seed env defaults, migrate.

    Returns:
        int: 0 on success, 1 on failure (suitable for sys.exit).
    """
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    # Provide development defaults WITHOUT clobbering credentials already
    # present in the environment. The original unconditionally overwrote
    # these, which silently defeated the env-var support in setup_databases.
    os.environ.setdefault('POSTGRES_DB', 'sba_master')
    os.environ.setdefault('POSTGRES_USER', 'sba_admin')
    # NOTE(review): hard-coded dev password — confirm this script never
    # runs against a production database.
    os.environ.setdefault('POSTGRES_PASSWORD', 'sba_dev_password_2024')
    os.environ.setdefault('POSTGRES_HOST', 'localhost')
    os.environ.setdefault('POSTGRES_PORT', '5432')
    success = migrate_all_data()
    return 0 if success else 1


if __name__ == "__main__":
    # sys.exit is preferred over the site-provided exit() builtin.
    sys.exit(main())