- Add db_helpers.py with cross-database upsert functions for SQLite/PostgreSQL
- Replace 12 on_conflict_replace() calls with PostgreSQL-compatible upserts
- Add unique indexes: StratPlay(game, play_num), Decision(game, pitcher)
- Add max_length to Team model fields (abbrev, sname, lname)
- Fix boolean comparison in teams.py (== 0/1 to == False/True)
- Create migrate_to_postgres.py with ID-preserving migration logic
- Create audit_sqlite.py for pre-migration data integrity checks
- Add PROJECT_PLAN.json for migration tracking
- Add .secrets/ to .gitignore for credentials

Audit results: 658,963 records across 29 tables, 2,390 orphaned stats (expected)
Based on Major Domo migration lessons learned (33 issues resolved there)
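
For reference, the cross-database upserts mentioned above are possible because
PostgreSQL and SQLite 3.24+ accept the same INSERT ... ON CONFLICT ... DO UPDATE
syntax. A minimal sketch of such a helper (illustrative only; the name,
signature, and column names below are assumptions, not the actual db_helpers.py
API):

def build_upsert_sql(table, columns, conflict_cols, placeholder="%s"):
    """Build an INSERT ... ON CONFLICT ... DO UPDATE statement.

    The same statement runs on PostgreSQL and SQLite >= 3.24; pass
    placeholder="?" for sqlite3 and "%s" for psycopg2. Assumes at least
    one non-key column to update.
    """
    updates = ", ".join(
        f"{c} = EXCLUDED.{c}" for c in columns if c not in conflict_cols
    )
    return (
        f"INSERT INTO {table} ({', '.join(columns)}) "
        f"VALUES ({', '.join([placeholder] * len(columns))}) "
        f"ON CONFLICT ({', '.join(conflict_cols)}) DO UPDATE SET {updates}"
    )

# e.g. build_upsert_sql("stratplay", ["game", "play_num", "outcome"],
#      ["game", "play_num"]) targets the unique index on StratPlay(game,
#      play_num) noted above; "outcome" is a made-up column for illustration.
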
#!/usr/bin/env python3
"""
Paper Dynasty SQLite to PostgreSQL Migration Script

CRITICAL: This script preserves primary key IDs exactly as they exist in SQLite.
Failing to preserve IDs will cause all foreign key references to break.

Usage:
    # Dry run (validate only, no changes)
    python scripts/migrate_to_postgres.py --dry-run

    # Full migration
    python scripts/migrate_to_postgres.py

    # Migrate specific table only
    python scripts/migrate_to_postgres.py --table player

Environment Variables Required:
    POSTGRES_HOST, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD
    (POSTGRES_PORT is optional and defaults to 5432)

Based on lessons learned from Major Domo PostgreSQL migration (August 2025).
"""

import argparse
import logging
import os
import sqlite3
import sys
from datetime import datetime
from typing import Any, Dict, List, Optional

import psycopg2
from psycopg2.extras import execute_values

# Configure logging (the log directory must exist before FileHandler opens it)
os.makedirs("logs", exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler(
            f"logs/migration_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
        ),
    ],
)
logger = logging.getLogger(__name__)

# Migration order - tables with no FK dependencies first, then dependent tables
# This ensures parent records exist before children are inserted
MIGRATION_ORDER = [
    # Tier 1: No foreign key dependencies
    "current",
    "rarity",
    "event",
    "packtype",
    "notification",
    # Tier 2: Simple FK dependencies (single level)
    "cardset",  # -> event
    "mlbplayer",  # no FKs
    "gamerewards",  # -> packtype, player (but player not created yet, so nullable)
    # Tier 3: Core entity tables
    "team",  # -> event
    "player",  # -> cardset, rarity, mlbplayer
    # Tier 4: Dependent on core entities
    "pack",  # -> team, packtype, cardset
    "card",  # -> player, team, pack
    "roster",  # -> team, card (x26)
    "result",  # -> team (x2)
    "stratgame",  # -> team (x2)
    # Tier 5: Statistics and game data
    "battingstat",  # -> card, team, result
    "pitchingstat",  # -> card, team, result
    "stratplay",  # -> stratgame, player (many), team (many)
    "decision",  # -> stratgame, player, team
    # Tier 6: Card detail tables
    "battingcard",  # -> player
    "battingcardratings",  # -> battingcard
    "pitchingcard",  # -> player
    "pitchingcardratings",  # -> pitchingcard
    "cardposition",  # -> player
    # Tier 7: Other dependent tables
    "award",  # -> card, team
    "paperdex",  # -> team, player
    "reward",  # -> team
    "gauntletreward",  # -> event, gamerewards
    "gauntletrun",  # -> team, event
]

# Tables with explicit primary keys (not auto-increment)
EXPLICIT_PK_TABLES = {
    "player": "player_id",  # Uses player_id as explicit PK
}

# All other tables use 'id' as auto-increment PK


def get_sqlite_connection(db_path: str) -> sqlite3.Connection:
    """Connect to SQLite database."""
    if not os.path.exists(db_path):
        raise FileNotFoundError(f"SQLite database not found: {db_path}")

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    return conn


def get_postgres_connection() -> psycopg2.extensions.connection:
    """Connect to PostgreSQL database using environment variables."""
    required_vars = [
        "POSTGRES_HOST",
        "POSTGRES_DB",
        "POSTGRES_USER",
        "POSTGRES_PASSWORD",
    ]
    missing = [v for v in required_vars if not os.environ.get(v)]
    if missing:
        raise EnvironmentError(f"Missing required environment variables: {missing}")

    return psycopg2.connect(
        host=os.environ["POSTGRES_HOST"],
        database=os.environ["POSTGRES_DB"],
        user=os.environ["POSTGRES_USER"],
        password=os.environ["POSTGRES_PASSWORD"],
        port=int(os.environ.get("POSTGRES_PORT", "5432")),
    )
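
# Example shell environment for get_postgres_connection(); host/db/user
# values here are placeholders, not the deployment's actual settings:
#   export POSTGRES_HOST=localhost POSTGRES_DB=paper_dynasty \
#          POSTGRES_USER=pd_app POSTGRES_PASSWORD=... POSTGRES_PORT=5432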


def get_table_columns(sqlite_conn: sqlite3.Connection, table_name: str) -> List[str]:
    """Get column names for a table from SQLite."""
    cursor = sqlite_conn.execute(f"PRAGMA table_info({table_name})")
    return [row["name"] for row in cursor.fetchall()]


def get_primary_key_column(table_name: str) -> str:
    """Get the primary key column name for a table."""
    return EXPLICIT_PK_TABLES.get(table_name, "id")


def get_sequence_name(table_name: str, pk_column: str) -> str:
    """Get the PostgreSQL sequence name for a table's primary key."""
    return f"{table_name}_{pk_column}_seq"
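
# For example, get_sequence_name("team", "id") returns "team_id_seq", which
# matches PostgreSQL's default sequence naming for a serial "id" column.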


def get_record_count(conn, table_name: str, is_sqlite: bool = True) -> int:
    """Get record count for a table."""
    if is_sqlite:
        cursor = conn.execute(f"SELECT COUNT(*) FROM {table_name}")
        return cursor.fetchone()[0]
    else:
        cursor = conn.cursor()
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        return cursor.fetchone()[0]


def migrate_table(
    sqlite_conn: sqlite3.Connection,
    pg_conn: psycopg2.extensions.connection,
    table_name: str,
    batch_size: int = 500,
    dry_run: bool = False,
) -> Dict[str, Any]:
    """
    Migrate a single table from SQLite to PostgreSQL.

    CRITICAL: Preserves primary key IDs exactly.

    Returns:
        Dict with migration statistics
    """
    stats = {
        "table": table_name,
        "sqlite_count": 0,
        "postgres_count": 0,
        "inserted": 0,
        "skipped": 0,
        "errors": [],
        "success": False,
    }

    try:
        # Get column info
        columns = get_table_columns(sqlite_conn, table_name)
        pk_column = get_primary_key_column(table_name)

        # Count source records
        stats["sqlite_count"] = get_record_count(
            sqlite_conn, table_name, is_sqlite=True
        )
        logger.info(f"Table {table_name}: {stats['sqlite_count']} records to migrate")

        if stats["sqlite_count"] == 0:
            logger.info(f"Table {table_name}: No records to migrate")
            stats["success"] = True
            return stats

        if dry_run:
            logger.info(
                f"[DRY RUN] Would migrate {stats['sqlite_count']} records from {table_name}"
            )
            stats["success"] = True
            return stats

        # Read all records from SQLite
        cursor = sqlite_conn.execute(f"SELECT * FROM {table_name}")
        rows = cursor.fetchall()

        # Prepare PostgreSQL insert
        pg_cursor = pg_conn.cursor()

        # Build column list string
        columns_str = ", ".join(columns)
        placeholders = ", ".join(["%s"] * len(columns))

        # Process in batches
        for i in range(0, len(rows), batch_size):
            batch = rows[i : i + batch_size]
            batch_values = []

            for row in batch:
                # Convert sqlite3.Row to tuple, preserving all values including ID
                values = tuple(row[col] for col in columns)
                batch_values.append(values)

            try:
                # Use execute_values for efficient batch insert; the single %s
                # expands to the full multi-row VALUES list. Commit each
                # successful batch so a rollback on a later failure cannot
                # silently discard batches that already went in.
                insert_sql = f"INSERT INTO {table_name} ({columns_str}) VALUES %s"
                execute_values(pg_cursor, insert_sql, batch_values)
                pg_conn.commit()
                stats["inserted"] += len(batch)

            except psycopg2.errors.ForeignKeyViolation as e:
                # Foreign key error - fall back to individual inserts
                logger.warning(
                    f"FK violation in batch, falling back to individual inserts: {e}"
                )
                pg_conn.rollback()

                for values in batch_values:
                    try:
                        pg_cursor.execute(
                            f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders})",
                            values,
                        )
                        # Commit per row here too; otherwise the rollback on
                        # the next bad record would erase this insert as well
                        pg_conn.commit()
                        stats["inserted"] += 1
                    except psycopg2.errors.ForeignKeyViolation as e:
                        stats["skipped"] += 1
                        # Extract ID for logging
                        pk_idx = columns.index(pk_column) if pk_column in columns else 0
                        record_id = values[pk_idx]
                        stats["errors"].append(
                            {
                                "id": record_id,
                                "error": "ForeignKeyViolation",
                                "message": str(e),
                            }
                        )
                        logger.warning(
                            f"Skipped orphaned record {table_name}.{pk_column}={record_id}"
                        )
                        pg_conn.rollback()
                    except Exception as e:
                        stats["skipped"] += 1
                        pk_idx = columns.index(pk_column) if pk_column in columns else 0
                        record_id = values[pk_idx]
                        stats["errors"].append(
                            {
                                "id": record_id,
                                "error": type(e).__name__,
                                "message": str(e),
                            }
                        )
                        logger.error(
                            f"Error inserting {table_name}.{pk_column}={record_id}: {e}"
                        )
                        pg_conn.rollback()

            logger.info(
                f"Table {table_name}: Processed {min(i + batch_size, len(rows))}/{len(rows)} records"
            )

        # Commit any remaining work
        pg_conn.commit()

        # CRITICAL: Reset the PostgreSQL sequence to MAX(id) + 1
        # Without this, new inserts will fail with duplicate key errors
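        # Example: if team ends at MAX(id) = 142 (an illustrative value),
        # setval() advances team_id_seq so the next INSERT is assigned 143
        # instead of colliding with an already-migrated row.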
        sequence_name = get_sequence_name(table_name, pk_column)
        try:
            pg_cursor.execute(f"""
                SELECT setval('{sequence_name}', COALESCE((SELECT MAX({pk_column}) FROM {table_name}), 1), true)
            """)
            pg_conn.commit()
            logger.info(f"Table {table_name}: Reset sequence {sequence_name}")
        except psycopg2.errors.UndefinedTable as e:
            # Sequence might not exist for explicit PK tables
            logger.warning(f"Could not reset sequence {sequence_name}: {e}")
            pg_conn.rollback()

        # Verify counts
        stats["postgres_count"] = get_record_count(pg_conn, table_name, is_sqlite=False)

        if stats["postgres_count"] == stats["sqlite_count"]:
            logger.info(
                f"Table {table_name}: SUCCESS - {stats['postgres_count']} records migrated"
            )
            stats["success"] = True
        elif stats["postgres_count"] == stats["inserted"]:
            logger.warning(
                f"Table {table_name}: PARTIAL - {stats['inserted']} inserted, "
                f"{stats['skipped']} skipped (orphaned FK records)"
            )
            # Partial success is acceptable for orphaned records
            stats["success"] = True
        else:
            logger.error(
                f"Table {table_name}: MISMATCH - SQLite: {stats['sqlite_count']}, "
                f"PostgreSQL: {stats['postgres_count']}"
            )
            stats["success"] = False

    except Exception as e:
        logger.error(f"Table {table_name}: FAILED - {e}")
        stats["errors"].append({"error": type(e).__name__, "message": str(e)})
        stats["success"] = False
        pg_conn.rollback()

    return stats


def verify_id_preservation(
    sqlite_conn: sqlite3.Connection,
    pg_conn: psycopg2.extensions.connection,
    sample_tables: Optional[List[str]] = None,
) -> bool:
    """
    Verify that primary key IDs were preserved correctly.

    This is a CRITICAL check - if IDs don't match, the migration has failed.
    """
    if sample_tables is None:
        sample_tables = ["player", "team", "card", "stratgame"]

    all_match = True

    for table_name in sample_tables:
        pk_column = get_primary_key_column(table_name)

        # Get first and last 5 IDs from SQLite
        sqlite_cursor = sqlite_conn.execute(
            f"SELECT {pk_column} FROM {table_name} ORDER BY {pk_column} LIMIT 5"
        )
        sqlite_first = [row[0] for row in sqlite_cursor.fetchall()]

        sqlite_cursor = sqlite_conn.execute(
            f"SELECT {pk_column} FROM {table_name} ORDER BY {pk_column} DESC LIMIT 5"
        )
        sqlite_last = [row[0] for row in sqlite_cursor.fetchall()]

        # Get the same IDs from PostgreSQL
        pg_cursor = pg_conn.cursor()
        pg_cursor.execute(
            f"SELECT {pk_column} FROM {table_name} ORDER BY {pk_column} LIMIT 5"
        )
        pg_first = [row[0] for row in pg_cursor.fetchall()]

        pg_cursor.execute(
            f"SELECT {pk_column} FROM {table_name} ORDER BY {pk_column} DESC LIMIT 5"
        )
        pg_last = [row[0] for row in pg_cursor.fetchall()]

        if sqlite_first == pg_first and sqlite_last == pg_last:
            logger.info(f"ID Verification {table_name}: PASS - IDs match")
        else:
            logger.error(
                f"ID Verification {table_name}: FAIL - "
                f"SQLite first: {sqlite_first}, PG first: {pg_first}, "
                f"SQLite last: {sqlite_last}, PG last: {pg_last}"
            )
            all_match = False

    return all_match


def main():
    parser = argparse.ArgumentParser(
        description="Migrate Paper Dynasty from SQLite to PostgreSQL"
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Validate without making changes"
    )
    parser.add_argument("--table", type=str, help="Migrate only this table")
    parser.add_argument(
        "--sqlite-path",
        type=str,
        default="storage/pd_master.db",
        help="Path to SQLite database",
    )
    parser.add_argument(
        "--batch-size", type=int, default=500, help="Batch size for inserts"
    )
    parser.add_argument(
        "--skip-verification", action="store_true", help="Skip ID verification"
    )
    args = parser.parse_args()

    logger.info("=" * 60)
    logger.info("Paper Dynasty SQLite to PostgreSQL Migration")
    logger.info("=" * 60)

    if args.dry_run:
        logger.info("DRY RUN MODE - No changes will be made")

    # Connect to databases
    try:
        sqlite_conn = get_sqlite_connection(args.sqlite_path)
        logger.info(f"Connected to SQLite: {args.sqlite_path}")
    except FileNotFoundError as e:
        logger.error(str(e))
        sys.exit(1)

    try:
        pg_conn = get_postgres_connection()
        logger.info(
            f"Connected to PostgreSQL: {os.environ['POSTGRES_HOST']}/{os.environ['POSTGRES_DB']}"
        )
    except EnvironmentError as e:
        logger.error(str(e))
        sys.exit(1)
    except psycopg2.Error as e:
        logger.error(f"PostgreSQL connection failed: {e}")
        sys.exit(1)

    # Determine tables to migrate
    tables_to_migrate = [args.table] if args.table else list(MIGRATION_ORDER)

    # Validate tables exist. Rebuild the list rather than calling remove()
    # while iterating over it, which would silently skip entries.
    available_tables = set()
    cursor = sqlite_conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
    for row in cursor:
        available_tables.add(row[0])

    for table in tables_to_migrate:
        if table not in available_tables:
            logger.warning(f"Table {table} not found in SQLite, skipping")
    tables_to_migrate = [t for t in tables_to_migrate if t in available_tables]

    # Migration summary
    results = []
    start_time = datetime.now()

    logger.info(f"Migrating {len(tables_to_migrate)} tables...")
    logger.info("-" * 60)

    for table_name in tables_to_migrate:
        stats = migrate_table(
            sqlite_conn,
            pg_conn,
            table_name,
            batch_size=args.batch_size,
            dry_run=args.dry_run,
        )
        results.append(stats)
        logger.info("-" * 60)

    # Summary
    elapsed = datetime.now() - start_time
    successful = sum(1 for r in results if r["success"])
    total_records = sum(r["inserted"] for r in results)
    total_skipped = sum(r["skipped"] for r in results)

    logger.info("=" * 60)
    logger.info("MIGRATION SUMMARY")
    logger.info("=" * 60)
    logger.info(f"Tables: {successful}/{len(results)} successful")
    logger.info(f"Records: {total_records} inserted, {total_skipped} skipped")
    logger.info(f"Duration: {elapsed}")

    # Failed tables
    failed = [r for r in results if not r["success"]]
    if failed:
        logger.error("FAILED TABLES:")
        for r in failed:
            logger.error(f"  - {r['table']}: {r['errors']}")

    # ID Verification (CRITICAL)
    if not args.dry_run and not args.skip_verification:
        logger.info("-" * 60)
        logger.info("VERIFYING ID PRESERVATION...")
        if verify_id_preservation(sqlite_conn, pg_conn):
            logger.info("ID VERIFICATION: PASS - All IDs preserved correctly")
        else:
            logger.error("ID VERIFICATION: FAIL - IDs do not match!")
            logger.error(
                "THIS IS A CRITICAL FAILURE - Foreign key references may be broken"
            )
            sys.exit(1)

    # Close connections
    sqlite_conn.close()
    pg_conn.close()

    if all(r["success"] for r in results):
        logger.info("MIGRATION COMPLETE - SUCCESS")
        sys.exit(0)
    else:
        logger.warning("MIGRATION COMPLETE - PARTIAL SUCCESS (some tables failed)")
        sys.exit(1)


if __name__ == "__main__":
    main()