paper-dynasty-database/app/db_helpers.py
Cal Corum 0cba52cea5 PostgreSQL migration: Complete code preparation phase
- Add db_helpers.py with cross-database upsert functions for SQLite/PostgreSQL
- Replace 12 on_conflict_replace() calls with PostgreSQL-compatible upserts
- Add unique indexes: StratPlay(game, play_num), Decision(game, pitcher)
- Add max_length to Team model fields (abbrev, sname, lname)
- Fix boolean comparison in teams.py (== 0/1 to == False/True)
- Create migrate_to_postgres.py with ID-preserving migration logic
- Create audit_sqlite.py for pre-migration data integrity checks
- Add PROJECT_PLAN.json for migration tracking
- Add .secrets/ to .gitignore for credentials

Audit results: 658,963 records across 29 tables, 2,390 orphaned stats (expected)

Based on Major Domo migration lessons learned (33 issues resolved there)
2026-01-25 23:05:54 -06:00

285 lines
8.6 KiB
Python

"""
Database helper functions for PostgreSQL compatibility.
This module provides cross-database compatible upsert operations that work
with both SQLite and PostgreSQL.
The key difference:
- SQLite: .on_conflict_replace() works directly
- PostgreSQL: Requires .on_conflict() with explicit conflict_target and update dict
Usage:
from app.db_helpers import upsert_many, DATABASE_TYPE
# Instead of:
Model.insert_many(batch).on_conflict_replace().execute()
# Use:
upsert_many(Model, batch, conflict_fields=['field1', 'field2'])
"""
import os
from typing import Any, Dict, List, Type, Union
from peewee import Model, SQL
# Backend selector read from the DATABASE_TYPE environment variable
# (lower-cased, falling back to "sqlite" when unset). Exposed at module
# level so callers can import it alongside the upsert helpers.
DATABASE_TYPE = os.getenv("DATABASE_TYPE", "sqlite").lower()
def get_model_fields(model: Type[Model], exclude: List[str] = None) -> List[str]:
    """
    List a model's field names in ``_meta.sorted_fields`` order.

    Args:
        model: Peewee Model class whose ``_meta.sorted_fields`` is read.
        exclude: Field names to leave out of the result (for example the
            primary key). ``None`` means nothing is left out.

    Returns:
        The names of all fields that are not listed in ``exclude``.
    """
    skipped = [] if exclude is None else exclude

    names = []
    for field in model._meta.sorted_fields:
        if field.name in skipped:
            continue
        names.append(field.name)
    return names
def upsert_many(
    model: Type[Model],
    data: List[Dict[str, Any]],
    conflict_fields: List[str],
    update_fields: List[str] = None,
    batch_size: int = 100,
) -> int:
    """
    Insert or update multiple records in a database-agnostic way.

    When ``DATABASE_TYPE`` is ``"postgresql"`` every batch is written with
    ``INSERT ... ON CONFLICT (<conflict_fields>) DO UPDATE`` (or
    ``DO NOTHING`` when there is nothing to update). For any other value the
    batch is written with ``on_conflict_replace()``; on that path
    ``conflict_fields`` and ``update_fields`` are not used.

    Batches are executed one after another; this function does not open a
    transaction itself, so a failure leaves earlier batches applied unless
    the caller wraps the call in one.

    Args:
        model: Peewee Model class.
        data: List of dictionaries with field values.
        conflict_fields: Field names that define uniqueness (the
            PostgreSQL ``ON CONFLICT`` target).
        update_fields: Field names to overwrite on conflict. Defaults to
            every model field except the primary key and the conflict
            fields. Names that are not attributes of ``model`` are skipped.
        batch_size: Number of records per INSERT statement; must be >= 1.

    Returns:
        Number of records processed (0 when ``data`` is empty).

    Raises:
        ValueError: If ``data`` is non-empty and ``batch_size`` is below 1.

    Example:
        # For BattingCard with unique constraint on (player, variant)
        upsert_many(
            BattingCard,
            batch_data,
            conflict_fields=['player', 'variant']
        )
    """
    if not data:
        return 0

    # A zero step makes range() raise and a negative step silently yields no
    # batches at all; reject both up front with one clear error.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Determine update fields if not specified: everything except the
    # primary key and the conflict fields themselves.
    if update_fields is None:
        primary_key = model._meta.primary_key
        pk_name = primary_key.name if primary_key else "id"
        update_fields = get_model_fields(model, exclude=[pk_name, *conflict_fields])

    use_postgres = DATABASE_TYPE == "postgresql"
    conflict_target = None
    update_dict = None
    if use_postgres:
        # Imported lazily so the non-PostgreSQL path does not need it.
        from peewee import EXCLUDED

        # The conflict target and update mapping do not depend on the batch,
        # so build them once instead of once per batch.
        conflict_target = [getattr(model, name) for name in conflict_fields]

        update_dict = {}
        for name in update_fields:
            if not hasattr(model, name):
                continue
            field = getattr(model, name)
            # EXCLUDED is addressed by *database column* name. For a
            # ForeignKeyField the column (e.g. "game_id") differs from the
            # field name ("game"), so the field name alone would reference
            # a column that does not exist.
            update_dict[field] = EXCLUDED[getattr(field, "column_name", name)]

    total = 0
    for start in range(0, len(data), batch_size):
        batch = data[start : start + batch_size]
        query = model.insert_many(batch)

        if not use_postgres:
            # SQLite: Use on_conflict_replace (simpler)
            query = query.on_conflict_replace()
        elif update_dict:
            # PostgreSQL: ON CONFLICT with explicit target and update
            query = query.on_conflict(
                conflict_target=conflict_target,
                action="update",
                update=update_dict,
            )
        else:
            # No fields to update, just ignore conflicts
            query = query.on_conflict_ignore()

        query.execute()
        total += len(batch)

    return total
def upsert_by_pk(
    model: Type[Model],
    data: List[Dict[str, Any]],
    pk_field: str = None,
    batch_size: int = 100,
) -> int:
    """
    Upsert records, treating the primary key as the only conflict field.

    Intended for models whose primary key value is supplied in the data
    itself (like Player with player_id). The work is delegated to
    ``upsert_many`` with ``conflict_fields=[pk_field]``.

    Args:
        model: Peewee Model class.
        data: List of dictionaries with field values (including the PK).
        pk_field: Primary key field name. When omitted it is taken from
            ``model._meta.primary_key``, falling back to ``"id"``.
        batch_size: Number of records per batch.

    Returns:
        Number of records processed (0 when ``data`` is empty).

    Example:
        # For Player with explicit player_id
        upsert_by_pk(Player, player_data, pk_field='player_id')
    """
    if not data:
        return 0

    if pk_field is None:
        # Auto-detect the primary key name from the model metadata.
        primary_key = model._meta.primary_key
        if primary_key:
            pk_field = primary_key.name
        else:
            pk_field = "id"

    key_fields = [pk_field]
    return upsert_many(model, data, conflict_fields=key_fields, batch_size=batch_size)
# Pre-configured upsert functions for specific models
# These encode the unique constraint knowledge for each model
def upsert_players(data: List[Dict], batch_size: int = 15) -> int:
    """Insert or update Player rows, matching existing rows on player_id.

    Returns the number of records processed, as reported by upsert_by_pk.
    """
    from app.db_engine import Player

    return upsert_by_pk(
        model=Player,
        data=data,
        pk_field="player_id",
        batch_size=batch_size,
    )
def upsert_batting_cards(data: List[Dict], batch_size: int = 30) -> int:
    """Insert or update BattingCard rows, keyed on the (player, variant) pair.

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import BattingCard

    unique_key = ["player", "variant"]
    return upsert_many(
        model=BattingCard,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_pitching_cards(data: List[Dict], batch_size: int = 30) -> int:
    """Insert or update PitchingCard rows, keyed on the (player, variant) pair.

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import PitchingCard

    unique_key = ["player", "variant"]
    return upsert_many(
        model=PitchingCard,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_batting_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """Insert or update BattingCardRatings rows, keyed on (battingcard, vs_hand).

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import BattingCardRatings

    unique_key = ["battingcard", "vs_hand"]
    return upsert_many(
        model=BattingCardRatings,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_pitching_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """Insert or update PitchingCardRatings rows, keyed on (pitchingcard, vs_hand).

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import PitchingCardRatings

    unique_key = ["pitchingcard", "vs_hand"]
    return upsert_many(
        model=PitchingCardRatings,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_card_positions(data: List[Dict], batch_size: int = 30) -> int:
    """Insert or update CardPosition rows, keyed on (player, variant, position).

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import CardPosition

    unique_key = ["player", "variant", "position"]
    return upsert_many(
        model=CardPosition,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_strat_plays(data: List[Dict], batch_size: int = 20) -> int:
    """Insert or update StratPlay rows, keyed on the (game, play_num) pair.

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import StratPlay

    unique_key = ["game", "play_num"]
    return upsert_many(
        model=StratPlay,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_decisions(data: List[Dict], batch_size: int = 10) -> int:
    """Insert or update Decision rows, keyed on the (game, pitcher) pair.

    Returns the number of records processed, as reported by upsert_many.
    """
    from app.db_engine import Decision

    unique_key = ["game", "pitcher"]
    return upsert_many(
        model=Decision,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
def upsert_gauntlet_rewards(data: List[Dict], batch_size: int = 15) -> int:
    """
    Upsert GauntletReward records.

    GauntletReward has no natural unique key defined, so the decision is
    made per record: records that carry an ``'id'`` key are upserted on
    that primary key, records without one are plainly inserted (which may
    fail on duplicates).

    The two groups are written separately because a multi-row INSERT has a
    single column list; mixing records with and without ``'id'`` in one
    statement cannot represent both shapes.

    Args:
        data: List of dictionaries with field values.
        batch_size: Number of records per batch.

    Returns:
        Number of records processed across both groups.
    """
    from app.db_engine import GauntletReward

    # Partition by presence of the 'id' key (not by its value).
    with_ids = [record for record in data if "id" in record]
    without_ids = [record for record in data if "id" not in record]

    total = 0
    if with_ids:
        total += upsert_by_pk(
            GauntletReward, with_ids, pk_field="id", batch_size=batch_size
        )

    # No IDs provided - just insert (may fail on duplicates)
    for start in range(0, len(without_ids), batch_size):
        batch = without_ids[start : start + batch_size]
        GauntletReward.insert_many(batch).execute()
        total += len(batch)

    return total
def upsert_mlb_players(data: List[Dict], batch_size: int = 15) -> int:
    """
    Insert MlbPlayer records in batches.

    Despite the name, no conflict handling is applied here: each batch is a
    plain ``insert_many``. Per the original author's note, callers are
    expected to have filtered out duplicates before calling this function,
    and MlbPlayer has no suitable unique key other than its id.

    Args:
        data: List of dictionaries with field values.
        batch_size: Number of records per batch.

    Returns:
        Number of records inserted.
    """
    from app.db_engine import MlbPlayer

    inserted = 0
    row_count = len(data)
    for offset in range(0, row_count, batch_size):
        chunk = data[offset : offset + batch_size]
        MlbPlayer.insert_many(chunk).execute()
        inserted += len(chunk)
    return inserted