# Change summary:
# - Add db_helpers.py with cross-database upsert functions for SQLite/PostgreSQL
# - Replace 12 on_conflict_replace() calls with PostgreSQL-compatible upserts
# - Add unique indexes: StratPlay(game, play_num), Decision(game, pitcher)
# - Add max_length to Team model fields (abbrev, sname, lname)
# - Fix boolean comparison in teams.py (== 0/1 to == False/True)
# - Create migrate_to_postgres.py with ID-preserving migration logic
# - Create audit_sqlite.py for pre-migration data integrity checks
# - Add PROJECT_PLAN.json for migration tracking
# - Add .secrets/ to .gitignore for credentials
#
# Audit results: 658,963 records across 29 tables, 2,390 orphaned stats (expected).
# Based on Major Domo migration lessons learned (33 issues resolved there).
285 lines
8.6 KiB
Python
285 lines
8.6 KiB
Python
"""
|
|
Database helper functions for PostgreSQL compatibility.
|
|
|
|
This module provides cross-database compatible upsert operations that work
|
|
with both SQLite and PostgreSQL.
|
|
|
|
The key difference:
|
|
- SQLite: .on_conflict_replace() works directly
|
|
- PostgreSQL: Requires .on_conflict() with explicit conflict_target and update dict
|
|
|
|
Usage:
|
|
from app.db_helpers import upsert_many, DATABASE_TYPE
|
|
|
|
# Instead of:
|
|
Model.insert_many(batch).on_conflict_replace().execute()
|
|
|
|
# Use:
|
|
upsert_many(Model, batch, conflict_fields=['field1', 'field2'])
|
|
"""
|
|
|
|
import os
|
|
from typing import Any, Dict, List, Type, Union
|
|
|
|
from peewee import Model, SQL
|
|
|
|
# Backend selector, read once at import time from the DATABASE_TYPE
# environment variable; lower-cased, defaulting to "sqlite" when unset.
DATABASE_TYPE = os.getenv("DATABASE_TYPE", "sqlite").lower()
|
|
|
|
|
|
def get_model_fields(model: Type[Model], exclude: List[str] = None) -> List[str]:
    """
    Return the model's field names in ``_meta.sorted_fields`` order.

    Args:
        model: Peewee Model class whose ``_meta.sorted_fields`` is walked.
        exclude: Field names to leave out of the result (e.g. the primary
            key). ``None`` means nothing is left out.

    Returns:
        List of the remaining field names.
    """
    skipped = [] if exclude is None else exclude

    names = []
    for column in model._meta.sorted_fields:
        if column.name in skipped:
            continue
        names.append(column.name)
    return names
|
|
|
|
|
|
def upsert_many(
    model: Type[Model],
    data: List[Dict[str, Any]],
    conflict_fields: List[str],
    update_fields: List[str] = None,
    batch_size: int = 100,
) -> int:
    """
    Insert or update multiple records in a database-agnostic way.

    When ``DATABASE_TYPE`` is ``"postgresql"`` each batch is written with
    ``INSERT ... ON CONFLICT (<conflict_fields>) DO UPDATE``; if there is
    nothing to update, conflicting rows are ignored instead. For any other
    ``DATABASE_TYPE`` value the batch is written with ``on_conflict_replace()``
    and ``conflict_fields`` / ``update_fields`` are not consulted.

    Args:
        model: Peewee Model class.
        data: List of dictionaries with field values.
        conflict_fields: Field names that define uniqueness (the PostgreSQL
            ON CONFLICT target).
        update_fields: Field names to overwrite on conflict. Defaults to every
            model field except the primary key and the conflict fields.
            Names that are not attributes of ``model`` are skipped.
        batch_size: Number of records per INSERT statement; must be >= 1.

    Returns:
        Number of records processed (``len(data)``), or 0 for empty input.

    Raises:
        ValueError: If ``data`` is non-empty and ``batch_size`` is less than 1.

    Example:
        # For BattingCard with unique constraint on (player, variant)
        upsert_many(
            BattingCard,
            batch_data,
            conflict_fields=['player', 'variant']
        )
    """
    if not data:
        return 0

    # A negative step would make range() yield nothing and silently drop every
    # record; zero already raised ValueError from range(). Fail loudly for both.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Determine update fields if not specified: everything but PK and the
    # conflict fields themselves.
    if update_fields is None:
        pk_name = model._meta.primary_key.name if model._meta.primary_key else "id"
        update_fields = get_model_fields(model, exclude=[pk_name, *conflict_fields])

    use_postgres = DATABASE_TYPE == "postgresql"
    conflict_target = None
    update_dict = None

    if use_postgres:
        # The conflict clause does not depend on the batch, so build it once.
        from peewee import EXCLUDED

        conflict_target = [getattr(model, name) for name in conflict_fields]

        update_dict = {}
        for name in update_fields:
            if not hasattr(model, name):
                continue
            field = getattr(model, name)
            # EXCLUDED.<x> must name the table *column*. For foreign keys the
            # column (e.g. "player_id") differs from the field name
            # ("player"), so look it up on the field rather than reusing the
            # field name.
            update_dict[field] = EXCLUDED[getattr(field, "column_name", name)]

    total = 0
    for start in range(0, len(data), batch_size):
        batch = data[start : start + batch_size]
        query = model.insert_many(batch)

        if not use_postgres:
            # SQLite: REPLACE semantics handle the conflict directly.
            query = query.on_conflict_replace()
        elif update_dict:
            query = query.on_conflict(
                conflict_target=conflict_target, action="update", update=update_dict
            )
        else:
            # No fields to update, just ignore conflicts.
            query = query.on_conflict_ignore()

        query.execute()
        total += len(batch)

    return total
|
|
|
|
|
|
def upsert_by_pk(
    model: Type[Model],
    data: List[Dict[str, Any]],
    pk_field: str = None,
    batch_size: int = 100,
) -> int:
    """
    Upsert records with the primary key as the only conflict field.

    Intended for rows whose primary key value is supplied in ``data``
    (for example Player rows carrying ``player_id``).

    Args:
        model: Peewee Model class.
        data: List of dictionaries with field values (including the PK).
        pk_field: Primary key field name. When omitted it is taken from
            ``model._meta.primary_key``, falling back to ``"id"`` if that
            attribute is falsy.
        batch_size: Number of records per batch, forwarded to upsert_many.

    Returns:
        0 for empty ``data``; otherwise the value returned by upsert_many.

    Example:
        # For Player with explicit player_id
        upsert_by_pk(Player, player_data, pk_field='player_id')
    """
    if not data:
        return 0

    conflict_name = pk_field
    if conflict_name is None:
        # Auto-detect the primary key name from the model metadata.
        if model._meta.primary_key:
            conflict_name = model._meta.primary_key.name
        else:
            conflict_name = "id"

    return upsert_many(
        model, data, conflict_fields=[conflict_name], batch_size=batch_size
    )
|
|
|
|
|
|
# Pre-configured upsert functions for specific models
|
|
# These encode the unique constraint knowledge for each model
|
|
|
|
|
|
def upsert_players(data: List[Dict], batch_size: int = 15) -> int:
    """
    Upsert Player rows, keyed on ``player_id``.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_by_pk.

    Returns:
        The count returned by upsert_by_pk.
    """
    # Imported lazily inside the function, as in the other model-specific helpers.
    from app.db_engine import Player

    processed = upsert_by_pk(
        Player, data, pk_field="player_id", batch_size=batch_size
    )
    return processed
|
|
|
|
|
|
def upsert_batting_cards(data: List[Dict], batch_size: int = 30) -> int:
    """
    Upsert BattingCard rows with (player, variant) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import BattingCard

    # NOTE(review): presumes BattingCard has a unique index on these fields.
    conflict_key = ["player", "variant"]
    return upsert_many(
        BattingCard, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_pitching_cards(data: List[Dict], batch_size: int = 30) -> int:
    """
    Upsert PitchingCard rows with (player, variant) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import PitchingCard

    # NOTE(review): presumes PitchingCard has a unique index on these fields.
    conflict_key = ["player", "variant"]
    return upsert_many(
        PitchingCard, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_batting_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """
    Upsert BattingCardRatings rows with (battingcard, vs_hand) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import BattingCardRatings

    # NOTE(review): presumes BattingCardRatings has a unique index on these fields.
    conflict_key = ["battingcard", "vs_hand"]
    return upsert_many(
        BattingCardRatings, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_pitching_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """
    Upsert PitchingCardRatings rows with (pitchingcard, vs_hand) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import PitchingCardRatings

    # NOTE(review): presumes PitchingCardRatings has a unique index on these fields.
    conflict_key = ["pitchingcard", "vs_hand"]
    return upsert_many(
        PitchingCardRatings, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_card_positions(data: List[Dict], batch_size: int = 30) -> int:
    """
    Upsert CardPosition rows with (player, variant, position) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import CardPosition

    # NOTE(review): presumes CardPosition has a unique index on these fields.
    conflict_key = ["player", "variant", "position"]
    return upsert_many(
        CardPosition, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_strat_plays(data: List[Dict], batch_size: int = 20) -> int:
    """
    Upsert StratPlay rows with (game, play_num) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import StratPlay

    # NOTE(review): presumes StratPlay has a unique index on these fields.
    conflict_key = ["game", "play_num"]
    return upsert_many(
        StratPlay, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_decisions(data: List[Dict], batch_size: int = 10) -> int:
    """
    Upsert Decision rows with (game, pitcher) as the conflict key.

    Args:
        data: Row dictionaries to write.
        batch_size: Batch size forwarded to upsert_many.

    Returns:
        The count returned by upsert_many.
    """
    from app.db_engine import Decision

    # NOTE(review): presumes Decision has a unique index on these fields.
    conflict_key = ["game", "pitcher"]
    return upsert_many(
        Decision, data, conflict_fields=conflict_key, batch_size=batch_size
    )
|
|
|
|
|
|
def upsert_gauntlet_rewards(data: List[Dict], batch_size: int = 15) -> int:
    """
    Write GauntletReward rows, upserting by ``id`` when any row carries one.

    If at least one dictionary in ``data`` has an ``"id"`` key, the whole list
    is handed to upsert_by_pk with ``pk_field="id"``. Otherwise the rows are
    plainly inserted in batches with no conflict handling, so a duplicate may
    make the insert fail.

    Args:
        data: Row dictionaries to write.
        batch_size: Number of rows per batch.

    Returns:
        The upsert_by_pk result on the upsert path, or the number of rows
        inserted on the insert-only path.
    """
    from app.db_engine import GauntletReward

    # A single record with an explicit id switches the whole call to upsert mode.
    for record in data:
        if "id" in record:
            return upsert_by_pk(
                GauntletReward, data, pk_field="id", batch_size=batch_size
            )

    # No ids anywhere: plain batched insert.
    inserted = 0
    for offset in range(0, len(data), batch_size):
        chunk = data[offset : offset + batch_size]
        GauntletReward.insert_many(chunk).execute()
        inserted += len(chunk)
    return inserted
|
|
|
|
|
|
def upsert_mlb_players(data: List[Dict], batch_size: int = 15) -> int:
    """
    Insert MlbPlayer rows in batches, with no conflict handling.

    Despite the name this performs a plain insert. The original note states
    that callers filter out duplicates beforehand; that cannot be confirmed
    from this module.

    Args:
        data: Row dictionaries to insert.
        batch_size: Number of rows per batch.

    Returns:
        Number of rows inserted.
    """
    from app.db_engine import MlbPlayer

    inserted = 0
    batch_starts = range(0, len(data), batch_size)
    for offset in batch_starts:
        chunk = data[offset : offset + batch_size]
        MlbPlayer.insert_many(chunk).execute()
        inserted += len(chunk)
    return inserted
|