""" Database helper functions for PostgreSQL compatibility. This module provides cross-database compatible upsert operations that work with both SQLite and PostgreSQL. The key difference: - SQLite: .on_conflict_replace() works directly - PostgreSQL: Requires .on_conflict() with explicit conflict_target and update dict Usage: from app.db_helpers import upsert_many, DATABASE_TYPE # Instead of: Model.insert_many(batch).on_conflict_replace().execute() # Use: upsert_many(Model, batch, conflict_fields=['field1', 'field2']) """ import os from typing import Any, Dict, List, Type, Union from peewee import Model, SQL # Re-export DATABASE_TYPE for convenience DATABASE_TYPE = os.environ.get("DATABASE_TYPE", "sqlite").lower() def get_model_fields(model: Type[Model], exclude: List[str] = None) -> List[str]: """ Get all field names for a model, excluding specified fields. Args: model: Peewee Model class exclude: Field names to exclude (e.g., primary key) Returns: List of field names """ if exclude is None: exclude = [] return [ field.name for field in model._meta.sorted_fields if field.name not in exclude ] def upsert_many( model: Type[Model], data: List[Dict[str, Any]], conflict_fields: List[str], update_fields: List[str] = None, batch_size: int = 100, ) -> int: """ Insert or update multiple records in a database-agnostic way. Works with both SQLite (on_conflict_replace) and PostgreSQL (on_conflict). Args: model: Peewee Model class data: List of dictionaries with field values conflict_fields: Fields that define uniqueness (for PostgreSQL ON CONFLICT) update_fields: Fields to update on conflict (defaults to all non-conflict fields) batch_size: Number of records per batch Returns: Number of records processed Example: # For BattingCard with unique constraint on (player, variant) upsert_many( BattingCard, batch_data, conflict_fields=['player', 'variant'] ) """ if not data: return 0 total = 0 # Determine update fields if not specified if update_fields is None: # Get primary key name pk_name = model._meta.primary_key.name if model._meta.primary_key else "id" # Update all fields except PK and conflict fields exclude = [pk_name] + conflict_fields update_fields = get_model_fields(model, exclude=exclude) # Process in batches for i in range(0, len(data), batch_size): batch = data[i : i + batch_size] if DATABASE_TYPE == "postgresql": # PostgreSQL: Use ON CONFLICT with explicit target and update from peewee import EXCLUDED # Build conflict target - get actual field objects conflict_target = [getattr(model, f) for f in conflict_fields] # Build update dict update_dict = { getattr(model, f): EXCLUDED[f] for f in update_fields if hasattr(model, f) } if update_dict: model.insert_many(batch).on_conflict( conflict_target=conflict_target, action="update", update=update_dict ).execute() else: # No fields to update, just ignore conflicts model.insert_many(batch).on_conflict_ignore().execute() else: # SQLite: Use on_conflict_replace (simpler) model.insert_many(batch).on_conflict_replace().execute() total += len(batch) return total def upsert_by_pk( model: Type[Model], data: List[Dict[str, Any]], pk_field: str = None, batch_size: int = 100, ) -> int: """ Upsert records using primary key as conflict target. This is for models where the primary key is explicitly provided in the data (like Player with player_id). Args: model: Peewee Model class data: List of dictionaries with field values (including PK) pk_field: Primary key field name (auto-detected if not specified) batch_size: Number of records per batch Returns: Number of records processed Example: # For Player with explicit player_id upsert_by_pk(Player, player_data, pk_field='player_id') """ if not data: return 0 # Auto-detect primary key if pk_field is None: pk_field = model._meta.primary_key.name if model._meta.primary_key else "id" return upsert_many(model, data, conflict_fields=[pk_field], batch_size=batch_size) # Pre-configured upsert functions for specific models # These encode the unique constraint knowledge for each model def upsert_players(data: List[Dict], batch_size: int = 15) -> int: """Upsert Player records using player_id as conflict target.""" from app.db_engine import Player return upsert_by_pk(Player, data, pk_field="player_id", batch_size=batch_size) def upsert_batting_cards(data: List[Dict], batch_size: int = 30) -> int: """Upsert BattingCard records using (player, variant) unique constraint.""" from app.db_engine import BattingCard return upsert_many( BattingCard, data, conflict_fields=["player", "variant"], batch_size=batch_size ) def upsert_pitching_cards(data: List[Dict], batch_size: int = 30) -> int: """Upsert PitchingCard records using (player, variant) unique constraint.""" from app.db_engine import PitchingCard return upsert_many( PitchingCard, data, conflict_fields=["player", "variant"], batch_size=batch_size ) def upsert_batting_card_ratings(data: List[Dict], batch_size: int = 30) -> int: """Upsert BattingCardRatings using (battingcard, vs_hand) unique constraint.""" from app.db_engine import BattingCardRatings return upsert_many( BattingCardRatings, data, conflict_fields=["battingcard", "vs_hand"], batch_size=batch_size, ) def upsert_pitching_card_ratings(data: List[Dict], batch_size: int = 30) -> int: """Upsert PitchingCardRatings using (pitchingcard, vs_hand) unique constraint.""" from app.db_engine import PitchingCardRatings return upsert_many( PitchingCardRatings, data, conflict_fields=["pitchingcard", "vs_hand"], batch_size=batch_size, ) def upsert_card_positions(data: List[Dict], batch_size: int = 30) -> int: """Upsert CardPosition using (player, variant, position) unique constraint.""" from app.db_engine import CardPosition return upsert_many( CardPosition, data, conflict_fields=["player", "variant", "position"], batch_size=batch_size, ) def upsert_strat_plays(data: List[Dict], batch_size: int = 20) -> int: """Upsert StratPlay using (game, play_num) unique constraint.""" from app.db_engine import StratPlay return upsert_many( StratPlay, data, conflict_fields=["game", "play_num"], batch_size=batch_size ) def upsert_decisions(data: List[Dict], batch_size: int = 10) -> int: """Upsert Decision using (game, pitcher) unique constraint.""" from app.db_engine import Decision return upsert_many( Decision, data, conflict_fields=["game", "pitcher"], batch_size=batch_size ) def upsert_gauntlet_rewards(data: List[Dict], batch_size: int = 15) -> int: """ Upsert GauntletReward records. Note: GauntletReward doesn't have a natural unique key defined. For PostgreSQL, we use id if provided, otherwise insert-only. """ from app.db_engine import GauntletReward # Check if any records have 'id' field has_ids = any("id" in record for record in data) if has_ids: return upsert_by_pk(GauntletReward, data, pk_field="id", batch_size=batch_size) else: # No IDs provided - just insert (may fail on duplicates) total = 0 for i in range(0, len(data), batch_size): batch = data[i : i + batch_size] GauntletReward.insert_many(batch).execute() total += len(batch) return total def upsert_mlb_players(data: List[Dict], batch_size: int = 15) -> int: """ Upsert MlbPlayer records. Note: The calling code already checks for duplicates before insert, so this is effectively just an insert operation. """ from app.db_engine import MlbPlayer # MlbPlayer doesn't have a good unique key other than id # Since duplicates are already checked, just insert total = 0 for i in range(0, len(data), batch_size): batch = data[i : i + batch_size] MlbPlayer.insert_many(batch).execute() total += len(batch) return total