- Fix upsert_many() to use column_name for EXCLUDED references (ForeignKeyField columns end in _id, e.g., batter -> batter_id) - Add null checks in batting/pitching CSV output for player, team, game fields to prevent 'NoneType' not subscriptable errors Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
288 lines
8.9 KiB
Python
288 lines
8.9 KiB
Python
"""
|
|
Database helper functions for PostgreSQL compatibility.
|
|
|
|
This module provides cross-database compatible upsert operations that work
|
|
with both SQLite and PostgreSQL.
|
|
|
|
The key difference:
|
|
- SQLite: .on_conflict_replace() works directly
|
|
- PostgreSQL: Requires .on_conflict() with explicit conflict_target and update dict
|
|
|
|
Usage:
|
|
from app.db_helpers import upsert_many, DATABASE_TYPE
|
|
|
|
# Instead of:
|
|
Model.insert_many(batch).on_conflict_replace().execute()
|
|
|
|
# Use:
|
|
upsert_many(Model, batch, conflict_fields=['field1', 'field2'])
|
|
"""
|
|
|
|
import os
|
|
from typing import Any, Dict, List, Type, Union
|
|
|
|
from peewee import Model, SQL
|
|
|
|
# Backend selector: the DATABASE_TYPE environment variable, lower-cased, with
# "sqlite" as the fallback when it is unset. Evaluated once at import time and
# kept at module level so callers can import it together with the helpers.
DATABASE_TYPE = os.getenv("DATABASE_TYPE", "sqlite").lower()
|
|
|
|
|
|
def get_model_fields(
    model: Type[Model], exclude: Union[List[str], None] = None
) -> List[str]:
    """
    Get all field names for a model, excluding specified fields.

    Names are returned in the order produced by ``model._meta.sorted_fields``.

    Args:
        model: Peewee Model class
        exclude: Field names to leave out (e.g., the primary key). ``None``
            or an empty collection means "exclude nothing".

    Returns:
        List of field names
    """
    # Build the lookup set once so each field costs a single hash probe
    # instead of a linear scan over the exclusion list.
    excluded = frozenset(exclude) if exclude else frozenset()

    return [
        field.name
        for field in model._meta.sorted_fields
        if field.name not in excluded
    ]
|
|
|
|
|
|
def upsert_many(
    model: Type[Model],
    data: List[Dict[str, Any]],
    conflict_fields: List[str],
    update_fields: Union[List[str], None] = None,
    batch_size: int = 100,
) -> int:
    """
    Insert or update multiple records in a database-agnostic way.

    When the module-level ``DATABASE_TYPE`` is ``"postgresql"`` every batch is
    written with ``on_conflict(conflict_target=..., action="update", ...)``,
    or with ``on_conflict_ignore()`` when there is nothing to update. For any
    other ``DATABASE_TYPE`` the batch is written with ``on_conflict_replace()``
    and ``conflict_fields`` / ``update_fields`` are not consulted.

    Args:
        model: Peewee Model class
        data: List of dictionaries with field values
        conflict_fields: Fields that define uniqueness (for PostgreSQL ON CONFLICT)
        update_fields: Fields to update on conflict (defaults to all fields
            except the primary key and the conflict fields). Names that are
            not attributes of ``model`` are skipped.
        batch_size: Number of records per batch; must be at least 1

    Returns:
        Number of records processed

    Raises:
        ValueError: If ``data`` is non-empty and ``batch_size`` is less than 1.

    Example:
        # For BattingCard with unique constraint on (player, variant)
        upsert_many(
            BattingCard,
            batch_data,
            conflict_fields=['player', 'variant']
        )
    """
    if not data:
        return 0

    # range() rejects a zero step and yields nothing for a negative one, which
    # would report 0 rows while silently writing none. Fail loudly instead.
    if batch_size < 1:
        raise ValueError(
            "batch_size must be a positive integer, got {!r}".format(batch_size)
        )

    use_postgres = DATABASE_TYPE == "postgresql"
    conflict_target = []
    update_dict = {}

    if use_postgres:
        # Everything in this branch is identical for every batch, so it is
        # computed once up front rather than inside the batch loop.
        from peewee import EXCLUDED

        if update_fields is None:
            # Default: update every field except the PK and the conflict fields.
            primary_key = model._meta.primary_key
            pk_name = primary_key.name if primary_key else "id"
            update_fields = get_model_fields(
                model, exclude=[pk_name] + list(conflict_fields)
            )

        # Conflict target must be the actual field objects, not their names.
        conflict_target = [getattr(model, name) for name in conflict_fields]

        # EXCLUDED must be addressed by column name, not field name
        # (ForeignKeyField column names end in _id, e.g., batter -> batter_id).
        for name in update_fields:
            if hasattr(model, name):
                field_obj = getattr(model, name)
                update_dict[field_obj] = EXCLUDED[field_obj.column_name]

    total = 0
    for start in range(0, len(data), batch_size):
        batch = data[start : start + batch_size]
        query = model.insert_many(batch)

        if not use_postgres:
            # SQLite: on_conflict_replace needs no explicit target.
            query = query.on_conflict_replace()
        elif update_dict:
            query = query.on_conflict(
                conflict_target=conflict_target, action="update", update=update_dict
            )
        else:
            # No fields to update, just ignore conflicts.
            query = query.on_conflict_ignore()

        query.execute()
        total += len(batch)

    return total
|
|
|
|
|
|
def upsert_by_pk(
    model: Type[Model],
    data: List[Dict[str, Any]],
    pk_field: str = None,
    batch_size: int = 100,
) -> int:
    """
    Upsert rows whose primary key value is supplied in the data itself.

    Delegates to ``upsert_many`` with the primary key as the only conflict
    field (e.g. Player rows that carry an explicit ``player_id``).

    Args:
        model: Peewee Model class
        data: Row dictionaries, each including the primary key value
        pk_field: Name of the primary key field; when omitted it is read from
            ``model._meta.primary_key``, falling back to ``"id"``
        batch_size: Number of records per batch

    Returns:
        Number of records processed (0 when ``data`` is empty)

    Example:
        # For Player with explicit player_id
        upsert_by_pk(Player, player_data, pk_field='player_id')
    """
    if data:
        key_name = pk_field
        if key_name is None:
            # Fall back to the model's declared primary key, or "id" if the
            # model reports none.
            declared_pk = model._meta.primary_key
            key_name = declared_pk.name if declared_pk else "id"

        return upsert_many(
            model, data, conflict_fields=[key_name], batch_size=batch_size
        )

    # Nothing to write.
    return 0
|
|
|
|
|
|
# Pre-configured upsert functions for specific models
|
|
# These encode the unique constraint knowledge for each model
|
|
|
|
|
|
def upsert_players(data: List[Dict], batch_size: int = 15) -> int:
    """Write Player rows via ``upsert_by_pk``, conflicting on ``player_id``.

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_by_pk``.
    """
    # Imported lazily, as in the rest of this module's model-specific helpers.
    from app.db_engine import Player

    return upsert_by_pk(
        model=Player,
        data=data,
        pk_field="player_id",
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_batting_cards(data: List[Dict], batch_size: int = 30) -> int:
    """Write BattingCard rows, conflicting on the (player, variant) pair.

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import BattingCard

    unique_key = ["player", "variant"]
    return upsert_many(
        model=BattingCard,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_pitching_cards(data: List[Dict], batch_size: int = 30) -> int:
    """Write PitchingCard rows, conflicting on the (player, variant) pair.

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import PitchingCard

    unique_key = ["player", "variant"]
    return upsert_many(
        model=PitchingCard,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_batting_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """Write BattingCardRatings rows, conflicting on (battingcard, vs_hand).

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import BattingCardRatings

    unique_key = ["battingcard", "vs_hand"]
    return upsert_many(
        model=BattingCardRatings,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_pitching_card_ratings(data: List[Dict], batch_size: int = 30) -> int:
    """Write PitchingCardRatings rows, conflicting on (pitchingcard, vs_hand).

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import PitchingCardRatings

    unique_key = ["pitchingcard", "vs_hand"]
    return upsert_many(
        model=PitchingCardRatings,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_card_positions(data: List[Dict], batch_size: int = 30) -> int:
    """Write CardPosition rows, conflicting on (player, variant, position).

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import CardPosition

    unique_key = ["player", "variant", "position"]
    return upsert_many(
        model=CardPosition,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_strat_plays(data: List[Dict], batch_size: int = 20) -> int:
    """Write StratPlay rows, conflicting on the (game, play_num) pair.

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import StratPlay

    unique_key = ["game", "play_num"]
    return upsert_many(
        model=StratPlay,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_decisions(data: List[Dict], batch_size: int = 10) -> int:
    """Write Decision rows, conflicting on the (game, pitcher) pair.

    Args:
        data: Row dictionaries to insert or update.
        batch_size: Number of rows sent per batch.

    Returns:
        The count reported by ``upsert_many``.
    """
    from app.db_engine import Decision

    unique_key = ["game", "pitcher"]
    return upsert_many(
        model=Decision,
        data=data,
        conflict_fields=unique_key,
        batch_size=batch_size,
    )
|
|
|
|
|
|
def upsert_gauntlet_rewards(data: List[Dict], batch_size: int = 15) -> int:
    """
    Write GauntletReward rows, upserting by ``id`` when ids are present.

    GauntletReward has no natural unique key to conflict on. If at least one
    record contains an ``"id"`` key, the whole payload goes through
    ``upsert_by_pk`` with ``pk_field="id"``; otherwise the rows are plainly
    inserted in batches, with no conflict handling.

    Args:
        data: Row dictionaries to write.
        batch_size: Number of rows sent per batch.

    Returns:
        Number of records processed.
    """
    from app.db_engine import GauntletReward

    # A single record carrying an id is enough to choose the upsert path for
    # the entire payload.
    for record in data:
        if "id" in record:
            return upsert_by_pk(
                GauntletReward, data, pk_field="id", batch_size=batch_size
            )

    # No ids anywhere: insert only (duplicates may make the insert fail).
    inserted = 0
    for offset in range(0, len(data), batch_size):
        chunk = data[offset : offset + batch_size]
        GauntletReward.insert_many(chunk).execute()
        inserted += len(chunk)
    return inserted
|
|
|
|
|
|
def upsert_mlb_players(data: List[Dict], batch_size: int = 15) -> int:
    """
    Insert MlbPlayer rows in batches.

    Despite the name, this performs no conflict handling: each batch is a
    plain ``insert_many``. MlbPlayer has no usable unique key besides ``id``.
    Callers are expected to have filtered out duplicates beforehand
    (NOTE(review): not verifiable from this module).

    Args:
        data: Row dictionaries to insert.
        batch_size: Number of rows sent per batch.

    Returns:
        Number of records processed.
    """
    from app.db_engine import MlbPlayer

    inserted = 0
    for offset in range(0, len(data), batch_size):
        chunk = data[offset : offset + batch_size]
        MlbPlayer.insert_many(chunk).execute()
        inserted += len(chunk)
    return inserted
|