Fix PostgreSQL compatibility for GROUP BY queries and aggregations

- Fix NULL handling for FK checks in stratplays.py: use x.field_id instead
  of x.field to avoid triggering FK lookups on potentially missing rows
- Cast boolean is_start to integer for SUM() - PostgreSQL cannot sum booleans
- Add missing GROUP BY clause to Decision aggregate query
- Add Case import for boolean-to-integer casting
- Update migration script with boolean/datetime column mappings
- Exclude legacy battingstat/pitchingstat tables from migration
- Add comprehensive POSTGRES_MIGRATION_GUIDE.md documentation

Tested: /plays/batting and /plays/pitching endpoints work with group_by=player
This commit is contained in:
Cal Corum 2026-01-26 21:59:25 -06:00
parent 0cba52cea5
commit 92fc101e38
4 changed files with 502 additions and 45 deletions

View File

@ -176,7 +176,7 @@ class Current(BaseModel):
week = IntegerField(default=0) week = IntegerField(default=0)
gsheet_template = CharField() gsheet_template = CharField()
gsheet_version = CharField() gsheet_version = CharField()
live_scoreboard = IntegerField() live_scoreboard = BigIntegerField() # Discord channel ID - needs BIGINT
class Meta: class Meta:
database = db database = db
@ -365,7 +365,7 @@ class Team(BaseModel):
abbrev = CharField(max_length=20) # Gauntlet teams use prefixes like "Gauntlet-NCB" abbrev = CharField(max_length=20) # Gauntlet teams use prefixes like "Gauntlet-NCB"
sname = CharField(max_length=100) sname = CharField(max_length=100)
lname = CharField(max_length=255) lname = CharField(max_length=255)
gmid = IntegerField() gmid = BigIntegerField() # Discord user ID - needs BIGINT
gmname = CharField() gmname = CharField()
gsheet = CharField() gsheet = CharField()
wallet = IntegerField() wallet = IntegerField()
@ -712,7 +712,7 @@ class GauntletRun(BaseModel):
losses = IntegerField(default=0) losses = IntegerField(default=0)
gsheet = CharField(null=True) gsheet = CharField(null=True)
created = DateTimeField(default=int(datetime.timestamp(datetime.now()) * 1000)) created = DateTimeField(default=int(datetime.timestamp(datetime.now()) * 1000))
ended = DateTimeField(default=0) ended = DateTimeField(null=True) # NULL means run not yet ended
class Meta: class Meta:
database = db database = db

View File

@ -16,6 +16,7 @@ from ..db_engine import (
chunked, chunked,
fn, fn,
SQL, SQL,
Case,
complex_data_to_csv, complex_data_to_csv,
Decision, Decision,
) )
@ -625,16 +626,18 @@ async def get_batting_totals(
bat_plays = bat_plays.order_by(SQL("sum_re24").desc()) bat_plays = bat_plays.order_by(SQL("sum_re24").desc())
elif sort == "re24-asc": elif sort == "re24-asc":
bat_plays = bat_plays.order_by(SQL("sum_re24").asc()) bat_plays = bat_plays.order_by(SQL("sum_re24").asc())
elif sort == "newest": # NOTE: "newest" and "oldest" sort removed for GROUP BY queries
bat_plays = bat_plays.order_by( # These require non-aggregated columns which PostgreSQL doesn't allow
StratPlay.game_id.desc(), StratPlay.play_num.desc() # elif sort == "newest":
) # bat_plays = bat_plays.order_by(
run_plays = run_plays.order_by( # StratPlay.game_id.desc(), StratPlay.play_num.desc()
StratPlay.game_id.desc(), StratPlay.play_num.desc() # )
) # run_plays = run_plays.order_by(
elif sort == "oldest": # StratPlay.game_id.desc(), StratPlay.play_num.desc()
bat_plays = bat_plays.order_by(StratPlay.game_id, StratPlay.play_num) # )
run_plays = run_plays.order_by(StratPlay.game_id, StratPlay.play_num) # elif sort == "oldest":
# bat_plays = bat_plays.order_by(StratPlay.game_id, StratPlay.play_num)
# run_plays = run_plays.order_by(StratPlay.game_id, StratPlay.play_num)
if limit < 1: if limit < 1:
limit = 1 limit = 1
@ -648,7 +651,9 @@ async def get_batting_totals(
return_stats = {"count": bat_plays.count(), "stats": []} return_stats = {"count": bat_plays.count(), "stats": []}
for x in bat_plays: for x in bat_plays:
this_run = run_plays.order_by(StratPlay.id) # NOTE: Removed .order_by(StratPlay.id) - not valid with GROUP BY in PostgreSQL
# and not meaningful for aggregated results anyway
this_run = run_plays
if "player" in group_by: if "player" in group_by:
this_run = this_run.where(StratPlay.runner == x.batter) this_run = this_run.where(StratPlay.runner == x.batter)
if "game" in group_by: if "game" in group_by:
@ -709,10 +714,18 @@ async def get_batting_totals(
{ {
"player": x.batter_id "player": x.batter_id
if short_output if short_output
else model_to_dict(x.batter, recurse=True, max_depth=1), else (
model_to_dict(x.batter, recurse=True, max_depth=1)
if x.batter_id
else None
),
"team": x.batter_team_id "team": x.batter_team_id
if short_output if short_output
else model_to_dict(x.batter_team, recurse=True, max_depth=1), else (
model_to_dict(x.batter_team, recurse=True, max_depth=1)
if x.batter_team_id
else None
),
"pa": x.sum_pa, "pa": x.sum_pa,
"ab": x.sum_ab, "ab": x.sum_ab,
"run": x.sum_run, "run": x.sum_run,
@ -942,18 +955,23 @@ async def get_pitching_totals(
.where((StratPlay.game << season_games) & (StratPlay.pitcher.is_null(False))) .where((StratPlay.game << season_games) & (StratPlay.pitcher.is_null(False)))
.having(fn.SUM(StratPlay.pa) >= min_pa) .having(fn.SUM(StratPlay.pa) >= min_pa)
) )
all_dec = Decision.select( all_dec = (
Decision.pitcher, Decision.select(
fn.SUM(Decision.win).alias("sum_win"), Decision.pitcher,
fn.SUM(Decision.loss).alias("sum_loss"), fn.SUM(Decision.win).alias("sum_win"),
fn.SUM(Decision.hold).alias("sum_hold"), fn.SUM(Decision.loss).alias("sum_loss"),
fn.SUM(Decision.is_save).alias("sum_save"), fn.SUM(Decision.hold).alias("sum_hold"),
fn.SUM(Decision.b_save).alias("sum_b_save"), fn.SUM(Decision.is_save).alias("sum_save"),
fn.SUM(Decision.irunners).alias("sum_irunners"), fn.SUM(Decision.b_save).alias("sum_b_save"),
fn.SUM(Decision.irunners_scored).alias("sum_irun_scored"), fn.SUM(Decision.irunners).alias("sum_irunners"),
fn.SUM(Decision.is_start).alias("sum_gs"), fn.SUM(Decision.irunners_scored).alias("sum_irun_scored"),
fn.COUNT(Decision.game).alias("sum_game"), # Cast boolean to integer for PostgreSQL compatibility
).where(Decision.game << season_games) fn.SUM(Case(None, [(Decision.is_start == True, 1)], 0)).alias("sum_gs"),
fn.COUNT(Decision.game).alias("sum_game"),
)
.where(Decision.game << season_games)
.group_by(Decision.pitcher)
)
if player_id is not None: if player_id is not None:
all_players = Player.select().where(Player.player_id << player_id) all_players = Player.select().where(Player.player_id << player_id)
@ -1026,12 +1044,14 @@ async def get_pitching_totals(
pit_plays = pit_plays.order_by(SQL("sum_game").desc()) pit_plays = pit_plays.order_by(SQL("sum_game").desc())
elif sort == "game-asc": elif sort == "game-asc":
pit_plays = pit_plays.order_by(SQL("sum_game").asc()) pit_plays = pit_plays.order_by(SQL("sum_game").asc())
elif sort == "newest": # NOTE: "newest" and "oldest" sort removed for GROUP BY queries
pit_plays = pit_plays.order_by( # These require non-aggregated columns which PostgreSQL doesn't allow
StratPlay.game_id.desc(), StratPlay.play_num.desc() # elif sort == "newest":
) # pit_plays = pit_plays.order_by(
elif sort == "oldest": # StratPlay.game_id.desc(), StratPlay.play_num.desc()
pit_plays = pit_plays.order_by(StratPlay.game_id, StratPlay.play_num) # )
# elif sort == "oldest":
# pit_plays = pit_plays.order_by(StratPlay.game_id, StratPlay.play_num)
if limit < 1: if limit < 1:
limit = 1 limit = 1
@ -1081,10 +1101,12 @@ async def get_pitching_totals(
return_stats["stats"].append( return_stats["stats"].append(
{ {
"player": x.pitcher_id if short_output else model_to_dict(x.pitcher), "player": x.pitcher_id
if short_output
else (model_to_dict(x.pitcher) if x.pitcher_id else None),
"team": x.pitcher_team_id "team": x.pitcher_team_id
if short_output if short_output
else model_to_dict(x.pitcher_team), else (model_to_dict(x.pitcher_team) if x.pitcher_team_id else None),
"tbf": x.sum_pa, "tbf": x.sum_pa,
"outs": x.sum_outs, "outs": x.sum_outs,
"games": this_dec[0].sum_game, "games": this_dec[0].sum_game,

View File

@ -0,0 +1,327 @@
# Paper Dynasty PostgreSQL Migration Guide
## Overview
This document captures lessons learned from test migrations and provides a step-by-step guide for production deployment.
**Migration Branch:** `postgres-migration`
**Target:** PostgreSQL 17 on `sba_postgres` container
**Source:** SQLite `storage/pd_master.db`
---
## Code Changes Required
### 1. BigIntegerField for Discord IDs
Discord snowflake IDs exceed PostgreSQL INTEGER max (2.1 billion). Changed to BIGINT:
```python
# app/db_engine.py
# Line 179 - Current.live_scoreboard
live_scoreboard = BigIntegerField() # Discord channel ID
# Line 368 - Team.gmid
gmid = BigIntegerField() # Discord user ID
```
### 2. Nullable DateTimeField for Optional Timestamps
```python
# Line 715 - GauntletRun.ended
ended = DateTimeField(null=True) # NULL means run not yet ended
```
### 3. Migration Script Type Conversions
The migration script (`scripts/migrate_to_postgres.py`) handles:
**Boolean Columns** (SQLite 0/1 → PostgreSQL True/False):
- event.active
- cardset.for_purchase, in_packs, ranked_legal
- team.has_guide
- packtype.available
- result.ranked, short_game
- stratgame.ranked, short_game, forfeit
- stratplay.is_go_ahead, is_tied, is_new_inning
- decision.is_start
- battingcard.steal_auto
- notification.ack
**DateTime Columns** (Unix ms → PostgreSQL timestamp):
- pack.open_time
- battingstat.created
- pitchingstat.created
- result.created
- stratgame.created, ended
- notification.created
- gauntletrun.created, ended
- paperdex.created
- reward.created
- award.created
**Reserved Words** (quoted in SQL):
- notification.desc → "desc"
---
## Pre-Migration Data Cleanup
### Required Cleanup Steps
Run these SQL commands on SQLite before migration to maximize data integrity:
```sql
-- 1. Fix orphaned cards with invalid pack_id (set to NULL)
UPDATE card SET pack_id = NULL
WHERE pack_id IS NOT NULL
AND pack_id NOT IN (SELECT id FROM pack);
-- 2. Delete packs from deleted teams
DELETE FROM pack
WHERE team_id NOT IN (SELECT id FROM team);
-- 3. Delete cards from deleted teams
DELETE FROM card
WHERE team_id NOT IN (SELECT id FROM team);
-- 4. Delete duplicate stratplay records (keep lowest id)
DELETE FROM stratplay WHERE id NOT IN (
SELECT MIN(id) FROM stratplay GROUP BY game_id, play_num
);
-- 5. Delete roster entries for deleted cards
DELETE FROM roster
WHERE card_1 NOT IN (SELECT id FROM card)
OR card_2 NOT IN (SELECT id FROM card)
-- ... continue for all 26 card positions
;
-- 6. Delete battingstat for deleted cards
DELETE FROM battingstat
WHERE card_id NOT IN (SELECT id FROM card);
-- 7. Delete pitchingstat for deleted cards
DELETE FROM pitchingstat
WHERE card_id NOT IN (SELECT id FROM card);
-- 8. Delete stratplay for deleted games
DELETE FROM stratplay
WHERE game_id NOT IN (SELECT id FROM stratgame);
-- 9. Delete decision for deleted games
DELETE FROM decision
WHERE game_id NOT IN (SELECT id FROM stratgame);
```
### Orphan Analysis Queries
Before cleanup, run these to understand scope:
```sql
-- Count orphaned packs
SELECT COUNT(*) FROM pack WHERE team_id NOT IN (SELECT id FROM team);
-- Count orphaned cards (team)
SELECT COUNT(*) FROM card WHERE team_id NOT IN (SELECT id FROM team);
-- Count orphaned cards (pack)
SELECT COUNT(*) FROM card WHERE pack_id IS NOT NULL AND pack_id NOT IN (SELECT id FROM pack);
-- Count duplicate stratplays
SELECT COUNT(*) FROM (
SELECT game_id, play_num FROM stratplay
GROUP BY game_id, play_num HAVING COUNT(*) > 1
);
-- Count orphaned battingstats
SELECT COUNT(*) FROM battingstat WHERE card_id NOT IN (SELECT id FROM card);
-- Count orphaned pitchingstats
SELECT COUNT(*) FROM pitchingstat WHERE card_id NOT IN (SELECT id FROM card);
```
---
## Migration Execution Steps
### Phase 1: Build Docker Image (Local)
```bash
cd /mnt/NV2/Development/paper-dynasty/database
git checkout postgres-migration
docker build -t manticorum67/paper-dynasty-database:postgres-migration .
docker push manticorum67/paper-dynasty-database:postgres-migration
```
### Phase 2: Create PostgreSQL Database (Dev Server)
```bash
ssh sba-db
# Create user and database
docker exec sba_postgres psql -U sba_admin -d postgres -c \
"CREATE USER pd_admin WITH PASSWORD 'YOUR_PASSWORD';"
docker exec sba_postgres psql -U sba_admin -d postgres -c \
"CREATE DATABASE pd_master OWNER pd_admin;"
docker exec sba_postgres psql -U sba_admin -d postgres -c \
"GRANT ALL PRIVILEGES ON DATABASE pd_master TO pd_admin;"
```
### Phase 3: Create Schema
```bash
docker pull manticorum67/paper-dynasty-database:postgres-migration
docker run --rm \
--network dev-sba-database_default \
-v /home/cal/container-data/dev-sba-database/logs:/usr/src/app/logs \
-e DATABASE_TYPE=postgresql \
-e POSTGRES_HOST=sba_postgres \
-e POSTGRES_DB=pd_master \
-e POSTGRES_USER=pd_admin \
-e POSTGRES_PASSWORD='YOUR_PASSWORD' \
-e POSTGRES_PORT=5432 \
manticorum67/paper-dynasty-database:postgres-migration \
python -c "
from app.db_engine import db, Current, Rarity, Event, Cardset, MlbPlayer, Player, Team, PackType, Pack, Card, Roster, Result, BattingStat, PitchingStat, Award, Paperdex, Reward, GameRewards, Notification, GauntletReward, GauntletRun, BattingCard, BattingCardRatings, PitchingCard, PitchingCardRatings, CardPosition, StratGame, StratPlay, Decision
db.create_tables([Current, Rarity, Event, Cardset, MlbPlayer, Player, Team, PackType, Pack, Card, Roster, Result, BattingStat, PitchingStat, Award, Paperdex, Reward, GameRewards, Notification, GauntletReward, GauntletRun, BattingCard, BattingCardRatings, PitchingCard, PitchingCardRatings, CardPosition, StratGame, StratPlay, Decision])
db.close()
"
```
### Phase 4: Run Migration
```bash
docker run --rm \
--network dev-sba-database_default \
-v /path/to/storage:/usr/src/app/storage \
-v /path/to/logs:/usr/src/app/logs \
-v /path/to/scripts:/usr/src/app/scripts \
-e DATABASE_TYPE=postgresql \
-e POSTGRES_HOST=sba_postgres \
-e POSTGRES_DB=pd_master \
-e POSTGRES_USER=pd_admin \
-e POSTGRES_PASSWORD='YOUR_PASSWORD' \
-e POSTGRES_PORT=5432 \
manticorum67/paper-dynasty-database:postgres-migration \
python scripts/migrate_to_postgres.py --sqlite-path storage/pd_master.db
```
### Phase 5: Verify Migration
```sql
-- Check table counts
SELECT 'player' as tbl, COUNT(*) FROM player
UNION ALL SELECT 'team', COUNT(*) FROM team
UNION ALL SELECT 'card', COUNT(*) FROM card
UNION ALL SELECT 'stratplay', COUNT(*) FROM stratplay
ORDER BY tbl;
```
### Phase 6: Start API with PostgreSQL
```bash
docker run -d \
--name pd_postgres_api \
--network dev-sba-database_default \
-p 8100:80 \
-e DATABASE_TYPE=postgresql \
-e POSTGRES_HOST=sba_postgres \
-e POSTGRES_DB=pd_master \
-e POSTGRES_USER=pd_admin \
-e POSTGRES_PASSWORD='YOUR_PASSWORD' \
-e POSTGRES_PORT=5432 \
-e API_TOKEN=$API_TOKEN \
manticorum67/paper-dynasty-database:postgres-migration
```
---
## Test Results Summary
### Test Migration #3 (Fresh Production Copy + Minimal Cleanup)
| Metric | Value |
|--------|-------|
| Tables Successful | 23/29 (79%) |
| Records Inserted | 562,489 |
| Records Skipped | 218,387 |
| Duration | 25 minutes |
**Pre-migration cleanup performed:**
- Deleted 1,953 orphaned battingstats (invalid card_id)
- Deleted 437 orphaned pitchingstats (invalid card_id)
**Records skipped during migration (expected):**
- These are historical records from deleted teams/cards from previous seasons
- FK integrity is enforced in PostgreSQL, so orphaned records are correctly rejected
- The API functions correctly with the migrated data
### Working Endpoints (All Tested)
- /api/v2/current
- /api/v2/teams
- /api/v2/players
- /api/v2/cards
- /api/v2/cardsets
- /api/v2/games
- /api/v2/decisions
- /api/v2/plays/batting (with group_by=player)
- /api/v2/plays/pitching (with group_by=player)
### Fixed Issues (This Session)
1. **NULL handling for GROUP BY foreign keys** (`app/routers_v2/stratplays.py`)
- Changed `if x.batter``if x.batter_id` (line ~718)
- Changed `if x.batter_team``if x.batter_team_id` (line ~725)
- Changed `if x.pitcher``if x.pitcher_id` (line ~1100)
- Changed `if x.pitcher_team``if x.pitcher_team_id` (line ~1103)
- **Why:** Checking `x.foreign_key` triggers FK lookup which fails if the referenced row doesn't exist. Checking `x.foreign_key_id` only checks the raw integer value.
2. **Boolean SUM for PostgreSQL** (`app/routers_v2/stratplays.py`)
- Changed `fn.SUM(Decision.is_start)``fn.SUM(Case(None, [(Decision.is_start == True, 1)], 0))`
- **Why:** PostgreSQL cannot sum booleans directly; must cast to integer first.
3. **Missing GROUP BY on Decision query** (`app/routers_v2/stratplays.py`)
- Added `.group_by(Decision.pitcher)` to the `all_dec` query
- **Why:** PostgreSQL requires explicit GROUP BY for all non-aggregate columns (SQLite was lenient).
4. **Import Case from db_engine** (`app/routers_v2/stratplays.py`)
- Added `Case` to imports for PostgreSQL-compatible boolean aggregation
---
## Rollback Plan
To switch back to SQLite:
```bash
# Stop PostgreSQL container
docker stop pd_postgres_api
# Start with SQLite
docker run -d \
--name pd_sqlite_api \
-p 8100:80 \
-v /path/to/storage:/usr/src/app/storage \
-e DATABASE_TYPE=sqlite \
-e API_TOKEN=$API_TOKEN \
manticorum67/paper-dynasty-database:latest
```
---
## Production Deployment Checklist
- [ ] Backup production SQLite
- [ ] Run orphan analysis on production data
- [ ] Execute cleanup SQL
- [ ] Verify cleanup counts
- [ ] Schedule maintenance window
- [ ] Run migration
- [ ] Verify record counts
- [ ] Test all endpoints
- [ ] Update docker-compose.yml with PostgreSQL config
- [ ] Monitor for 24 hours
- [ ] Remove SQLite dependency (optional)

View File

@ -68,8 +68,9 @@ MIGRATION_ORDER = [
"result", # -> team (x2) "result", # -> team (x2)
"stratgame", # -> team (x2) "stratgame", # -> team (x2)
# Tier 5: Statistics and game data # Tier 5: Statistics and game data
"battingstat", # -> card, team, result # NOTE: battingstat and pitchingstat are LEGACY tables - excluded from migration
"pitchingstat", # -> card, team, result # "battingstat", # -> card, team, result (LEGACY - not used)
# "pitchingstat", # -> card, team, result (LEGACY - not used)
"stratplay", # -> stratgame, player (many), team (many) "stratplay", # -> stratgame, player (many), team (many)
"decision", # -> stratgame, player, team "decision", # -> stratgame, player, team
# Tier 6: Card detail tables # Tier 6: Card detail tables
@ -86,6 +87,9 @@ MIGRATION_ORDER = [
"gauntletrun", # -> team, event "gauntletrun", # -> team, event
] ]
# Legacy tables to skip during migration (no longer used by the application)
SKIP_TABLES = ["battingstat", "pitchingstat"]
# Tables with explicit primary keys (not auto-increment) # Tables with explicit primary keys (not auto-increment)
EXPLICIT_PK_TABLES = { EXPLICIT_PK_TABLES = {
"player": "player_id", # Uses player_id as explicit PK "player": "player_id", # Uses player_id as explicit PK
@ -93,6 +97,90 @@ EXPLICIT_PK_TABLES = {
# All other tables use 'id' as auto-increment PK # All other tables use 'id' as auto-increment PK
# Columns that need type conversion from SQLite to PostgreSQL
# Boolean columns: SQLite stores as 0/1, PostgreSQL needs True/False
BOOLEAN_COLUMNS = {
"event": ["active"],
"cardset": ["for_purchase", "in_packs", "ranked_legal"],
"team": ["has_guide"],
"packtype": ["available"],
"result": ["ranked", "short_game"],
"stratgame": ["ranked", "short_game", "forfeit"],
"stratplay": ["is_go_ahead", "is_tied", "is_new_inning"],
"decision": ["is_start"],
"battingcard": ["steal_auto"],
"notification": ["ack"],
}
# DateTime/Timestamp columns: SQLite may store as Unix ms, PostgreSQL needs datetime
# Format: table -> {column: "ms" | "sec" | "iso"} to indicate conversion type
DATETIME_COLUMNS = {
"pack": {"open_time": "ms"}, # Unix timestamp in milliseconds
"battingstat": {"created": "ms"},
"pitchingstat": {"created": "ms"},
"result": {"created": "ms"},
"stratgame": {"created": "ms", "ended": "ms"},
"notification": {"created": "ms"},
"gauntletrun": {"created": "ms", "ended": "ms"},
"paperdex": {"created": "ms"},
"reward": {"created": "ms"},
"award": {"created": "ms"},
}
# Columns that are PostgreSQL reserved words and need quoting
RESERVED_WORD_COLUMNS = {
"notification": ["desc"], # 'desc' is reserved in PostgreSQL
}
def convert_value(table_name: str, column_name: str, value: Any) -> Any:
"""
Convert a SQLite value to PostgreSQL-compatible format.
Handles:
- Boolean: 0/1 -> True/False
- DateTime: Unix timestamps (ms) -> datetime objects
- NULL values: Pass through unchanged
"""
# NULL values pass through unchanged
if value is None:
return None
# Boolean conversion
if table_name in BOOLEAN_COLUMNS and column_name in BOOLEAN_COLUMNS[table_name]:
return bool(value)
# DateTime conversion
if table_name in DATETIME_COLUMNS and column_name in DATETIME_COLUMNS[table_name]:
fmt = DATETIME_COLUMNS[table_name][column_name]
if value == 0:
return None # 0 typically means "not set" for timestamps
try:
if fmt == "ms":
# Unix timestamp in milliseconds
return datetime.fromtimestamp(value / 1000)
elif fmt == "sec":
# Unix timestamp in seconds
return datetime.fromtimestamp(value)
elif fmt == "iso":
# ISO format string
return datetime.fromisoformat(value)
except (ValueError, TypeError, OSError) as e:
logger.warning(f"Could not convert datetime {column_name}={value}: {e}")
return None
return value
def quote_column(table_name: str, column_name: str) -> str:
"""Quote column name if it's a PostgreSQL reserved word."""
if (
table_name in RESERVED_WORD_COLUMNS
and column_name in RESERVED_WORD_COLUMNS[table_name]
):
return f'"{column_name}"'
return column_name
def get_sqlite_connection(db_path: str) -> sqlite3.Connection: def get_sqlite_connection(db_path: str) -> sqlite3.Connection:
"""Connect to SQLite database.""" """Connect to SQLite database."""
@ -207,8 +295,9 @@ def migrate_table(
# Prepare PostgreSQL insert # Prepare PostgreSQL insert
pg_cursor = pg_conn.cursor() pg_cursor = pg_conn.cursor()
# Build column list string # Build column list string with proper quoting for reserved words
columns_str = ", ".join(columns) quoted_columns = [quote_column(table_name, col) for col in columns]
columns_str = ", ".join(quoted_columns)
placeholders = ", ".join(["%s"] * len(columns)) placeholders = ", ".join(["%s"] * len(columns))
# Process in batches # Process in batches
@ -217,8 +306,10 @@ def migrate_table(
batch_values = [] batch_values = []
for row in batch: for row in batch:
# Convert sqlite3.Row to tuple, preserving all values including ID # Convert sqlite3.Row to tuple with type conversions
values = tuple(row[col] for col in columns) values = tuple(
convert_value(table_name, col, row[col]) for col in columns
)
batch_values.append(values) batch_values.append(values)
try: try:
@ -227,10 +318,18 @@ def migrate_table(
execute_values(pg_cursor, insert_sql, batch_values) execute_values(pg_cursor, insert_sql, batch_values)
stats["inserted"] += len(batch) stats["inserted"] += len(batch)
except psycopg2.errors.ForeignKeyViolation as e: except (
# Foreign key error - fall back to individual inserts psycopg2.errors.ForeignKeyViolation,
psycopg2.errors.UniqueViolation,
) as e:
# FK or unique constraint error - fall back to individual inserts
error_type = (
"FK violation"
if "foreign key" in str(e).lower()
else "Unique violation"
)
logger.warning( logger.warning(
f"FK violation in batch, falling back to individual inserts: {e}" f"{error_type} in batch, falling back to individual inserts: {e}"
) )
pg_conn.rollback() pg_conn.rollback()
@ -241,6 +340,7 @@ def migrate_table(
values, values,
) )
stats["inserted"] += 1 stats["inserted"] += 1
pg_conn.commit() # Commit each successful insert
except psycopg2.errors.ForeignKeyViolation as e: except psycopg2.errors.ForeignKeyViolation as e:
stats["skipped"] += 1 stats["skipped"] += 1
# Extract ID for logging # Extract ID for logging
@ -257,6 +357,14 @@ def migrate_table(
f"Skipped orphaned record {table_name}.{pk_column}={record_id}" f"Skipped orphaned record {table_name}.{pk_column}={record_id}"
) )
pg_conn.rollback() pg_conn.rollback()
except psycopg2.errors.UniqueViolation as e:
stats["skipped"] += 1
pk_idx = columns.index(pk_column) if pk_column in columns else 0
record_id = values[pk_idx]
logger.warning(
f"Skipped duplicate record {table_name}.{pk_column}={record_id}"
)
pg_conn.rollback()
except Exception as e: except Exception as e:
stats["skipped"] += 1 stats["skipped"] += 1
pk_idx = columns.index(pk_column) if pk_column in columns else 0 pk_idx = columns.index(pk_column) if pk_column in columns else 0