From e0c12467b0511eacd41867f2becf8682f2f8bfee Mon Sep 17 00:00:00 2001 From: Cal Corum Date: Fri, 5 Dec 2025 16:14:00 -0600 Subject: [PATCH] CLAUDE: Improve UX with single-click OAuth, enhanced games list, and layout fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frontend UX improvements: - Single-click Discord OAuth from home page (no intermediate /auth page) - Auto-redirect authenticated users from home to /games - Fixed Nuxt layout system - app.vue now wraps NuxtPage with NuxtLayout - Games page now has proper card container with shadow/border styling - Layout header includes working logout with API cookie clearing Games list enhancements: - Display team names (lname) instead of just team IDs - Show current score for each team - Show inning indicator (Top/Bot X) for active games - Responsive header with wrapped buttons on mobile Backend improvements: - Added team caching to SbaApiClient (1-hour TTL) - Enhanced GameListItem with team names, scores, inning data - Games endpoint now enriches response with SBA API team data Docker optimizations: - Optimized Dockerfile using --chown flag on COPY (faster than chown -R) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .claude/plans/001-websocket-authorization.md | 246 ++++++ .claude/plans/002-websocket-locking.md | 266 ++++++ .claude/plans/003-idle-game-eviction.md | 338 +++++++ .claude/plans/004-alembic-migrations.md | 390 ++++++++ .claude/plans/005-exception-handling.md | 326 +++++++ .claude/plans/006-rate-limiting.md | 451 ++++++++++ .claude/plans/007-session-expiration.md | 416 +++++++++ .claude/plans/008-websocket-tests.md | 677 ++++++++++++++ .claude/plans/009-integration-test-fix.md | 478 ++++++++++ .claude/plans/010-shared-components.md | 451 ++++++++++ .claude/plans/011-database-indexes.md | 287 ++++++ .../plans/012-connection-pool-monitoring.md | 442 ++++++++++ .claude/plans/MASTER_TRACKER.md | 380 ++++++++ CLAUDE.md | 109 
++- WEBSOCKET_ARCHITECTURE_ANALYSIS.md | 829 ++++++++++++++++++ backend/.env.dev | 24 + backend/.env.prod | 24 + backend/.python-version | 1 - backend/app/api/routes/games.py | 59 +- backend/app/core/game_engine.py | 26 +- backend/app/services/sba_api_client.py | 57 ++ docker-compose.dev.yml | 26 + docker-compose.prod.yml | 14 +- docker-compose.yml | 92 +- frontend-pd/Dockerfile | 2 +- frontend-pd/plugins/socket.client.ts | 2 +- .../.claude/NUXT4_BREAKING_CHANGES.md | 37 + frontend-sba/.dockerignore | 5 +- frontend-sba/.gitignore | 4 + frontend-sba/CLAUDE.md | 6 + frontend-sba/Dockerfile | 21 +- frontend-sba/app.vue | 4 +- frontend-sba/app/app.vue | 6 - .../components/Game/CurrentSituation.vue | 14 +- frontend-sba/composables/useApiUrl.ts | 16 + frontend-sba/composables/useWebSocket.ts | 7 +- frontend-sba/layouts/default.vue | 15 +- frontend-sba/nuxt.config.ts | 2 + frontend-sba/pages/auth/callback.vue | 8 +- frontend-sba/pages/games/[id].vue | 22 +- frontend-sba/pages/games/index.vue | 126 ++- frontend-sba/pages/index.vue | 33 +- .../plugins/socket.client.ts.disabled | 49 ++ frontend-sba/store/game.ts | 9 + scripts/env-switch.sh | 72 ++ start.sh | 255 ++++++ 46 files changed, 6913 insertions(+), 211 deletions(-) create mode 100644 .claude/plans/001-websocket-authorization.md create mode 100644 .claude/plans/002-websocket-locking.md create mode 100644 .claude/plans/003-idle-game-eviction.md create mode 100644 .claude/plans/004-alembic-migrations.md create mode 100644 .claude/plans/005-exception-handling.md create mode 100644 .claude/plans/006-rate-limiting.md create mode 100644 .claude/plans/007-session-expiration.md create mode 100644 .claude/plans/008-websocket-tests.md create mode 100644 .claude/plans/009-integration-test-fix.md create mode 100644 .claude/plans/010-shared-components.md create mode 100644 .claude/plans/011-database-indexes.md create mode 100644 .claude/plans/012-connection-pool-monitoring.md create mode 100644 .claude/plans/MASTER_TRACKER.md 
create mode 100644 WEBSOCKET_ARCHITECTURE_ANALYSIS.md create mode 100644 backend/.env.dev create mode 100644 backend/.env.prod delete mode 100644 backend/.python-version create mode 100644 docker-compose.dev.yml delete mode 100644 frontend-sba/app/app.vue create mode 100644 frontend-sba/composables/useApiUrl.ts create mode 100644 frontend-sba/plugins/socket.client.ts.disabled create mode 100755 scripts/env-switch.sh create mode 100755 start.sh diff --git a/.claude/plans/001-websocket-authorization.md b/.claude/plans/001-websocket-authorization.md new file mode 100644 index 0000000..fa165cf --- /dev/null +++ b/.claude/plans/001-websocket-authorization.md @@ -0,0 +1,246 @@ +# Plan 001: WebSocket Authorization + +**Priority**: CRITICAL +**Effort**: 4-6 hours +**Status**: NOT STARTED +**Risk Level**: HIGH - Security vulnerability + +--- + +## Problem Statement + +WebSocket handlers in `backend/app/websocket/handlers.py` have **11 TODO comments** indicating missing access control. Any connected user can: +- Join any game +- Submit decisions for teams they don't own +- View game states they shouldn't access +- Manipulate games in progress + +## Impact + +- **Security**: Unauthorized game access +- **Data Integrity**: Users can cheat by controlling opponent actions +- **Trust**: Players can't trust game outcomes + +## Files to Modify + +| File | Changes | +|------|---------| +| `backend/app/websocket/handlers.py` | Add authorization checks to all handlers | +| `backend/app/websocket/connection_manager.py` | Track user-game associations | +| `backend/app/models/db_models.py` | May need game participant query | +| `backend/app/database/operations.py` | Add participant validation queries | + +## Implementation Steps + +### Step 1: Create Authorization Utility (30 min) + +Create `backend/app/websocket/auth.py`: + +```python +from uuid import UUID +from app.database.operations import db_ops + +async def get_user_role_in_game(user_id: int, game_id: UUID) -> str | None: + """ + 
Returns user's role in game: 'home', 'away', 'spectator', or None if not authorized. + """ + game = await db_ops.get_game(game_id) + if not game: + return None + + if game.home_user_id == user_id: + return "home" + elif game.away_user_id == user_id: + return "away" + elif game.allow_spectators: + return "spectator" + return None + +async def require_game_participant(sid: str, game_id: UUID, required_role: str | None = None) -> bool: + """ + Validate user can access game. Emits error and returns False if unauthorized. + """ + user_id = await manager.get_user_id(sid) + role = await get_user_role_in_game(user_id, game_id) + + if role is None: + await sio.emit("error", {"message": "Not authorized for this game"}, to=sid) + return False + + if required_role and role != required_role: + await sio.emit("error", {"message": f"Requires {required_role} role"}, to=sid) + return False + + return True + +async def require_team_control(sid: str, game_id: UUID, team_id: int) -> bool: + """ + Validate user controls specified team. 
+ """ + user_id = await manager.get_user_id(sid) + game = await db_ops.get_game(game_id) + + if team_id == game.home_team_id and game.home_user_id == user_id: + return True + elif team_id == game.away_team_id and game.away_user_id == user_id: + return True + + await sio.emit("error", {"message": "Not authorized for this team"}, to=sid) + return False +``` + +### Step 2: Add User Tracking to ConnectionManager (30 min) + +Update `backend/app/websocket/connection_manager.py`: + +```python +class ConnectionManager: + def __init__(self): + self.active_connections: dict[str, int] = {} # sid -> user_id + self.user_games: dict[int, set[UUID]] = {} # user_id -> game_ids + self.game_rooms: dict[UUID, set[str]] = {} # game_id -> sids + + async def get_user_id(self, sid: str) -> int | None: + return self.active_connections.get(sid) + + async def get_user_games(self, user_id: int) -> set[UUID]: + return self.user_games.get(user_id, set()) +``` + +### Step 3: Update join_game Handler (30 min) + +```python +@sio.event +async def join_game(sid, data): + game_id = UUID(data.get("game_id")) + + # Authorization check + if not await require_game_participant(sid, game_id): + return # Error already emitted + + user_id = await manager.get_user_id(sid) + role = await get_user_role_in_game(user_id, game_id) + + await manager.join_game(sid, game_id, role) + # ... rest of handler +``` + +### Step 4: Update Decision Handlers (1-2 hours) + +Each decision handler needs team ownership validation: + +```python +@sio.event +async def submit_defensive_decision(sid, data): + game_id = UUID(data.get("game_id")) + team_id = data.get("team_id") + + # Authorization: must control this team + if not await require_team_control(sid, game_id, team_id): + return + + # ... 
rest of handler +``` + +Apply to: +- [ ] `submit_defensive_decision` +- [ ] `submit_offensive_decision` +- [ ] `request_pinch_hitter` +- [ ] `request_defensive_replacement` +- [ ] `request_pitching_change` +- [ ] `roll_dice` +- [ ] `submit_manual_outcome` + +### Step 5: Update Spectator-Only Handlers (30 min) + +```python +@sio.event +async def get_lineup(sid, data): + game_id = UUID(data.get("game_id")) + + # Authorization: any participant (including spectators) + if not await require_game_participant(sid, game_id): + return + + # ... rest of handler +``` + +Apply to: +- [ ] `get_lineup` +- [ ] `get_box_score` + +### Step 6: Add Database Queries (30 min) + +Add to `backend/app/database/operations.py`: + +```python +async def get_game_participants(self, game_id: UUID) -> dict: + """Get home_user_id, away_user_id, allow_spectators for game.""" + async with AsyncSessionLocal() as session: + result = await session.execute( + select(Game.home_user_id, Game.away_user_id, Game.allow_spectators) + .where(Game.id == game_id) + ) + row = result.first() + if row: + return { + "home_user_id": row.home_user_id, + "away_user_id": row.away_user_id, + "allow_spectators": row.allow_spectators + } + return None +``` + +### Step 7: Write Tests (1 hour) + +Create `backend/tests/unit/websocket/test_authorization.py`: + +```python +import pytest +from app.websocket.auth import get_user_role_in_game, require_game_participant + +class TestWebSocketAuthorization: + """Tests for WebSocket authorization utilities.""" + + async def test_home_user_gets_home_role(self): + """Home team owner gets 'home' role.""" + + async def test_away_user_gets_away_role(self): + """Away team owner gets 'away' role.""" + + async def test_spectator_allowed_when_enabled(self): + """Non-participant gets 'spectator' when allowed.""" + + async def test_unauthorized_user_rejected(self): + """Non-participant rejected when spectators disabled.""" + + async def test_require_team_control_validates_ownership(self): + 
"""User can only control their own team.""" +``` + +## Verification Checklist + +- [ ] All 11 TODO comments addressed +- [ ] Home user can only control home team +- [ ] Away user can only control away team +- [ ] Spectators can view but not act +- [ ] Unauthorized users rejected with clear error +- [ ] Unit tests pass +- [ ] Manual test: try joining game as wrong user + +## Rollback Plan + +If issues arise: +1. Revert `handlers.py` changes +2. Keep authorization utility for future use +3. Add rate limiting as temporary mitigation + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider caching user-game associations to reduce DB queries +- May want to add audit logging for authorization failures +- Future: Add game invite system for private games diff --git a/.claude/plans/002-websocket-locking.md b/.claude/plans/002-websocket-locking.md new file mode 100644 index 0000000..25cce50 --- /dev/null +++ b/.claude/plans/002-websocket-locking.md @@ -0,0 +1,266 @@ +# Plan 002: WebSocket Handler Locking + +**Priority**: CRITICAL +**Effort**: 2-3 hours +**Status**: NOT STARTED +**Risk Level**: HIGH - Data corruption + +--- + +## Problem Statement + +WebSocket handlers mutate `GameState` without acquiring per-game locks, allowing concurrent handlers to corrupt game state. + +**Example Race Condition**: +``` +Time Client A (roll_dice) Client B (roll_dice) +---- -------------------- -------------------- +T1 Read pending_roll = None +T2 Read pending_roll = None +T3 Set pending_roll = RollA +T4 Set pending_roll = RollB (overwrites!) +T5 Process with RollA +T6 Process with RollB (uses wrong roll!) 
+``` + +## Impact + +- **Data Corruption**: Invalid game states +- **Invalid Outcomes**: Wrong play resolutions +- **User Trust**: Inconsistent game behavior + +## Files to Modify + +| File | Changes | +|------|---------| +| `backend/app/websocket/handlers.py` | Add lock acquisition to handlers | +| `backend/app/core/state_manager.py` | Expose lock acquisition method | + +## Current Lock Implementation + +The `StateManager` already has per-game locks: + +```python +# backend/app/core/state_manager.py +class StateManager: + def __init__(self): + self._game_locks: dict[UUID, asyncio.Lock] = {} + + def _get_game_lock(self, game_id: UUID) -> asyncio.Lock: + if game_id not in self._game_locks: + self._game_locks[game_id] = asyncio.Lock() + return self._game_locks[game_id] +``` + +**Problem**: Locks exist but handlers don't use them! + +## Implementation Steps + +### Step 1: Expose Lock Context Manager (15 min) + +Update `backend/app/core/state_manager.py`: + +```python +from contextlib import asynccontextmanager + +class StateManager: + # ... existing code ... + + @asynccontextmanager + async def game_lock(self, game_id: UUID): + """ + Acquire exclusive lock for game operations. + Use this for any handler that modifies game state. 
+ + Usage: + async with state_manager.game_lock(game_id): + # Perform state modifications + """ + lock = self._get_game_lock(game_id) + async with lock: + yield +``` + +### Step 2: Identify Handlers Requiring Locks (15 min) + +Handlers that modify state (MUST lock): +- [x] `submit_defensive_decision` - modifies pending decisions +- [x] `submit_offensive_decision` - modifies pending decisions +- [x] `roll_dice` - modifies pending_manual_roll +- [x] `submit_manual_outcome` - modifies game state +- [x] `request_pinch_hitter` - modifies lineup +- [x] `request_defensive_replacement` - modifies lineup +- [x] `request_pitching_change` - modifies lineup + +Handlers that only read (no lock needed): +- [ ] `join_game` - read only +- [ ] `leave_game` - read only +- [ ] `get_lineup` - read only +- [ ] `get_box_score` - read only + +### Step 3: Update Decision Handlers (30 min) + +```python +@sio.event +async def submit_defensive_decision(sid, data): + game_id = UUID(data.get("game_id")) + + async with state_manager.game_lock(game_id): + # All state modifications inside lock + game_state = state_manager.get_game_state(game_id) + if not game_state: + await sio.emit("error", {"message": "Game not found"}, to=sid) + return + + # Process decision + decision = DefensiveDecision(**data.get("decision")) + game_state.pending_defensive_decision = decision + + # Check if both decisions received + if game_state.pending_offensive_decision: + # Resolve play within lock + result = await game_engine.resolve_play(game_state) + await manager.broadcast_to_game(game_id, "play_resolved", result) +``` + +### Step 4: Update Roll/Outcome Handlers (30 min) + +```python +@sio.event +async def roll_dice(sid, data): + game_id = UUID(data.get("game_id")) + roll_type = data.get("roll_type") + + async with state_manager.game_lock(game_id): + game_state = state_manager.get_game_state(game_id) + + # Validate no pending roll + if game_state.pending_manual_roll: + await sio.emit("error", {"message": "Roll 
already pending"}, to=sid) + return + + # Perform roll and set pending + roll = dice.roll(roll_type, game_state.league_id) + game_state.pending_manual_roll = roll + + await manager.broadcast_to_game(game_id, "dice_rolled", roll.to_dict()) +``` + +### Step 5: Update Substitution Handlers (30 min) + +```python +@sio.event +async def request_pinch_hitter(sid, data): + game_id = UUID(data.get("game_id")) + + async with state_manager.game_lock(game_id): + game_state = state_manager.get_game_state(game_id) + + # Validate and process substitution + result = await substitution_manager.process_pinch_hitter( + game_state, + entering_player_id=data.get("entering_player_id"), + exiting_player_id=data.get("exiting_player_id") + ) + + if result.success: + await manager.broadcast_to_game(game_id, "player_substituted", result.to_dict()) + else: + await sio.emit("error", {"message": result.error}, to=sid) +``` + +### Step 6: Add Lock Timeout (15 min) + +Prevent deadlocks with timeout: + +```python +@asynccontextmanager +async def game_lock(self, game_id: UUID, timeout: float = 30.0): + """Acquire game lock with timeout.""" + lock = self._get_game_lock(game_id) + try: + await asyncio.wait_for(lock.acquire(), timeout=timeout) + yield + except asyncio.TimeoutError: + logger.error(f"Lock timeout for game {game_id}") + raise RuntimeError(f"Could not acquire lock for game {game_id}") + finally: + if lock.locked(): + lock.release() +``` + +### Step 7: Write Concurrency Tests (30 min) + +Create `backend/tests/unit/websocket/test_handler_locking.py`: + +```python +import pytest +import asyncio +from unittest.mock import AsyncMock, patch + +class TestHandlerLocking: + """Tests for WebSocket handler concurrency.""" + + @pytest.mark.asyncio + async def test_concurrent_rolls_serialized(self): + """Two concurrent roll_dice calls should be serialized.""" + results = [] + + async def mock_roll_dice(sid, data): + async with state_manager.game_lock(game_id): + results.append(f"start_{sid}") + await 
asyncio.sleep(0.1) # Simulate work + results.append(f"end_{sid}") + + # Launch concurrent handlers + await asyncio.gather( + mock_roll_dice("sid1", {}), + mock_roll_dice("sid2", {}) + ) + + # Should be serialized: start_X, end_X, start_Y, end_Y + assert results[0].startswith("start") + assert results[1].startswith("end") + assert results[0].split("_")[1] == results[1].split("_")[1] + + @pytest.mark.asyncio + async def test_lock_timeout_raises_error(self): + """Lock acquisition should timeout if held too long.""" + + @pytest.mark.asyncio + async def test_different_games_not_blocked(self): + """Locks for different games should not block each other.""" +``` + +## Verification Checklist + +- [ ] All state-modifying handlers use `game_lock` +- [ ] Lock timeout prevents deadlocks +- [ ] Different games can proceed concurrently +- [ ] Same game operations are serialized +- [ ] Concurrency tests pass +- [ ] Manual test: rapid-fire decisions don't corrupt state + +## Performance Considerations + +- Lock contention should be minimal (turn-based game) +- 30-second timeout is generous for any operation +- Per-game locks allow parallel games to proceed + +## Rollback Plan + +If issues arise: +1. Remove lock calls from handlers +2. Add rate limiting as temporary mitigation +3. 
Investigate specific race condition scenarios + +## Dependencies + +- None (can be implemented independently) +- Recommended: Implement after Plan 001 (Authorization) + +## Notes + +- Consider adding lock metrics for monitoring +- May want to add retry logic for timeout scenarios +- Future: Consider optimistic locking for read-heavy operations diff --git a/.claude/plans/003-idle-game-eviction.md b/.claude/plans/003-idle-game-eviction.md new file mode 100644 index 0000000..7af8307 --- /dev/null +++ b/.claude/plans/003-idle-game-eviction.md @@ -0,0 +1,338 @@ +# Plan 003: Idle Game Eviction + +**Priority**: CRITICAL +**Effort**: 1-2 hours +**Status**: NOT STARTED +**Risk Level**: HIGH - Memory leak / OOM + +--- + +## Problem Statement + +The `StateManager` tracks `_last_access[game_id]` for each game but **never uses it for eviction**. Games remain in memory indefinitely, causing unbounded memory growth. + +```python +# backend/app/core/state_manager.py +self._last_access[game_id] = pendulum.now("UTC") # Tracked but never used! +``` + +## Impact + +- **Memory**: ~50KB per game × 1000 games = 50MB+ after a month +- **Stability**: OOM crash after days/weeks of operation +- **Performance**: Degraded performance as dictionaries grow + +## Files to Modify + +| File | Changes | +|------|---------| +| `backend/app/core/state_manager.py` | Add eviction logic | +| `backend/app/main.py` | Start background eviction task | +| `backend/app/config.py` | Add eviction configuration | + +## Implementation Steps + +### Step 1: Add Configuration (10 min) + +Update `backend/app/config.py`: + +```python +class Settings(BaseSettings): + # ... existing settings ... 
+ + # Game eviction settings + game_idle_timeout_hours: int = 24 # Evict games idle > 24 hours + game_eviction_interval_minutes: int = 60 # Check every hour + game_max_in_memory: int = 500 # Hard limit on in-memory games +``` + +### Step 2: Implement Eviction Logic (30 min) + +Update `backend/app/core/state_manager.py`: + +```python +import pendulum +from app.config import settings + +class StateManager: + # ... existing code ... + + async def evict_idle_games(self) -> list[UUID]: + """ + Remove games that have been idle beyond the timeout threshold. + Returns list of evicted game IDs. + """ + now = pendulum.now("UTC") + timeout_seconds = settings.game_idle_timeout_hours * 3600 + evicted = [] + + # Find idle games + for game_id, last_access in list(self._last_access.items()): + idle_seconds = (now - last_access).total_seconds() + if idle_seconds > timeout_seconds: + evicted.append(game_id) + + # Evict them + for game_id in evicted: + await self._evict_game(game_id) + logger.info(f"Evicted idle game {game_id} (idle {idle_seconds/3600:.1f} hours)") + + if evicted: + logger.info(f"Evicted {len(evicted)} idle games. Active: {len(self._games)}") + + return evicted + + async def _evict_game(self, game_id: UUID) -> None: + """ + Remove a single game from memory. + Persists final state to database before removal. + """ + # Persist final state + if game_id in self._games: + game_state = self._games[game_id] + try: + await db_ops.save_game_state(game_id, game_state) + logger.debug(f"Persisted game {game_id} before eviction") + except Exception as e: + logger.error(f"Failed to persist game {game_id}: {e}") + + # Remove from all tracking dictionaries + self._games.pop(game_id, None) + self._lineups.pop(game_id, None) + self._last_access.pop(game_id, None) + self._game_locks.pop(game_id, None) + + async def enforce_memory_limit(self) -> list[UUID]: + """ + Enforce hard limit on in-memory games. + Evicts oldest games if limit exceeded. 
+ """ + if len(self._games) <= settings.game_max_in_memory: + return [] + + # Sort by last access time (oldest first) + sorted_games = sorted( + self._last_access.items(), + key=lambda x: x[1] + ) + + # Evict oldest until under limit + to_evict = len(self._games) - settings.game_max_in_memory + evicted = [] + + for game_id, _ in sorted_games[:to_evict]: + await self._evict_game(game_id) + evicted.append(game_id) + logger.warning(f"Force-evicted game {game_id} (memory limit)") + + return evicted + + def get_memory_stats(self) -> dict: + """Return memory usage statistics.""" + return { + "active_games": len(self._games), + "max_games": settings.game_max_in_memory, + "oldest_game_hours": self._get_oldest_game_age_hours(), + "total_lineups_cached": sum(len(l) for l in self._lineups.values()) + } + + def _get_oldest_game_age_hours(self) -> float: + if not self._last_access: + return 0.0 + oldest = min(self._last_access.values()) + return (pendulum.now("UTC") - oldest).total_seconds() / 3600 +``` + +### Step 3: Create Background Task (30 min) + +Update `backend/app/main.py`: + +```python +import asyncio +from contextlib import asynccontextmanager +from app.core.state_manager import state_manager +from app.config import settings + +# Background task handle +eviction_task: asyncio.Task | None = None + +async def periodic_eviction(): + """Background task to periodically evict idle games.""" + interval = settings.game_eviction_interval_minutes * 60 + + while True: + try: + await asyncio.sleep(interval) + + # Run eviction + evicted = await state_manager.evict_idle_games() + + # Enforce memory limit + force_evicted = await state_manager.enforce_memory_limit() + + # Log stats + stats = state_manager.get_memory_stats() + logger.info(f"Memory stats: {stats}") + + except asyncio.CancelledError: + logger.info("Eviction task cancelled") + break + except Exception as e: + logger.error(f"Eviction task error: {e}") + # Continue running despite errors + +@asynccontextmanager +async def 
lifespan(app: FastAPI): + """Application lifespan handler.""" + global eviction_task + + # Startup + logger.info("Starting background eviction task") + eviction_task = asyncio.create_task(periodic_eviction()) + + yield + + # Shutdown + logger.info("Stopping background eviction task") + if eviction_task: + eviction_task.cancel() + try: + await eviction_task + except asyncio.CancelledError: + pass + +app = FastAPI(lifespan=lifespan) +``` + +### Step 4: Add Health Endpoint (15 min) + +Add to `backend/app/api/routes.py`: + +```python +@router.get("/health/memory") +async def memory_health(): + """Return memory usage statistics.""" + stats = state_manager.get_memory_stats() + + # Determine health status + usage_pct = stats["active_games"] / stats["max_games"] * 100 + if usage_pct > 90: + status = "critical" + elif usage_pct > 75: + status = "warning" + else: + status = "healthy" + + return { + "status": status, + "usage_percent": round(usage_pct, 1), + **stats + } +``` + +### Step 5: Write Tests (30 min) + +Create `backend/tests/unit/core/test_game_eviction.py`: + +```python +import pytest +import pendulum +from uuid import uuid4 +from unittest.mock import patch, AsyncMock + +class TestGameEviction: + """Tests for idle game eviction.""" + + @pytest.fixture + def state_manager(self): + from app.core.state_manager import StateManager + return StateManager() + + @pytest.mark.asyncio + async def test_evict_idle_games_removes_old_games(self, state_manager): + """Games idle beyond threshold are evicted.""" + game_id = uuid4() + + # Create game with old timestamp + state_manager._games[game_id] = MockGameState() + state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=25) + + with patch.object(state_manager, '_evict_game', new_callable=AsyncMock) as mock: + evicted = await state_manager.evict_idle_games() + + assert game_id in evicted + mock.assert_called_once_with(game_id) + + @pytest.mark.asyncio + async def test_evict_idle_games_keeps_active_games(self, 
state_manager): + """Recently accessed games are not evicted.""" + game_id = uuid4() + + state_manager._games[game_id] = MockGameState() + state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=1) + + evicted = await state_manager.evict_idle_games() + + assert game_id not in evicted + assert game_id in state_manager._games + + @pytest.mark.asyncio + async def test_enforce_memory_limit_evicts_oldest(self, state_manager): + """Oldest games evicted when memory limit exceeded.""" + # Create games at different times + for i in range(10): + game_id = uuid4() + state_manager._games[game_id] = MockGameState() + state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=i) + + with patch.object(settings, 'game_max_in_memory', 5): + evicted = await state_manager.enforce_memory_limit() + + assert len(evicted) == 5 + assert len(state_manager._games) == 5 + + @pytest.mark.asyncio + async def test_evict_game_persists_state(self, state_manager): + """Game state is persisted before eviction.""" + game_id = uuid4() + game_state = MockGameState() + state_manager._games[game_id] = game_state + + with patch('app.database.operations.db_ops.save_game_state', new_callable=AsyncMock) as mock: + await state_manager._evict_game(game_id) + + mock.assert_called_once_with(game_id, game_state) +``` + +## Verification Checklist + +- [ ] Idle games are evicted after 24 hours +- [ ] Memory limit is enforced +- [ ] Game state is persisted before eviction +- [ ] Background task runs without errors +- [ ] Health endpoint shows accurate stats +- [ ] Tests pass + +## Monitoring + +After deployment, monitor: +- `/health/memory` endpoint +- Log messages for eviction events +- Memory usage of the process + +## Rollback Plan + +If issues arise: +1. Increase `game_idle_timeout_hours` to reduce evictions +2. Increase `game_max_in_memory` limit +3. 
Disable eviction task (comment out in lifespan) + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider adding WebSocket notification before eviction +- May want to add "extend session" API for active users +- Future: Add Redis-backed state for horizontal scaling diff --git a/.claude/plans/004-alembic-migrations.md b/.claude/plans/004-alembic-migrations.md new file mode 100644 index 0000000..d38aa8c --- /dev/null +++ b/.claude/plans/004-alembic-migrations.md @@ -0,0 +1,390 @@ +# Plan 004: Initialize Alembic Migrations + +**Priority**: CRITICAL +**Effort**: 2-3 hours +**Status**: NOT STARTED +**Risk Level**: HIGH - Schema evolution blocked + +--- + +## Problem Statement + +The database schema is created via `Base.metadata.create_all()` with no migration history. Only one migration file exists (`004_create_stat_materialized_views.py`), and it's for materialized views only. + +**Current State**: +- No version control of schema changes +- Cannot rollback to previous schema versions +- No documentation of schema evolution +- Production schema sync is risky + +## Impact + +- **Operations**: Cannot safely evolve schema +- **Rollback**: No way to revert schema changes +- **Audit**: No history of what changed when +- **Team**: Other developers can't sync schema + +## Files to Modify/Create + +| File | Action | +|------|--------| +| `backend/alembic/` | Initialize properly | +| `backend/alembic/env.py` | Configure for async SQLAlchemy | +| `backend/alembic/versions/001_initial_schema.py` | Create initial migration | +| `backend/app/database/session.py` | Remove `create_all()` call | + +## Implementation Steps + +### Step 1: Backup Current Schema (15 min) + +```bash +# Export current schema +cd /mnt/NV2/Development/strat-gameplay-webapp/backend + +# Dump schema from database +pg_dump --schema-only -d strat_gameplay > schema_backup.sql + +# Also save current models +cp app/models/db_models.py db_models_backup.py +``` + +### Step 2: Configure 
Alembic for Async (30 min) + +Update `backend/alembic/env.py`: + +```python +import asyncio +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + +from app.models.db_models import Base +from app.config import settings + +# Alembic Config object +config = context.config + +# Set database URL from settings +config.set_main_option("sqlalchemy.url", settings.database_url.replace("+asyncpg", "")) + +# Interpret the config file for Python logging +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +# Model metadata for autogenerate +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode.""" + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """Run migrations in 'online' mode with async engine.""" + connectable = async_engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() +``` + +### Step 3: Create Initial Migration (30 min) + +```bash +# Generate initial migration from existing models 
+cd /mnt/NV2/Development/strat-gameplay-webapp/backend +alembic revision --autogenerate -m "Initial schema from models" +``` + +Review the generated migration and ensure it matches existing schema. + +Create/update `backend/alembic/versions/001_initial_schema.py`: + +```python +"""Initial schema from models + +Revision ID: 001 +Revises: +Create Date: 2025-01-27 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers +revision: str = '001' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### Game table ### + op.create_table('games', + sa.Column('id', postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('league_id', sa.String(10), nullable=False), + sa.Column('home_team_id', sa.Integer(), nullable=False), + sa.Column('away_team_id', sa.Integer(), nullable=False), + sa.Column('home_user_id', sa.Integer(), nullable=True), + sa.Column('away_user_id', sa.Integer(), nullable=True), + sa.Column('current_inning', sa.Integer(), server_default='1'), + sa.Column('current_half', sa.String(10), server_default='top'), + sa.Column('home_score', sa.Integer(), server_default='0'), + sa.Column('away_score', sa.Integer(), server_default='0'), + sa.Column('status', sa.String(20), server_default='pending'), + sa.Column('allow_spectators', sa.Boolean(), server_default='true'), + sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()')), + sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()')), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('idx_game_status', 'games', ['status']) + op.create_index('idx_game_league', 'games', ['league_id']) + + # ### Lineup table ### + op.create_table('lineups', + sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), + sa.Column('game_id', 
postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('team_id', sa.Integer(), nullable=False), + sa.Column('card_id', sa.Integer(), nullable=True), + sa.Column('player_id', sa.Integer(), nullable=True), + sa.Column('position', sa.String(5), nullable=False), + sa.Column('batting_order', sa.Integer(), nullable=True), + sa.Column('is_active', sa.Boolean(), server_default='true'), + sa.Column('entered_game_at', sa.Integer(), nullable=True), + sa.Column('exited_game_at', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['game_id'], ['games.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('idx_lineup_game', 'lineups', ['game_id']) + op.create_index('idx_lineup_game_team', 'lineups', ['game_id', 'team_id']) + + # ### Play table ### + op.create_table('plays', + sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), + sa.Column('game_id', postgresql.UUID(as_uuid=True), nullable=False), + sa.Column('play_number', sa.Integer(), nullable=False), + sa.Column('inning', sa.Integer(), nullable=False), + sa.Column('half', sa.String(10), nullable=False), + sa.Column('outs_before', sa.Integer(), nullable=False), + sa.Column('outs_after', sa.Integer(), nullable=False), + sa.Column('batter_id', sa.Integer(), nullable=True), + sa.Column('pitcher_id', sa.Integer(), nullable=True), + sa.Column('catcher_id', sa.Integer(), nullable=True), + sa.Column('outcome', sa.String(50), nullable=False), + sa.Column('description', sa.Text(), nullable=True), + # ... additional columns ... 
+ sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()')), + sa.ForeignKeyConstraint(['game_id'], ['games.id'], ondelete='CASCADE'), + sa.ForeignKeyConstraint(['batter_id'], ['lineups.id']), + sa.ForeignKeyConstraint(['pitcher_id'], ['lineups.id']), + sa.ForeignKeyConstraint(['catcher_id'], ['lineups.id']), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('idx_play_game', 'plays', ['game_id']) + op.create_index('idx_play_game_number', 'plays', ['game_id', 'play_number']) + + # ### Additional tables (Roll, GameSession, RosterLink, etc.) ### + # ... (similar patterns for remaining tables) + + +def downgrade() -> None: + op.drop_table('plays') + op.drop_table('lineups') + op.drop_table('games') + # ... drop remaining tables in reverse order +``` + +### Step 4: Stamp Existing Database (15 min) + +For existing databases, mark as already at initial migration: + +```bash +# Mark existing database as having initial schema +alembic stamp 001 +``` + +### Step 5: Remove create_all() (10 min) + +Update `backend/app/database/session.py`: + +```python +async def init_db() -> None: + """ + Initialize database connection. + + NOTE: Schema creation is now handled by Alembic migrations. + Run `alembic upgrade head` to create/update schema. + """ + # Removed: await conn.run_sync(Base.metadata.create_all) + logger.info("Database connection initialized") +``` + +### Step 6: Update README (15 min) + +Add to `backend/README.md`: + +```markdown +## Database Migrations + +This project uses Alembic for database migrations. + +### Initial Setup + +```bash +# Apply all migrations +alembic upgrade head +``` + +### Creating New Migrations + +```bash +# Auto-generate from model changes +alembic revision --autogenerate -m "Description of changes" + +# Review the generated migration! 
+# Then apply: +alembic upgrade head +``` + +### Rolling Back + +```bash +# Rollback one migration +alembic downgrade -1 + +# Rollback to specific revision +alembic downgrade 001 +``` + +### Viewing History + +```bash +# Show migration history +alembic history + +# Show current revision +alembic current +``` +``` + +### Step 7: Integrate Existing Materialized Views Migration (15 min) + +Ensure `004_create_stat_materialized_views.py` is properly linked: + +```python +# Update revision to chain properly +revision: str = '004_stat_views' +down_revision: str = '001' # Chain to initial +``` + +### Step 8: Write Migration Tests (30 min) + +Create `backend/tests/integration/test_migrations.py`: + +```python +import pytest +from alembic import command +from alembic.config import Config + +class TestMigrations: + """Tests for Alembic migrations.""" + + @pytest.fixture + def alembic_config(self): + config = Config("alembic.ini") + return config + + def test_upgrade_to_head(self, alembic_config): + """Migrations can be applied cleanly.""" + command.upgrade(alembic_config, "head") + + def test_downgrade_to_base(self, alembic_config): + """Migrations can be rolled back.""" + command.upgrade(alembic_config, "head") + command.downgrade(alembic_config, "base") + + def test_upgrade_downgrade_upgrade(self, alembic_config): + """Full round-trip migration works.""" + command.upgrade(alembic_config, "head") + command.downgrade(alembic_config, "base") + command.upgrade(alembic_config, "head") +``` + +## Verification Checklist + +- [ ] `alembic upgrade head` creates all tables +- [ ] `alembic downgrade base` removes all tables +- [ ] Existing database can be stamped without issues +- [ ] New migrations can be auto-generated +- [ ] Migration tests pass +- [ ] README updated with migration instructions + +## CI/CD Integration + +Add to CI pipeline: + +```yaml +# .github/workflows/test.yml +- name: Run migrations + run: | + cd backend + alembic upgrade head +``` + +## Rollback Plan + +If 
issues arise: +1. `alembic downgrade -1` to revert last migration +2. Restore from `schema_backup.sql` if needed +3. Re-enable `create_all()` temporarily + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Always review auto-generated migrations before applying +- Test migrations on staging before production +- Keep migrations small and focused +- Future: Add data migrations for complex changes diff --git a/.claude/plans/005-exception-handling.md b/.claude/plans/005-exception-handling.md new file mode 100644 index 0000000..371985d --- /dev/null +++ b/.claude/plans/005-exception-handling.md @@ -0,0 +1,326 @@ +# Plan 005: Replace Broad Exception Handling + +**Priority**: HIGH +**Effort**: 2-3 hours +**Status**: NOT STARTED +**Risk Level**: MEDIUM - Hides bugs + +--- + +## Problem Statement + +Multiple locations catch bare `Exception` instead of specific types, which: +- Hides bugs by catching `SystemExit`, `KeyboardInterrupt` +- Can't distinguish recoverable vs fatal errors +- Swallows development errors that should fail fast + +**Locations Identified** (6+ instances): +- `backend/app/core/substitution_manager.py` +- `backend/app/core/game_engine.py` +- `backend/app/api/routes.py` +- `backend/app/websocket/handlers.py` + +## Impact + +- **Debugging**: Hard to diagnose root cause of failures +- **Reliability**: Silent failures instead of clear errors +- **Development**: Bugs hidden during development + +## Files to Modify + +| File | Instances | +|------|-----------| +| `backend/app/core/substitution_manager.py` | 2-3 | +| `backend/app/core/game_engine.py` | 1-2 | +| `backend/app/api/routes.py` | 1-2 | +| `backend/app/websocket/handlers.py` | 2-3 | + +## Implementation Steps + +### Step 1: Identify Specific Exceptions (30 min) + +Map each `except Exception` to specific types: + +```python +# Database operations +from sqlalchemy.exc import SQLAlchemyError, IntegrityError, OperationalError + +# Validation +from pydantic import 
ValidationError + +# Async operations +import asyncio +asyncio.TimeoutError +asyncio.CancelledError + +# HTTP client +import httpx +httpx.HTTPError +httpx.TimeoutException + +# Custom application errors +from app.core.exceptions import ( + GameNotFoundError, + InvalidStateError, + AuthorizationError +) +``` + +### Step 2: Create Custom Exception Classes (20 min) + +Create `backend/app/core/exceptions.py`: + +```python +"""Custom exceptions for the game engine.""" + + +class GameEngineError(Exception): + """Base class for game engine errors.""" + pass + + +class GameNotFoundError(GameEngineError): + """Raised when game doesn't exist.""" + def __init__(self, game_id): + self.game_id = game_id + super().__init__(f"Game not found: {game_id}") + + +class InvalidStateError(GameEngineError): + """Raised when game is in invalid state for operation.""" + def __init__(self, message: str, current_state: str = None): + self.current_state = current_state + super().__init__(message) + + +class SubstitutionError(GameEngineError): + """Raised when substitution is invalid.""" + pass + + +class AuthorizationError(GameEngineError): + """Raised when user lacks permission.""" + pass + + +class ExternalAPIError(GameEngineError): + """Raised when external API call fails.""" + def __init__(self, service: str, message: str): + self.service = service + super().__init__(f"{service} API error: {message}") +``` + +### Step 3: Update substitution_manager.py (30 min) + +**Before**: +```python +except Exception as e: + logger.error(f"Substitution failed: {e}") + return SubstitutionResult(success=False, error=str(e)) +``` + +**After**: +```python +from sqlalchemy.exc import SQLAlchemyError +from pydantic import ValidationError +from app.core.exceptions import SubstitutionError, InvalidStateError + +try: + # ... substitution logic ... 
+except ValidationError as e: + logger.warning(f"Invalid substitution data: {e}") + return SubstitutionResult(success=False, error="Invalid substitution data") +except SQLAlchemyError as e: + logger.error(f"Database error during substitution: {e}") + return SubstitutionResult(success=False, error="Database error") +except SubstitutionError as e: + logger.info(f"Substitution rejected: {e}") + return SubstitutionResult(success=False, error=str(e)) +except InvalidStateError as e: + logger.warning(f"Invalid game state for substitution: {e}") + return SubstitutionResult(success=False, error=str(e)) +# Let unexpected errors propagate! +``` + +### Step 4: Update game_engine.py (30 min) + +**Before**: +```python +except Exception as e: + logger.error(f"Play resolution failed: {e}") + raise +``` + +**After**: +```python +from sqlalchemy.exc import SQLAlchemyError +from app.core.exceptions import InvalidStateError, GameNotFoundError + +try: + # ... play resolution logic ... +except InvalidStateError as e: + logger.warning(f"Invalid state for play: {e}") + raise +except SQLAlchemyError as e: + logger.error(f"Database error during play resolution: {e}") + raise InvalidStateError(f"Database error: {e}") +except asyncio.TimeoutError: + logger.error("Play resolution timed out") + raise InvalidStateError("Operation timed out") +# Let unexpected errors propagate with full traceback! +``` + +### Step 5: Update WebSocket handlers.py (30 min) + +**Before**: +```python +@sio.event +async def submit_defensive_decision(sid, data): + try: + # ... handler logic ... 
+ except Exception as e: + logger.error(f"Error in submit_defensive_decision: {e}") + await sio.emit("error", {"message": "Internal error"}, to=sid) +``` + +**After**: +```python +from pydantic import ValidationError +from sqlalchemy.exc import SQLAlchemyError +from app.core.exceptions import ( + GameNotFoundError, + InvalidStateError, + AuthorizationError +) + +@sio.event +async def submit_defensive_decision(sid, data): + try: + # ... handler logic ... + except ValidationError as e: + logger.info(f"Invalid decision data from {sid}: {e}") + await sio.emit("error", {"message": "Invalid decision format"}, to=sid) + except GameNotFoundError as e: + logger.warning(f"Game not found: {e.game_id}") + await sio.emit("error", {"message": "Game not found"}, to=sid) + except InvalidStateError as e: + logger.info(f"Invalid state for decision: {e}") + await sio.emit("error", {"message": str(e)}, to=sid) + except AuthorizationError as e: + logger.warning(f"Unauthorized action from {sid}: {e}") + await sio.emit("error", {"message": "Not authorized"}, to=sid) + except SQLAlchemyError as e: + logger.error(f"Database error in handler: {e}") + await sio.emit("error", {"message": "Database error"}, to=sid) + # Let unexpected errors propagate to Socket.io error handler! 
+``` + +### Step 6: Add Global Error Handler (20 min) + +Update `backend/app/main.py`: + +```python +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse +from app.core.exceptions import GameEngineError + +@app.exception_handler(GameEngineError) +async def game_engine_exception_handler(request: Request, exc: GameEngineError): + """Handle all game engine errors.""" + return JSONResponse( + status_code=400, + content={"error": exc.__class__.__name__, "message": str(exc)} + ) + +# Socket.io error handler +@sio.event +async def error(sid, error): + """Global Socket.io error handler.""" + logger.error(f"Unhandled Socket.io error for {sid}: {error}") +``` + +### Step 7: Write Tests (30 min) + +Create `backend/tests/unit/core/test_exception_handling.py`: + +```python +import pytest +from app.core.exceptions import ( + GameNotFoundError, + InvalidStateError, + SubstitutionError +) + +class TestExceptionHandling: + """Tests for custom exception handling.""" + + def test_game_not_found_includes_id(self): + """GameNotFoundError includes game ID.""" + exc = GameNotFoundError("abc-123") + assert "abc-123" in str(exc) + assert exc.game_id == "abc-123" + + def test_invalid_state_includes_current_state(self): + """InvalidStateError can include current state.""" + exc = InvalidStateError("Cannot roll", current_state="waiting_decision") + assert exc.current_state == "waiting_decision" + + @pytest.mark.asyncio + async def test_handler_returns_specific_error_for_validation(self): + """Handler returns validation-specific error message.""" + # Test that ValidationError is caught and converted + pass + + @pytest.mark.asyncio + async def test_unexpected_errors_propagate(self): + """Unexpected exceptions are not swallowed.""" + # Test that RuntimeError propagates up + pass +``` + +## Verification Checklist + +- [ ] All `except Exception` replaced with specific types +- [ ] Custom exception classes created +- [ ] Error messages are user-friendly (no stack traces) 
+- [ ] Unexpected errors propagate (not swallowed) +- [ ] Logging includes appropriate level (info/warning/error) +- [ ] Tests verify exception handling + +## Pattern Reference + +```python +# GOOD: Specific exceptions with appropriate actions +try: + await db_ops.save_play(play_data) +except IntegrityError: + logger.warning("Duplicate play detected") + raise InvalidStateError("Play already exists") +except OperationalError: + logger.error("Database connection failed") + raise # Let it propagate for retry logic + +# BAD: Bare exception catch +try: + await db_ops.save_play(play_data) +except Exception as e: + logger.error(f"Error: {e}") + return None # Swallows the error! +``` + +## Rollback Plan + +If issues arise: +1. Revert specific file changes +2. Add temporary broad catch as last resort +3. Log the unexpected exception type for future handling + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider adding exception tracking (Sentry) for production +- May want to add retry logic for transient errors +- Future: Add error codes for client-side handling diff --git a/.claude/plans/006-rate-limiting.md b/.claude/plans/006-rate-limiting.md new file mode 100644 index 0000000..afa854e --- /dev/null +++ b/.claude/plans/006-rate-limiting.md @@ -0,0 +1,451 @@ +# Plan 006: Add Rate Limiting + +**Priority**: HIGH +**Effort**: 2-3 hours +**Status**: NOT STARTED +**Risk Level**: MEDIUM - DoS vulnerability + +--- + +## Problem Statement + +No rate limiting exists on WebSocket events or REST API endpoints. 
A malicious or buggy client can: +- Spam decision submissions +- Flood dice roll requests +- Overwhelm the server with requests +- Cause denial of service + +## Impact + +- **Availability**: Server can be overwhelmed +- **Fairness**: Spammers can disrupt games +- **Cost**: Excessive resource usage + +## Files to Modify/Create + +| File | Action | +|------|--------| +| `backend/app/middleware/rate_limit.py` | Create rate limiter | +| `backend/app/websocket/handlers.py` | Add rate limit checks | +| `backend/app/api/routes.py` | Add rate limit decorator | +| `backend/app/config.py` | Add rate limit settings | + +## Implementation Steps + +### Step 1: Add Configuration (10 min) + +Update `backend/app/config.py`: + +```python +class Settings(BaseSettings): + # ... existing settings ... + + # Rate limiting + rate_limit_websocket_per_minute: int = 60 # Events per minute per connection + rate_limit_api_per_minute: int = 100 # API calls per minute per user + rate_limit_decision_per_game: int = 10 # Decisions per minute per game + rate_limit_roll_per_game: int = 20 # Rolls per minute per game +``` + +### Step 2: Create Rate Limiter (45 min) + +Create `backend/app/middleware/rate_limit.py`: + +```python +"""Rate limiting utilities for WebSocket and API endpoints.""" + +import asyncio +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Callable +import logging + +from app.config import settings + +logger = logging.getLogger(f"{__name__}.RateLimiter") + + +@dataclass +class RateLimitBucket: + """Token bucket for rate limiting.""" + tokens: int + max_tokens: int + refill_rate: float # tokens per second + last_refill: datetime = field(default_factory=datetime.utcnow) + + def consume(self, tokens: int = 1) -> bool: + """ + Try to consume tokens. Returns True if allowed, False if rate limited. 
+ """ + self._refill() + if self.tokens >= tokens: + self.tokens -= tokens + return True + return False + + def _refill(self): + """Refill tokens based on time elapsed.""" + now = datetime.utcnow() + elapsed = (now - self.last_refill).total_seconds() + refill_amount = int(elapsed * self.refill_rate) + + if refill_amount > 0: + self.tokens = min(self.max_tokens, self.tokens + refill_amount) + self.last_refill = now + + +class RateLimiter: + """ + Rate limiter for WebSocket connections and API endpoints. + Uses token bucket algorithm for smooth rate limiting. + """ + + def __init__(self): + # Per-connection buckets + self._connection_buckets: dict[str, RateLimitBucket] = {} + # Per-game buckets (for game-specific limits) + self._game_buckets: dict[str, RateLimitBucket] = {} + # Per-user API buckets + self._user_buckets: dict[int, RateLimitBucket] = {} + # Cleanup task + self._cleanup_task: asyncio.Task | None = None + + def get_connection_bucket(self, sid: str) -> RateLimitBucket: + """Get or create bucket for WebSocket connection.""" + if sid not in self._connection_buckets: + self._connection_buckets[sid] = RateLimitBucket( + tokens=settings.rate_limit_websocket_per_minute, + max_tokens=settings.rate_limit_websocket_per_minute, + refill_rate=settings.rate_limit_websocket_per_minute / 60 + ) + return self._connection_buckets[sid] + + def get_game_bucket(self, game_id: str, action: str) -> RateLimitBucket: + """Get or create bucket for game-specific action.""" + key = f"{game_id}:{action}" + if key not in self._game_buckets: + if action == "decision": + limit = settings.rate_limit_decision_per_game + elif action == "roll": + limit = settings.rate_limit_roll_per_game + else: + limit = 30 # Default + + self._game_buckets[key] = RateLimitBucket( + tokens=limit, + max_tokens=limit, + refill_rate=limit / 60 + ) + return self._game_buckets[key] + + def get_user_bucket(self, user_id: int) -> RateLimitBucket: + """Get or create bucket for API user.""" + if user_id not in 
self._user_buckets: + self._user_buckets[user_id] = RateLimitBucket( + tokens=settings.rate_limit_api_per_minute, + max_tokens=settings.rate_limit_api_per_minute, + refill_rate=settings.rate_limit_api_per_minute / 60 + ) + return self._user_buckets[user_id] + + async def check_websocket_limit(self, sid: str) -> bool: + """Check if WebSocket event is allowed.""" + bucket = self.get_connection_bucket(sid) + allowed = bucket.consume() + if not allowed: + logger.warning(f"Rate limited WebSocket connection: {sid}") + return allowed + + async def check_game_limit(self, game_id: str, action: str) -> bool: + """Check if game action is allowed.""" + bucket = self.get_game_bucket(game_id, action) + allowed = bucket.consume() + if not allowed: + logger.warning(f"Rate limited game action: {game_id} {action}") + return allowed + + async def check_api_limit(self, user_id: int) -> bool: + """Check if API call is allowed.""" + bucket = self.get_user_bucket(user_id) + allowed = bucket.consume() + if not allowed: + logger.warning(f"Rate limited API user: {user_id}") + return allowed + + def remove_connection(self, sid: str): + """Clean up when connection closes.""" + self._connection_buckets.pop(sid, None) + + async def cleanup_stale_buckets(self): + """Periodically clean up stale buckets.""" + while True: + await asyncio.sleep(300) # Every 5 minutes + now = datetime.utcnow() + stale_threshold = timedelta(minutes=10) + + # Clean connection buckets + stale_connections = [ + sid for sid, bucket in self._connection_buckets.items() + if now - bucket.last_refill > stale_threshold + ] + for sid in stale_connections: + del self._connection_buckets[sid] + + # Clean game buckets + stale_games = [ + key for key, bucket in self._game_buckets.items() + if now - bucket.last_refill > stale_threshold + ] + for key in stale_games: + del self._game_buckets[key] + + logger.debug(f"Cleaned {len(stale_connections)} connection, {len(stale_games)} game buckets") + + +# Global rate limiter instance 
+rate_limiter = RateLimiter() +``` + +### Step 3: Create Decorator for Handlers (20 min) + +Add to `backend/app/middleware/rate_limit.py`: + +```python +from functools import wraps + +def rate_limited(action: str = "general"): + """ + Decorator for rate-limited WebSocket handlers. + + Usage: + @sio.event + @rate_limited(action="decision") + async def submit_defensive_decision(sid, data): + ... + """ + def decorator(func: Callable): + @wraps(func) + async def wrapper(sid, data, *args, **kwargs): + # Check connection-level limit + if not await rate_limiter.check_websocket_limit(sid): + await sio.emit("error", { + "message": "Rate limited. Please slow down.", + "code": "RATE_LIMITED" + }, to=sid) + return + + # Check game-level limit if game_id in data + game_id = data.get("game_id") if isinstance(data, dict) else None + if game_id and action != "general": + if not await rate_limiter.check_game_limit(str(game_id), action): + await sio.emit("error", { + "message": f"Too many {action} requests for this game.", + "code": "GAME_RATE_LIMITED" + }, to=sid) + return + + return await func(sid, data, *args, **kwargs) + return wrapper + return decorator +``` + +### Step 4: Apply to WebSocket Handlers (30 min) + +Update `backend/app/websocket/handlers.py`: + +```python +from app.middleware.rate_limit import rate_limited, rate_limiter + +@sio.event +async def connect(sid, environ, auth): + # ... existing logic ... + pass + +@sio.event +async def disconnect(sid): + # Clean up rate limiter + rate_limiter.remove_connection(sid) + # ... existing logic ... + +@sio.event +@rate_limited(action="decision") +async def submit_defensive_decision(sid, data): + # ... existing logic (rate limiting handled by decorator) ... + pass + +@sio.event +@rate_limited(action="decision") +async def submit_offensive_decision(sid, data): + # ... existing logic ... + pass + +@sio.event +@rate_limited(action="roll") +async def roll_dice(sid, data): + # ... existing logic ... 
+ pass + +@sio.event +@rate_limited(action="substitution") +async def request_pinch_hitter(sid, data): + # ... existing logic ... + pass + +@sio.event +@rate_limited(action="substitution") +async def request_defensive_replacement(sid, data): + # ... existing logic ... + pass + +@sio.event +@rate_limited(action="substitution") +async def request_pitching_change(sid, data): + # ... existing logic ... + pass + +# Read-only handlers get general rate limit +@sio.event +@rate_limited() +async def get_lineup(sid, data): + # ... existing logic ... + pass + +@sio.event +@rate_limited() +async def get_box_score(sid, data): + # ... existing logic ... + pass +``` + +### Step 5: Add API Rate Limiting (20 min) + +Update `backend/app/api/routes.py`: + +```python +from fastapi import Depends, HTTPException +from app.middleware.rate_limit import rate_limiter + +async def check_rate_limit(user_id: int = Depends(get_current_user_id)): + """Dependency for API rate limiting.""" + if not await rate_limiter.check_api_limit(user_id): + raise HTTPException( + status_code=429, + detail="Rate limit exceeded. Please try again later." + ) + return user_id + +@router.post("/games", dependencies=[Depends(check_rate_limit)]) +async def create_game(...): + # ... existing logic ... + pass + +@router.get("/games/{game_id}", dependencies=[Depends(check_rate_limit)]) +async def get_game(...): + # ... existing logic ... 
pass
+
+### Step 6: Start Cleanup Task (10 min)
+
+Update `backend/app/main.py`:
+
+```python
+from app.middleware.rate_limit import rate_limiter
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Start rate limiter cleanup
+    cleanup_task = asyncio.create_task(rate_limiter.cleanup_stale_buckets())
+
+    yield
+
+    # Stop cleanup task
+    cleanup_task.cancel()
+```
+
+### Step 7: Write Tests (30 min)
+
+Create `backend/tests/unit/middleware/test_rate_limit.py`:
+
+```python
+import pytest
+from app.middleware.rate_limit import RateLimiter, RateLimitBucket
+
+class TestRateLimiting:
+    """Tests for rate limiting."""
+
+    def test_bucket_allows_under_limit(self):
+        """Bucket allows requests under limit."""
+        bucket = RateLimitBucket(tokens=10, max_tokens=10, refill_rate=1)
+        assert bucket.consume() is True
+        assert bucket.tokens == 9
+
+    def test_bucket_denies_over_limit(self):
+        """Bucket denies requests over limit."""
+        bucket = RateLimitBucket(tokens=1, max_tokens=10, refill_rate=0.1)
+        assert bucket.consume() is True
+        assert bucket.consume() is False
+
+    def test_bucket_refills_over_time(self):
+        """Bucket refills tokens over time."""
+        bucket = RateLimitBucket(tokens=0, max_tokens=10, refill_rate=100)
+        from datetime import timedelta  # simulate time passing (replace(second=...-1) would raise at second == 0)
+        bucket.last_refill = bucket.last_refill - timedelta(
+            seconds=1
+        )
+        bucket._refill()
+        assert bucket.tokens > 0
+
+    @pytest.mark.asyncio
+    async def test_rate_limiter_tracks_connections(self):
+        """Rate limiter tracks separate connections."""
+        limiter = RateLimiter()
+        # Different connections get different buckets
+        bucket1 = limiter.get_connection_bucket("sid1")
+        bucket2 = limiter.get_connection_bucket("sid2")
+        assert bucket1 is not bucket2
+
+    @pytest.mark.asyncio
+    async def test_rate_limiter_cleans_up_on_disconnect(self):
+        """Rate limiter cleans up on disconnect."""
+        limiter = RateLimiter()
+        limiter.get_connection_bucket("sid1")
+        assert "sid1" in limiter._connection_buckets
+
+        
limiter.remove_connection("sid1") + assert "sid1" not in limiter._connection_buckets +``` + +## Verification Checklist + +- [ ] WebSocket events are rate limited +- [ ] Game-specific limits work (decisions, rolls) +- [ ] API endpoints are rate limited +- [ ] Rate limit errors return clear messages +- [ ] Cleanup removes stale buckets +- [ ] Tests pass + +## Monitoring + +After deployment, monitor: +- Rate limit hit frequency in logs +- Memory usage of rate limiter +- False positive rate (legitimate users blocked) + +## Rollback Plan + +If issues arise: +1. Increase rate limits in config +2. Disable decorator temporarily +3. Remove rate limit checks from handlers + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider Redis-backed rate limiting for horizontal scaling +- May want different limits for authenticated vs anonymous +- Future: Add configurable rate limits per user tier diff --git a/.claude/plans/007-session-expiration.md b/.claude/plans/007-session-expiration.md new file mode 100644 index 0000000..b633251 --- /dev/null +++ b/.claude/plans/007-session-expiration.md @@ -0,0 +1,416 @@ +# Plan 007: Session Expiration + +**Priority**: HIGH +**Effort**: 1-2 hours +**Status**: NOT STARTED +**Risk Level**: MEDIUM - Zombie connections + +--- + +## Problem Statement + +WebSocket sessions persist indefinitely after network failures. 
There's no: +- Ping timeout configuration for Socket.io +- Session expiration tracking +- Cleanup of zombie connections + +Zombie connections accumulate, causing: +- Memory leaks +- Stale user presence in games +- Inaccurate connection counts + +## Impact + +- **Memory**: Unbounded connection tracking growth +- **UX**: Stale players shown as "connected" +- **Performance**: Broadcasting to dead connections + +## Files to Modify + +| File | Action | +|------|--------| +| `backend/app/main.py` | Configure Socket.io timeouts | +| `backend/app/websocket/connection_manager.py` | Add session expiration | +| `backend/app/websocket/handlers.py` | Handle heartbeat events | + +## Implementation Steps + +### Step 1: Configure Socket.io Timeouts (15 min) + +Update `backend/app/main.py`: + +```python +import socketio + +sio = socketio.AsyncServer( + async_mode="asgi", + cors_allowed_origins="*", + # Timeout configuration + ping_timeout=30, # Wait 30s for pong before disconnect + ping_interval=25, # Send ping every 25s + max_http_buffer_size=1_000_000, # 1MB max message size + logger=True, + engineio_logger=True +) +``` + +**Explanation**: +- `ping_interval=25`: Server sends ping every 25 seconds +- `ping_timeout=30`: Client must respond within 30 seconds +- Total: Connection dies after 55 seconds of no response + +### Step 2: Add Session Tracking (30 min) + +Update `backend/app/websocket/connection_manager.py`: + +```python +import pendulum +from dataclasses import dataclass +from uuid import UUID +import asyncio + +@dataclass +class SessionInfo: + """Tracks WebSocket session metadata.""" + user_id: int | None + connected_at: pendulum.DateTime + last_activity: pendulum.DateTime + games: set[UUID] + ip_address: str | None = None + + +class ConnectionManager: + def __init__(self): + self._sessions: dict[str, SessionInfo] = {} # sid -> SessionInfo + self._user_sessions: dict[int, set[str]] = {} # user_id -> sids + self._game_sessions: dict[UUID, set[str]] = {} # game_id -> sids + 
self._expiration_task: asyncio.Task | None = None + + async def connect(self, sid: str, user_id: int | None = None, ip_address: str | None = None): + """Register new connection.""" + now = pendulum.now("UTC") + self._sessions[sid] = SessionInfo( + user_id=user_id, + connected_at=now, + last_activity=now, + games=set(), + ip_address=ip_address + ) + + if user_id: + if user_id not in self._user_sessions: + self._user_sessions[user_id] = set() + self._user_sessions[user_id].add(sid) + + logger.info(f"Session connected: {sid} (user={user_id})") + + async def disconnect(self, sid: str): + """Clean up disconnected session.""" + session = self._sessions.pop(sid, None) + if session: + # Remove from user tracking + if session.user_id and session.user_id in self._user_sessions: + self._user_sessions[session.user_id].discard(sid) + if not self._user_sessions[session.user_id]: + del self._user_sessions[session.user_id] + + # Remove from game rooms + for game_id in session.games: + if game_id in self._game_sessions: + self._game_sessions[game_id].discard(sid) + + logger.info(f"Session disconnected: {sid} (was connected {session.connected_at})") + + async def update_activity(self, sid: str): + """Update last activity timestamp for session.""" + if sid in self._sessions: + self._sessions[sid].last_activity = pendulum.now("UTC") + + async def get_session(self, sid: str) -> SessionInfo | None: + """Get session info.""" + return self._sessions.get(sid) + + async def get_user_id(self, sid: str) -> int | None: + """Get user ID for session.""" + session = self._sessions.get(sid) + return session.user_id if session else None + + async def join_game(self, sid: str, game_id: UUID): + """Add session to game room.""" + if sid in self._sessions: + self._sessions[sid].games.add(game_id) + + if game_id not in self._game_sessions: + self._game_sessions[game_id] = set() + self._game_sessions[game_id].add(sid) + + await self.update_activity(sid) + + async def leave_game(self, sid: str, game_id: 
UUID): + """Remove session from game room.""" + if sid in self._sessions: + self._sessions[sid].games.discard(game_id) + + if game_id in self._game_sessions: + self._game_sessions[game_id].discard(sid) + + async def expire_inactive_sessions(self, timeout_seconds: int = 300): + """ + Expire sessions with no activity for timeout period. + Called periodically by background task. + """ + now = pendulum.now("UTC") + expired = [] + + for sid, session in list(self._sessions.items()): + inactive_seconds = (now - session.last_activity).total_seconds() + if inactive_seconds > timeout_seconds: + expired.append(sid) + logger.warning(f"Expiring inactive session: {sid} (inactive {inactive_seconds}s)") + + for sid in expired: + await self.disconnect(sid) + # Notify Socket.io to close the connection + try: + await sio.disconnect(sid) + except Exception as e: + logger.debug(f"Error disconnecting expired session {sid}: {e}") + + if expired: + logger.info(f"Expired {len(expired)} inactive sessions") + + return expired + + def get_stats(self) -> dict: + """Return connection statistics.""" + return { + "total_sessions": len(self._sessions), + "unique_users": len(self._user_sessions), + "active_games": len(self._game_sessions), + "sessions_per_game": { + str(gid): len(sids) for gid, sids in self._game_sessions.items() + } + } + +# Global instance +manager = ConnectionManager() +``` + +### Step 3: Start Expiration Background Task (15 min) + +Update `backend/app/main.py`: + +```python +from app.websocket.connection_manager import manager + +async def session_expiration_task(): + """Background task to expire inactive sessions.""" + while True: + try: + await asyncio.sleep(60) # Check every minute + await manager.expire_inactive_sessions(timeout_seconds=300) # 5 min timeout + except asyncio.CancelledError: + break + except Exception as e: + logger.error(f"Session expiration error: {e}") + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Start session expiration task + 
expiration_task = asyncio.create_task(session_expiration_task()) + + yield + + # Stop task + expiration_task.cancel() + try: + await expiration_task + except asyncio.CancelledError: + pass +``` + +### Step 4: Update Handlers to Track Activity (20 min) + +Update `backend/app/websocket/handlers.py`: + +```python +from app.websocket.connection_manager import manager + +@sio.event +async def connect(sid, environ, auth): + """Handle new connection.""" + # Extract user info from auth + user_id = None + if auth and "token" in auth: + user_id = await extract_user_id_from_token(auth["token"]) + + # Extract IP address + ip_address = environ.get("REMOTE_ADDR") + + await manager.connect(sid, user_id=user_id, ip_address=ip_address) + logger.info(f"Client connected: {sid}") + +@sio.event +async def disconnect(sid): + """Handle disconnection.""" + await manager.disconnect(sid) + logger.info(f"Client disconnected: {sid}") + +# Update activity on any action +@sio.event +async def submit_defensive_decision(sid, data): + await manager.update_activity(sid) + # ... existing logic ... + +@sio.event +async def submit_offensive_decision(sid, data): + await manager.update_activity(sid) + # ... existing logic ... + +@sio.event +async def roll_dice(sid, data): + await manager.update_activity(sid) + # ... existing logic ... 
+
+# Add explicit heartbeat handler (optional, for client-initiated keepalive)
+@sio.event
+async def heartbeat(sid, data):
+    """Client-initiated heartbeat to keep session alive."""
+    await manager.update_activity(sid)
+    await sio.emit("heartbeat_ack", {"timestamp": pendulum.now("UTC").isoformat()}, to=sid)
+```
+
+### Step 5: Add Health Endpoint (10 min)
+
+Update `backend/app/api/routes.py`:
+
+```python
+from app.websocket.connection_manager import manager
+
+@router.get("/health/connections")
+async def connection_health():
+    """Return WebSocket connection statistics."""
+    stats = manager.get_stats()
+    return {
+        "status": "healthy",
+        **stats
+    }
+```
+
+### Step 6: Write Tests (30 min)
+
+Create `backend/tests/unit/websocket/test_session_expiration.py`:
+
+```python
+import pytest
+import asyncio, pendulum
+from uuid import uuid4
+from app.websocket.connection_manager import ConnectionManager, SessionInfo
+
+class TestSessionExpiration:
+    """Tests for session expiration."""
+
+    @pytest.fixture
+    def manager(self):
+        return ConnectionManager()
+
+    @pytest.mark.asyncio
+    async def test_connect_creates_session(self, manager):
+        """Connect creates session with correct info."""
+        await manager.connect("sid1", user_id=123)
+
+        session = await manager.get_session("sid1")
+        assert session is not None
+        assert session.user_id == 123
+
+    @pytest.mark.asyncio
+    async def test_disconnect_removes_session(self, manager):
+        """Disconnect removes session."""
+        await manager.connect("sid1", user_id=123)
+        await manager.disconnect("sid1")
+
+        session = await manager.get_session("sid1")
+        assert session is None
+
+    @pytest.mark.asyncio
+    async def test_activity_updates_timestamp(self, manager):
+        """Activity updates last_activity timestamp."""
+        await manager.connect("sid1")
+        original = manager._sessions["sid1"].last_activity
+
+        await asyncio.sleep(0.01)
+        await manager.update_activity("sid1")
+
+        updated = manager._sessions["sid1"].last_activity
+        assert updated > original
+
+    
@pytest.mark.asyncio + async def test_expire_removes_inactive_sessions(self, manager): + """Inactive sessions are expired.""" + await manager.connect("sid1") + + # Make session old + manager._sessions["sid1"].last_activity = pendulum.now("UTC").subtract(minutes=10) + + expired = await manager.expire_inactive_sessions(timeout_seconds=300) + + assert "sid1" in expired + assert "sid1" not in manager._sessions + + @pytest.mark.asyncio + async def test_active_sessions_not_expired(self, manager): + """Active sessions are not expired.""" + await manager.connect("sid1") + await manager.update_activity("sid1") + + expired = await manager.expire_inactive_sessions(timeout_seconds=300) + + assert "sid1" not in expired + assert "sid1" in manager._sessions + + @pytest.mark.asyncio + async def test_join_game_tracked(self, manager): + """Joining game updates session and game tracking.""" + await manager.connect("sid1") + game_id = uuid4() + + await manager.join_game("sid1", game_id) + + assert game_id in manager._sessions["sid1"].games + assert "sid1" in manager._game_sessions[game_id] +``` + +## Verification Checklist + +- [ ] Socket.io ping/pong configured +- [ ] Sessions track last activity +- [ ] Inactive sessions are expired (5 min default) +- [ ] Background task runs without errors +- [ ] Health endpoint shows connection stats +- [ ] Tests pass + +## Configuration Options + +| Setting | Default | Description | +|---------|---------|-------------| +| `ping_interval` | 25s | How often to send ping | +| `ping_timeout` | 30s | Max wait for pong | +| Expiration timeout | 300s | Inactivity before expiration | +| Check interval | 60s | How often to check for expired | + +## Rollback Plan + +If issues arise: +1. Increase expiration timeout +2. Disable expiration task +3. 
Revert Socket.io timeout config + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider sending "about to expire" warning to clients +- May want different timeouts for different game states +- Future: Add reconnection handling with session recovery diff --git a/.claude/plans/008-websocket-tests.md b/.claude/plans/008-websocket-tests.md new file mode 100644 index 0000000..1e40e94 --- /dev/null +++ b/.claude/plans/008-websocket-tests.md @@ -0,0 +1,677 @@ +# Plan 008: WebSocket Handler Tests + +**Priority**: HIGH +**Effort**: 3-4 days +**Status**: NOT STARTED +**Risk Level**: MEDIUM - Integration risk + +--- + +## Problem Statement + +Only 36% of WebSocket handlers have tests (4 of 11). Untested handlers include: +- All 3 substitution handlers (~540 lines) +- `connect` handler (authentication) +- `disconnect` handler (cleanup) +- `join_game` / `leave_game` handlers + +This creates risk for frontend integration as bugs won't be caught. + +## Impact + +- **Integration**: Frontend can't safely integrate +- **Regressions**: Changes may break untested handlers +- **Confidence**: Team can't verify WebSocket behavior + +## Current Test Coverage + +| Handler | Lines | Tests | Coverage | +|---------|-------|-------|----------| +| `connect` | ~50 | 0 | ❌ 0% | +| `disconnect` | ~20 | 0 | ❌ 0% | +| `join_game` | ~30 | 0 | ❌ 0% | +| `leave_game` | ~20 | 0 | ❌ 0% | +| `submit_defensive_decision` | ~100 | 3 | ✅ Partial | +| `submit_offensive_decision` | ~100 | 3 | ✅ Partial | +| `roll_dice` | ~80 | 5 | ✅ Good | +| `submit_manual_outcome` | ~100 | 7 | ✅ Good | +| `request_pinch_hitter` | ~180 | 0 | ❌ 0% | +| `request_defensive_replacement` | ~180 | 0 | ❌ 0% | +| `request_pitching_change` | ~180 | 0 | ❌ 0% | + +**Total**: ~1,040 lines, 18 tests → ~40% coverage + +## Files to Create + +| File | Tests | +|------|-------| +| `tests/unit/websocket/test_connect_handler.py` | 8 | +| `tests/unit/websocket/test_disconnect_handler.py` | 5 | +| 
`tests/unit/websocket/test_join_leave_handlers.py` | 8 | +| `tests/unit/websocket/test_pinch_hitter_handler.py` | 10 | +| `tests/unit/websocket/test_defensive_replacement_handler.py` | 10 | +| `tests/unit/websocket/test_pitching_change_handler.py` | 10 | + +**Total**: ~51 new tests + +## Implementation Steps + +### Step 1: Create Test Fixtures (1 hour) + +Create `backend/tests/unit/websocket/conftest.py`: + +```python +"""Shared fixtures for WebSocket handler tests.""" + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from uuid import uuid4 + +import socketio +from app.models.game_models import GameState, LineupPlayerState +from app.core.state_manager import StateManager + + +@pytest.fixture +def mock_sio(): + """Mock Socket.io server.""" + sio = MagicMock(spec=socketio.AsyncServer) + sio.emit = AsyncMock() + sio.enter_room = AsyncMock() + sio.leave_room = AsyncMock() + sio.disconnect = AsyncMock() + return sio + + +@pytest.fixture +def mock_manager(): + """Mock ConnectionManager.""" + manager = MagicMock() + manager.connect = AsyncMock() + manager.disconnect = AsyncMock() + manager.join_game = AsyncMock() + manager.leave_game = AsyncMock() + manager.broadcast_to_game = AsyncMock() + manager.emit_to_user = AsyncMock() + manager.get_user_id = AsyncMock(return_value=123) + manager.update_activity = AsyncMock() + return manager + + +@pytest.fixture +def mock_state_manager(): + """Mock StateManager.""" + sm = MagicMock(spec=StateManager) + sm.get_game_state = MagicMock() + sm.game_lock = MagicMock() + return sm + + +@pytest.fixture +def sample_game_id(): + """Sample game UUID.""" + return uuid4() + + +@pytest.fixture +def sample_game_state(sample_game_id): + """Sample game state for testing.""" + return GameState( + game_id=sample_game_id, + league_id="sba", + home_team_id=1, + away_team_id=2, + inning=1, + half="top", + outs=0, + home_score=0, + away_score=0, + current_batter=LineupPlayerState( + lineup_id=1, card_id=101, position="CF", 
batting_order=1 + ), + current_pitcher=LineupPlayerState( + lineup_id=10, card_id=201, position="P", batting_order=None + ), + ) + + +@pytest.fixture +def sample_lineup(): + """Sample lineup data.""" + return [ + LineupPlayerState(lineup_id=i, card_id=100+i, position="P" if i == 0 else "CF", batting_order=i) + for i in range(1, 10) + ] + + +@pytest.fixture +def valid_auth_token(): + """Valid JWT token for testing.""" + return {"token": "valid_jwt_token_here"} + + +@pytest.fixture +def mock_auth(mock_manager): + """Mock authentication utilities.""" + with patch('app.websocket.handlers.extract_user_id_from_token', return_value=123): + with patch('app.websocket.handlers.get_user_role_in_game', return_value="home"): + yield +``` + +### Step 2: Test connect Handler (2 hours) + +Create `backend/tests/unit/websocket/test_connect_handler.py`: + +```python +"""Tests for WebSocket connect handler.""" + +import pytest +from unittest.mock import AsyncMock, patch, MagicMock + +class TestConnectHandler: + """Tests for the connect event handler.""" + + @pytest.mark.asyncio + async def test_connect_with_valid_token_succeeds(self, mock_sio, mock_manager, valid_auth_token): + """Connection with valid JWT token succeeds.""" + with patch('app.websocket.handlers.extract_user_id_from_token', return_value=123): + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import connect + await connect("sid123", {}, valid_auth_token) + + mock_manager.connect.assert_called_once() + # Should not emit error + mock_sio.emit.assert_not_called() + + @pytest.mark.asyncio + async def test_connect_with_invalid_token_fails(self, mock_sio, mock_manager): + """Connection with invalid token is rejected.""" + with patch('app.websocket.handlers.extract_user_id_from_token', return_value=None): + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + from app.websocket.handlers import connect + result = await 
connect("sid123", {}, {"token": "invalid"}) + + # Should return False to reject connection + assert result is False + + @pytest.mark.asyncio + async def test_connect_without_auth_allowed_for_spectator(self, mock_sio, mock_manager): + """Connection without auth allowed if spectators enabled.""" + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import connect + await connect("sid123", {}, None) + + # Should connect as anonymous + mock_manager.connect.assert_called_once() + + @pytest.mark.asyncio + async def test_connect_extracts_ip_address(self, mock_manager): + """Connection extracts IP from environ.""" + environ = {"REMOTE_ADDR": "192.168.1.1"} + + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import connect + await connect("sid123", environ, None) + + # Verify IP passed to manager + call_args = mock_manager.connect.call_args + assert call_args.kwargs.get("ip_address") == "192.168.1.1" + + @pytest.mark.asyncio + async def test_connect_with_cookie_auth(self, mock_manager): + """Connection can authenticate via cookie.""" + environ = {"HTTP_COOKIE": "auth_token=valid_token"} + + with patch('app.websocket.handlers.extract_user_from_cookie', return_value=456): + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import connect + await connect("sid123", environ, None) + + call_args = mock_manager.connect.call_args + assert call_args.kwargs.get("user_id") == 456 + + @pytest.mark.asyncio + async def test_connect_logs_connection(self, mock_manager, caplog): + """Connection is logged.""" + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import connect + await connect("sid123", {}, None) + + assert "sid123" in caplog.text + + @pytest.mark.asyncio + async def test_connect_handles_token_decode_error(self, mock_sio, mock_manager): + """Connection handles token decode errors gracefully.""" + with 
patch('app.websocket.handlers.extract_user_id_from_token', side_effect=ValueError("Invalid")): + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + from app.websocket.handlers import connect + result = await connect("sid123", {}, {"token": "malformed"}) + + assert result is False + + @pytest.mark.asyncio + async def test_connect_initializes_rate_limiter(self, mock_manager): + """Connection initializes rate limiter bucket.""" + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.rate_limiter') as mock_limiter: + from app.websocket.handlers import connect + await connect("sid123", {}, None) + + # Rate limiter should track this connection + # (implementation dependent) +``` + +### Step 3: Test disconnect Handler (1 hour) + +Create `backend/tests/unit/websocket/test_disconnect_handler.py`: + +```python +"""Tests for WebSocket disconnect handler.""" + +import pytest +from unittest.mock import AsyncMock, patch + +class TestDisconnectHandler: + """Tests for the disconnect event handler.""" + + @pytest.mark.asyncio + async def test_disconnect_removes_session(self, mock_manager): + """Disconnect removes session from manager.""" + with patch('app.websocket.handlers.manager', mock_manager): + from app.websocket.handlers import disconnect + await disconnect("sid123") + + mock_manager.disconnect.assert_called_once_with("sid123") + + @pytest.mark.asyncio + async def test_disconnect_cleans_rate_limiter(self, mock_manager): + """Disconnect cleans up rate limiter.""" + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.rate_limiter') as mock_limiter: + from app.websocket.handlers import disconnect + await disconnect("sid123") + + mock_limiter.remove_connection.assert_called_once_with("sid123") + + @pytest.mark.asyncio + async def test_disconnect_leaves_all_games(self, mock_manager): + """Disconnect leaves all game rooms.""" + # 
Setup: user is in games
+        mock_manager.get_session = AsyncMock(return_value=AsyncMock(
+            games={"game-1", "game-2"}
+        ))
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            from app.websocket.handlers import disconnect
+            await disconnect("sid123")
+
+        # Should leave all games
+        mock_manager.disconnect.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_disconnect_notifies_game_participants(self, mock_manager, mock_sio):
+        """Disconnect notifies other players in game."""
+        game_id = "game-1"
+        mock_manager.get_session = AsyncMock(return_value=AsyncMock(
+            user_id=123,
+            games={game_id}
+        ))
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                from app.websocket.handlers import disconnect
+                await disconnect("sid123")
+
+        # Should broadcast player_left to game room
+        mock_manager.broadcast_to_game.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_disconnect_logs_event(self, mock_manager, caplog):
+        """Disconnect is logged."""
+        with patch('app.websocket.handlers.manager', mock_manager):
+            from app.websocket.handlers import disconnect
+            await disconnect("sid123")
+
+        assert "sid123" in caplog.text or "disconnect" in caplog.text.lower()
+```
+
+### Step 4: Test join_game / leave_game (2 hours)
+
+Create `backend/tests/unit/websocket/test_join_leave_handlers.py`:
+
+```python
+"""Tests for join_game and leave_game handlers."""
+
+import pytest
+from unittest.mock import AsyncMock, patch
+from uuid import uuid4
+
+class TestJoinGameHandler:
+    """Tests for the join_game event handler."""
+
+    @pytest.mark.asyncio
+    async def test_join_game_authorized_user_succeeds(self, mock_manager, mock_sio, sample_game_id):
+        """Authorized user can join game."""
+        data = {"game_id": str(sample_game_id)}
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                with patch('app.websocket.handlers.require_game_participant', 
return_value=True): + from app.websocket.handlers import join_game + await join_game("sid123", data) + + mock_manager.join_game.assert_called_once() + mock_sio.enter_room.assert_called() + + @pytest.mark.asyncio + async def test_join_game_unauthorized_user_rejected(self, mock_manager, mock_sio, sample_game_id): + """Unauthorized user cannot join game.""" + data = {"game_id": str(sample_game_id)} + + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + with patch('app.websocket.handlers.require_game_participant', return_value=False): + from app.websocket.handlers import join_game + await join_game("sid123", data) + + mock_manager.join_game.assert_not_called() + + @pytest.mark.asyncio + async def test_join_game_sends_current_state(self, mock_manager, mock_sio, sample_game_id, sample_game_state): + """Joining game sends current game state.""" + data = {"game_id": str(sample_game_id)} + + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + with patch('app.websocket.handlers.require_game_participant', return_value=True): + with patch('app.websocket.handlers.state_manager') as mock_sm: + mock_sm.get_game_state.return_value = sample_game_state + from app.websocket.handlers import join_game + await join_game("sid123", data) + + # Should emit game_state to joining client + mock_sio.emit.assert_called() + call_args = mock_sio.emit.call_args + assert call_args[0][0] == "game_state" + + @pytest.mark.asyncio + async def test_join_game_notifies_other_players(self, mock_manager, mock_sio, sample_game_id): + """Joining game notifies other players.""" + data = {"game_id": str(sample_game_id)} + + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + with patch('app.websocket.handlers.require_game_participant', return_value=True): + from app.websocket.handlers import join_game + await 
join_game("sid123", data)
+
+        # Should broadcast player_joined
+        mock_manager.broadcast_to_game.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_join_game_invalid_game_id_error(self, mock_manager, mock_sio):
+        """Invalid game ID returns error."""
+        data = {"game_id": "not-a-uuid"}
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                from app.websocket.handlers import join_game
+                await join_game("sid123", data)
+
+        assert mock_sio.emit.call_args[0][0] == "error"
+
+
+class TestLeaveGameHandler:
+    """Tests for the leave_game event handler."""
+
+    @pytest.mark.asyncio
+    async def test_leave_game_removes_from_room(self, mock_manager, mock_sio, sample_game_id):
+        """Leaving game removes from room."""
+        data = {"game_id": str(sample_game_id)}
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                from app.websocket.handlers import leave_game
+                await leave_game("sid123", data)
+
+        mock_manager.leave_game.assert_called_once()
+        mock_sio.leave_room.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_leave_game_notifies_other_players(self, mock_manager, mock_sio, sample_game_id):
+        """Leaving game notifies other players."""
+        data = {"game_id": str(sample_game_id)}
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                from app.websocket.handlers import leave_game
+                await leave_game("sid123", data)
+
+        mock_manager.broadcast_to_game.assert_called()
+
+    @pytest.mark.asyncio
+    async def test_leave_game_not_in_game_silent(self, mock_manager, mock_sio, sample_game_id):
+        """Leaving game you're not in is silent (no error)."""
+        data = {"game_id": str(sample_game_id)}
+        mock_manager.get_session = AsyncMock(return_value=AsyncMock(games=set()))
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', 
mock_sio): + from app.websocket.handlers import leave_game + await leave_game("sid123", data) + + # No error emitted + error_calls = [c for c in mock_sio.emit.call_args_list if c[0][0] == "error"] + assert len(error_calls) == 0 +``` + +### Step 5: Test Substitution Handlers (4 hours each) + +Create `backend/tests/unit/websocket/test_pinch_hitter_handler.py`: + +```python +"""Tests for request_pinch_hitter handler.""" + +import pytest +from unittest.mock import AsyncMock, patch, MagicMock +from uuid import uuid4 + +class TestPinchHitterHandler: + """Tests for the request_pinch_hitter event handler.""" + + @pytest.fixture + def valid_pinch_hitter_data(self, sample_game_id): + return { + "game_id": str(sample_game_id), + "team_id": 1, + "entering_player_id": 20, # Bench player + "exiting_player_id": 5, # Current batter + } + + @pytest.mark.asyncio + async def test_pinch_hitter_valid_request_succeeds( + self, mock_manager, mock_sio, mock_state_manager, + valid_pinch_hitter_data, sample_game_state + ): + """Valid pinch hitter request succeeds.""" + mock_state_manager.get_game_state.return_value = sample_game_state + + with patch('app.websocket.handlers.manager', mock_manager): + with patch('app.websocket.handlers.sio', mock_sio): + with patch('app.websocket.handlers.state_manager', mock_state_manager): + with patch('app.websocket.handlers.require_team_control', return_value=True): + with patch('app.websocket.handlers.substitution_manager') as mock_sub: + mock_sub.process_pinch_hitter = AsyncMock(return_value=MagicMock( + success=True, + to_dict=lambda: {"type": "pinch_hitter"} + )) + from app.websocket.handlers import request_pinch_hitter + await request_pinch_hitter("sid123", valid_pinch_hitter_data) + + # Should broadcast substitution + mock_manager.broadcast_to_game.assert_called() + + @pytest.mark.asyncio + async def test_pinch_hitter_unauthorized_rejected( + self, mock_manager, mock_sio, valid_pinch_hitter_data + ): + """Unauthorized user cannot request pinch 
hitter."""
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                with patch('app.websocket.handlers.require_team_control', return_value=False):
+                    from app.websocket.handlers import request_pinch_hitter
+                    await request_pinch_hitter("sid123", valid_pinch_hitter_data)
+
+        # Error should be emitted
+        assert mock_sio.emit.call_args[0][0] == "error"
+
+    @pytest.mark.asyncio
+    async def test_pinch_hitter_invalid_player_rejected(
+        self, mock_manager, mock_sio, mock_state_manager,
+        valid_pinch_hitter_data, sample_game_state
+    ):
+        """Invalid entering player is rejected."""
+        mock_state_manager.get_game_state.return_value = sample_game_state
+        valid_pinch_hitter_data["entering_player_id"] = 999  # Non-existent
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                with patch('app.websocket.handlers.state_manager', mock_state_manager):
+                    with patch('app.websocket.handlers.require_team_control', return_value=True):
+                        with patch('app.websocket.handlers.substitution_manager') as mock_sub:
+                            mock_sub.process_pinch_hitter = AsyncMock(return_value=MagicMock(
+                                success=False,
+                                error="Player not on roster"
+                            ))
+                            from app.websocket.handlers import request_pinch_hitter
+                            await request_pinch_hitter("sid123", valid_pinch_hitter_data)
+
+        # Error should be emitted
+        assert mock_sio.emit.call_args[0][0] == "error"
+
+    @pytest.mark.asyncio
+    async def test_pinch_hitter_wrong_time_rejected(
+        self, mock_manager, mock_sio, mock_state_manager,
+        valid_pinch_hitter_data, sample_game_state
+    ):
+        """Pinch hitter at wrong time (mid-at-bat) is rejected."""
+        sample_game_state.at_bat_in_progress = True
+        mock_state_manager.get_game_state.return_value = sample_game_state
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                with 
patch('app.websocket.handlers.state_manager', mock_state_manager):
+                    with patch('app.websocket.handlers.require_team_control', return_value=True):
+                        with patch('app.websocket.handlers.substitution_manager') as mock_sub:
+                            mock_sub.process_pinch_hitter = AsyncMock(return_value=MagicMock(
+                                success=False,
+                                error="Cannot substitute during at-bat"
+                            ))
+                            from app.websocket.handlers import request_pinch_hitter
+                            await request_pinch_hitter("sid123", valid_pinch_hitter_data)
+
+        assert mock_sio.emit.call_args[0][0] == "error"
+
+    @pytest.mark.asyncio
+    async def test_pinch_hitter_updates_lineup(
+        self, mock_manager, mock_sio, mock_state_manager,
+        valid_pinch_hitter_data, sample_game_state
+    ):
+        """Successful pinch hitter updates lineup state."""
+        mock_state_manager.get_game_state.return_value = sample_game_state
+
+        with patch('app.websocket.handlers.manager', mock_manager):
+            with patch('app.websocket.handlers.sio', mock_sio):
+                with patch('app.websocket.handlers.state_manager', mock_state_manager):
+                    with patch('app.websocket.handlers.require_team_control', return_value=True):
+                        with patch('app.websocket.handlers.substitution_manager') as mock_sub:
+                            mock_sub.process_pinch_hitter = AsyncMock(return_value=MagicMock(
+                                success=True,
+                                to_dict=lambda: {"type": "pinch_hitter"}
+                            ))
+                            from app.websocket.handlers import request_pinch_hitter
+                            await request_pinch_hitter("sid123", valid_pinch_hitter_data)
+
+        # Should persist to database
+        # (verify db_ops call if applicable)
+
+    @pytest.mark.asyncio
+    async def test_pinch_hitter_rate_limited(
+        self, mock_manager, mock_sio, valid_pinch_hitter_data
+    ):
+        """Rapid pinch hitter requests are rate limited."""
+        with patch('app.websocket.handlers.rate_limiter') as mock_limiter:
+            mock_limiter.check_game_limit = AsyncMock(return_value=False)
+            with patch('app.websocket.handlers.manager', mock_manager):
+                with patch('app.websocket.handlers.sio', mock_sio):
+                    from app.websocket.handlers import 
request_pinch_hitter + await request_pinch_hitter("sid123", valid_pinch_hitter_data) + + # Rate limit error + mock_sio.emit.assert_called() + + # Additional tests for: + # - Player already used validation + # - Batting order maintenance + # - Database persistence + # - Concurrent request handling +``` + +Create similar test files for: +- `test_defensive_replacement_handler.py` (10 tests) +- `test_pitching_change_handler.py` (10 tests) + +### Step 6: Run Full Test Suite (30 min) + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp/backend + +# Run all WebSocket tests +pytest tests/unit/websocket/ -v + +# Run with coverage +pytest tests/unit/websocket/ -v --cov=app/websocket --cov-report=html +``` + +## Verification Checklist + +- [ ] All 11 handlers have tests +- [ ] Coverage > 80% for `handlers.py` +- [ ] Authorization tests verify access control +- [ ] Rate limiting tests verify throttling +- [ ] Error handling tests verify error messages +- [ ] All new tests pass + +## Test Summary Target + +| Handler | Tests | Status | +|---------|-------|--------| +| `connect` | 8 | TODO | +| `disconnect` | 5 | TODO | +| `join_game` | 5 | TODO | +| `leave_game` | 3 | TODO | +| `submit_defensive_decision` | 5 | Expand | +| `submit_offensive_decision` | 5 | Expand | +| `roll_dice` | 5 | ✅ | +| `submit_manual_outcome` | 7 | ✅ | +| `request_pinch_hitter` | 10 | TODO | +| `request_defensive_replacement` | 10 | TODO | +| `request_pitching_change` | 10 | TODO | +| **Total** | **73** | - | + +## Rollback Plan + +Tests are additive - no rollback needed. 
+ +## Dependencies + +- Plan 001 (Authorization) - tests assume auth utilities exist +- Plan 002 (Locking) - tests verify concurrent access + +## Notes + +- Consider adding property-based tests for edge cases +- May want integration tests for full WebSocket flow +- Future: Add load tests for concurrent connections diff --git a/.claude/plans/009-integration-test-fix.md b/.claude/plans/009-integration-test-fix.md new file mode 100644 index 0000000..d4d6723 --- /dev/null +++ b/.claude/plans/009-integration-test-fix.md @@ -0,0 +1,478 @@ +# Plan 009: Fix Integration Test Infrastructure + +**Priority**: MEDIUM +**Effort**: 2-3 days +**Status**: NOT STARTED +**Risk Level**: LOW - Testing infrastructure + +--- + +## Problem Statement + +Integration tests have infrastructure issues with AsyncPG: +- Connection conflicts with concurrent tests +- Fixture scope mismatches (module vs function) +- ~49 errors, 28 failures in integration test suite + +Current workaround: Run tests individually or serially. + +## Impact + +- **CI/CD**: Can't run full test suite in pipeline +- **Confidence**: Integration behavior not verified automatically +- **Speed**: Serial execution is slow + +## Root Causes + +### 1. AsyncPG Connection Conflicts +``` +asyncpg.exceptions.InterfaceError: cannot perform operation: +another operation is in progress +``` + +AsyncPG doesn't support concurrent operations on a single connection. + +### 2. Fixture Scope Mismatches +```python +@pytest.fixture(scope="module") # Module scope +async def setup_database(event_loop): # Depends on function-scoped event_loop +``` + +Different scopes cause `ScopeMismatch` errors. + +### 3. Shared Database State +Tests don't properly isolate database state, causing cascading failures. 
+ +## Files to Modify + +| File | Changes | +|------|---------| +| `backend/tests/conftest.py` | Fix fixture scopes | +| `backend/tests/integration/conftest.py` | Add test isolation | +| `backend/pyproject.toml` | Update pytest-asyncio config | + +## Implementation Steps + +### Step 1: Update pytest-asyncio Configuration (30 min) + +Update `backend/pyproject.toml`: + +```toml +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" # Important! +``` + +This ensures each test function gets its own event loop. + +### Step 2: Create Test Database Utilities (1 hour) + +Create `backend/tests/utils/db_utils.py`: + +```python +"""Database utilities for integration tests.""" + +import asyncio +from contextlib import asynccontextmanager +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker +from sqlalchemy.pool import NullPool + +from app.models.db_models import Base +from app.config import settings + + +def get_test_database_url() -> str: + """Get test database URL (separate from dev/prod).""" + # Use a different database for tests + base_url = settings.database_url + if "strat_gameplay" in base_url: + return base_url.replace("strat_gameplay", "strat_gameplay_test") + return base_url + "_test" + + +def create_test_engine(): + """Create engine with NullPool (no connection pooling).""" + return create_async_engine( + get_test_database_url(), + poolclass=NullPool, # Each connection is created fresh + echo=False, + ) + + +@asynccontextmanager +async def test_session(): + """Create isolated test session.""" + engine = create_test_engine() + + async with engine.begin() as conn: + # Create tables + await conn.run_sync(Base.metadata.create_all) + + AsyncTestSession = async_sessionmaker( + engine, + class_=AsyncSession, + expire_on_commit=False, + ) + + async with AsyncTestSession() as session: + try: + yield session + await session.commit() + except Exception: + await session.rollback() + raise + 
finally: + await session.close() + + # Clean up + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + + await engine.dispose() + + +async def reset_database(session: AsyncSession): + """Reset database to clean state.""" + # Delete all data in reverse dependency order + for table in reversed(Base.metadata.sorted_tables): + await session.execute(table.delete()) + await session.commit() +``` + +### Step 3: Update Integration Test Fixtures (2 hours) + +Update `backend/tests/integration/conftest.py`: + +```python +"""Integration test fixtures.""" + +import pytest +import pytest_asyncio +from uuid import uuid4 + +from tests.utils.db_utils import create_test_engine, test_session, reset_database +from app.models.db_models import Base +from app.database.operations import DatabaseOperations + + +@pytest_asyncio.fixture(scope="function") +async def test_db(): + """ + Create fresh database for each test. + Uses NullPool to avoid connection conflicts. + """ + engine = create_test_engine() + + # Create tables + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + yield engine + + # Drop tables + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + + await engine.dispose() + + +@pytest_asyncio.fixture(scope="function") +async def db_session(test_db): + """ + Provide isolated database session for each test. + Automatically rolls back after test. + """ + from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + + AsyncTestSession = async_sessionmaker( + test_db, + class_=AsyncSession, + expire_on_commit=False, + ) + + async with AsyncTestSession() as session: + yield session + # Rollback any uncommitted changes + await session.rollback() + + +@pytest_asyncio.fixture(scope="function") +async def db_ops(db_session, monkeypatch): + """ + Provide DatabaseOperations with test session. 
+ """ + from app.database import session as db_module + + # Monkeypatch to use test session + original_session = db_module.AsyncSessionLocal + + # Create factory that returns our test session + async def get_test_session(): + return db_session + + monkeypatch.setattr(db_module, 'AsyncSessionLocal', get_test_session) + + yield DatabaseOperations() + + # Restore + monkeypatch.setattr(db_module, 'AsyncSessionLocal', original_session) + + +@pytest_asyncio.fixture(scope="function") +async def sample_game(db_ops): + """Create sample game for testing.""" + game_id = uuid4() + await db_ops.create_game({ + "id": game_id, + "league_id": "sba", + "home_team_id": 1, + "away_team_id": 2, + "status": "in_progress" + }) + return game_id + + +@pytest_asyncio.fixture(scope="function") +async def sample_lineups(db_ops, sample_game): + """Create sample lineups for testing.""" + lineups = [] + for team_id in [1, 2]: + for i in range(9): + lineup = await db_ops.create_lineup({ + "game_id": sample_game, + "team_id": team_id, + "card_id": 100 + (team_id * 10) + i, + "position": ["P", "C", "1B", "2B", "3B", "SS", "LF", "CF", "RF"][i], + "batting_order": i + 1 if i > 0 else None, + "is_active": True + }) + lineups.append(lineup) + return lineups +``` + +### Step 4: Fix Specific Test Files (4 hours) + +Update each integration test file to use function-scoped fixtures: + +**Example: `tests/integration/database/test_operations.py`** + +```python +"""Integration tests for DatabaseOperations.""" + +import pytest +from uuid import uuid4 + +class TestDatabaseOperations: + """Tests for database CRUD operations.""" + + @pytest.mark.asyncio + async def test_create_game(self, db_ops): + """Test game creation.""" + game_id = uuid4() + result = await db_ops.create_game({ + "id": game_id, + "league_id": "sba", + "home_team_id": 1, + "away_team_id": 2, + }) + + assert result is not None + assert result["id"] == game_id + + @pytest.mark.asyncio + async def test_get_game(self, db_ops, sample_game): + 
"""Test game retrieval.""" + game = await db_ops.get_game(sample_game) + + assert game is not None + assert game["id"] == sample_game + + @pytest.mark.asyncio + async def test_create_play(self, db_ops, sample_game, sample_lineups): + """Test play creation.""" + play_id = await db_ops.save_play({ + "game_id": sample_game, + "play_number": 1, + "inning": 1, + "half": "top", + "outs_before": 0, + "outs_after": 1, + "batter_id": sample_lineups[0].id, + "pitcher_id": sample_lineups[9].id, + "outcome": "STRIKEOUT", + }) + + assert play_id is not None + + @pytest.mark.asyncio + async def test_get_plays(self, db_ops, sample_game, sample_lineups): + """Test play retrieval.""" + # Create a play first + await db_ops.save_play({ + "game_id": sample_game, + "play_number": 1, + # ... play data + }) + + plays = await db_ops.get_plays(sample_game) + + assert len(plays) == 1 +``` + +### Step 5: Add Test Isolation Markers (1 hour) + +Update `backend/pyproject.toml`: + +```toml +[tool.pytest.ini_options] +markers = [ + "integration: marks tests as integration tests (require database)", + "slow: marks tests as slow (>1s)", + "serial: marks tests that must run serially", +] +``` + +Update tests that need serial execution: + +```python +@pytest.mark.serial +@pytest.mark.integration +class TestGameStateRecovery: + """Tests that must run serially due to shared state.""" + pass +``` + +### Step 6: Create pytest Plugin for Serial Tests (1 hour) + +Create `backend/tests/plugins/serial_runner.py`: + +```python +"""Plugin for running serial tests in order.""" + +import pytest + + +def pytest_configure(config): + """Register serial marker.""" + config.addinivalue_line( + "markers", "serial: mark test to run serially" + ) + + +@pytest.hookimpl(tryfirst=True) +def pytest_collection_modifyitems(config, items): + """Ensure serial tests run after parallel tests.""" + serial_tests = [] + parallel_tests = [] + + for item in items: + if item.get_closest_marker("serial"): + serial_tests.append(item) + 
else: + parallel_tests.append(item) + + # Run parallel tests first, then serial + items[:] = parallel_tests + serial_tests +``` + +### Step 7: Update CI Configuration (30 min) + +Update `.github/workflows/test.yml`: + +```yaml +jobs: + test: + runs-on: ubuntu-latest + services: + postgres: + image: postgres:14 + env: + POSTGRES_USER: test + POSTGRES_PASSWORD: test + POSTGRES_DB: strat_gameplay_test + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.13' + + - name: Install dependencies + run: | + cd backend + pip install -e ".[dev]" + + - name: Run unit tests + run: | + cd backend + pytest tests/unit/ -v --tb=short + + - name: Run integration tests + env: + DATABASE_URL: postgresql+asyncpg://test:test@localhost:5432/strat_gameplay_test + run: | + cd backend + pytest tests/integration/ -v --tb=short -x +``` + +### Step 8: Verify Test Suite (1 hour) + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp/backend + +# Run unit tests (should all pass) +pytest tests/unit/ -v + +# Run integration tests (should now pass) +pytest tests/integration/ -v + +# Run full suite +pytest tests/ -v --tb=short + +# Check for any remaining failures +pytest tests/ -v --tb=long -x # Stop on first failure +``` + +## Verification Checklist + +- [ ] All unit tests pass (739/739) +- [ ] Integration tests pass without connection errors +- [ ] No fixture scope mismatch errors +- [ ] Tests can run in parallel with pytest-xdist +- [ ] CI pipeline passes +- [ ] Test execution time < 60 seconds + +## Expected Outcome + +| Metric | Before | After | +|--------|--------|-------| +| Unit tests | 739/739 ✅ | 739/739 ✅ | +| Integration tests | ~49 errors | 0 errors | +| Total time | N/A (can't run) | < 60s | +| CI status | ❌ Failing | ✅ Passing | + +## Rollback Plan + +If issues persist: +1. 
Keep serial execution as workaround +2. Skip problematic tests with `@pytest.mark.skip` +3. Run integration tests nightly instead of per-commit + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider using testcontainers for fully isolated DB +- May want to add pytest-xdist for parallel execution +- Future: Add database seeding scripts for complex scenarios diff --git a/.claude/plans/010-shared-components.md b/.claude/plans/010-shared-components.md new file mode 100644 index 0000000..1eddfa7 --- /dev/null +++ b/.claude/plans/010-shared-components.md @@ -0,0 +1,451 @@ +# Plan 010: Create Shared Component Library + +**Priority**: MEDIUM +**Effort**: 1-2 weeks +**Status**: NOT STARTED +**Risk Level**: LOW - Code organization + +--- + +## Problem Statement + +SBA and PD frontends have duplicate component implementations. When a component is updated in one frontend, it must be manually synchronized to the other. + +Current state: +- `frontend-sba/components/` - Full implementation (~20 components) +- `frontend-pd/components/` - Minimal (mostly stubs) + +## Impact + +- **DRY Violation**: Same code in two places +- **Maintenance**: Updates require changes in both places +- **Consistency**: Risk of drift between implementations +- **Testing**: Need to test same component twice + +## Proposed Structure + +``` +strat-gameplay-webapp/ +├── packages/ +│ └── shared-ui/ # Shared component library +│ ├── package.json +│ ├── nuxt.config.ts # Nuxt module config +│ ├── components/ +│ │ ├── Game/ +│ │ ├── Decisions/ +│ │ ├── Gameplay/ +│ │ ├── Substitutions/ +│ │ └── UI/ +│ ├── composables/ +│ ├── types/ +│ └── styles/ +├── frontend-sba/ # SBA-specific +│ └── nuxt.config.ts # Extends shared-ui +└── frontend-pd/ # PD-specific + └── nuxt.config.ts # Extends shared-ui +``` + +## Implementation Steps + +### Step 1: Create Package Structure (1 hour) + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp + +# Create shared package +mkdir -p 
packages/shared-ui/{components,composables,types,styles} + +# Initialize package +cd packages/shared-ui +``` + +Create `packages/shared-ui/package.json`: + +```json +{ + "name": "@strat-gameplay/shared-ui", + "version": "0.1.0", + "private": true, + "type": "module", + "exports": { + ".": { + "import": "./index.ts" + }, + "./components/*": { + "import": "./components/*" + }, + "./composables/*": { + "import": "./composables/*" + }, + "./types/*": { + "import": "./types/*" + } + }, + "dependencies": { + "vue": "^3.4.0" + }, + "devDependencies": { + "@nuxt/kit": "^3.14.0", + "typescript": "^5.3.0" + }, + "peerDependencies": { + "nuxt": "^3.14.0" + } +} +``` + +### Step 2: Create Nuxt Module (2 hours) + +Create `packages/shared-ui/nuxt.config.ts`: + +```typescript +import { defineNuxtModule, addComponentsDir, addImportsDir } from '@nuxt/kit' +import { fileURLToPath } from 'url' +import { dirname, join } from 'path' + +export default defineNuxtModule({ + meta: { + name: '@strat-gameplay/shared-ui', + configKey: 'sharedUi', + }, + defaults: { + prefix: '', // No prefix for components + }, + setup(options, nuxt) { + const runtimeDir = dirname(fileURLToPath(import.meta.url)) + + // Add components + addComponentsDir({ + path: join(runtimeDir, 'components'), + prefix: options.prefix, + pathPrefix: false, + }) + + // Add composables + addImportsDir(join(runtimeDir, 'composables')) + + // Add types + nuxt.hook('prepare:types', ({ tsConfig }) => { + tsConfig.include = tsConfig.include || [] + tsConfig.include.push(join(runtimeDir, 'types/**/*.d.ts')) + }) + }, +}) +``` + +Create `packages/shared-ui/index.ts`: + +```typescript +// Export module +export { default } from './nuxt.config' + +// Export types +export * from './types/game' +export * from './types/player' +export * from './types/websocket' + +// Export composables +export { useWebSocket } from './composables/useWebSocket' +export { useGameActions } from './composables/useGameActions' +``` + +### Step 3: Move Shared 
Components (4 hours) + +Identify components that are league-agnostic: + +**Fully Shared (move to packages/shared-ui):** +- `components/UI/` - All UI primitives +- `components/Game/ScoreBoard.vue` - League-agnostic +- `components/Game/GameBoard.vue` - League-agnostic +- `components/Game/CurrentSituation.vue` - League-agnostic +- `components/Gameplay/DiceRoller.vue` - League-agnostic +- `components/Gameplay/PlayResult.vue` - League-agnostic + +**League-Specific (keep in frontend-*):** +- `components/Decisions/` - Different for SBA vs PD +- Player cards - Different data structures +- Scouting views - PD only + +Move shared components: + +```bash +# Move UI components +cp -r frontend-sba/components/UI packages/shared-ui/components/ + +# Move Game components +cp -r frontend-sba/components/Game packages/shared-ui/components/ + +# Move Gameplay components +cp -r frontend-sba/components/Gameplay packages/shared-ui/components/ + +# Move composables +cp frontend-sba/composables/useWebSocket.ts packages/shared-ui/composables/ +cp frontend-sba/composables/useGameActions.ts packages/shared-ui/composables/ + +# Move types +cp frontend-sba/types/game.ts packages/shared-ui/types/ +cp frontend-sba/types/player.ts packages/shared-ui/types/ +cp frontend-sba/types/websocket.ts packages/shared-ui/types/ +``` + +### Step 4: Create Theme System (2 hours) + +Create `packages/shared-ui/styles/themes.ts`: + +```typescript +export interface LeagueTheme { + primary: string + secondary: string + accent: string + background: string + surface: string + text: string + textMuted: string +} + +export const sbaTheme: LeagueTheme = { + primary: '#1e40af', // Blue + secondary: '#3b82f6', + accent: '#f59e0b', // Amber + background: '#0f172a', + surface: '#1e293b', + text: '#f8fafc', + textMuted: '#94a3b8', +} + +export const pdTheme: LeagueTheme = { + primary: '#7c3aed', // Purple + secondary: '#a78bfa', + accent: '#10b981', // Emerald + background: '#0f172a', + surface: '#1e293b', + text: '#f8fafc', + 
textMuted: '#94a3b8', +} +``` + +Create `packages/shared-ui/composables/useTheme.ts`: + +```typescript +import { inject, provide, ref, readonly } from 'vue' +import type { LeagueTheme } from '../styles/themes' +import { sbaTheme } from '../styles/themes' + +const THEME_KEY = Symbol('theme') + +export function provideTheme(theme: LeagueTheme) { + const themeRef = ref(theme) + provide(THEME_KEY, readonly(themeRef)) + return themeRef +} + +export function useTheme(): LeagueTheme { + const theme = inject(THEME_KEY) + if (!theme) { + console.warn('No theme provided, using SBA default') + return sbaTheme + } + return theme +} +``` + +### Step 5: Update Frontends to Use Shared Package (2 hours) + +Update `frontend-sba/package.json`: + +```json +{ + "dependencies": { + "@strat-gameplay/shared-ui": "workspace:*" + } +} +``` + +Update `frontend-sba/nuxt.config.ts`: + +```typescript +export default defineNuxtConfig({ + modules: [ + '@strat-gameplay/shared-ui', + ], + sharedUi: { + prefix: '', + }, + // Override theme + runtimeConfig: { + public: { + league: 'sba', + }, + }, +}) +``` + +Update `frontend-sba/app.vue`: + +```vue +<script setup lang="ts"> +// NOTE(review): snippet reconstructed — the original was lost in extraction; confirm against the repo. +import { provideTheme, sbaTheme } from '@strat-gameplay/shared-ui' + +provideTheme(sbaTheme) +</script> + +<template> + <NuxtLayout> + <NuxtPage /> + </NuxtLayout> +</template> +``` + +### Step 6: Setup Workspace (1 hour) + +Create/update root `package.json`: + +```json +{ + "name": "strat-gameplay-webapp", + "private": true, + "workspaces": [ + "packages/*", + "frontend-sba", + "frontend-pd" + ], + "scripts": { + "dev:sba": "cd frontend-sba && npm run dev", + "dev:pd": "cd frontend-pd && npm run dev", + "build": "npm run build --workspaces", + "test": "npm run test --workspaces" + } +} +``` + +Install dependencies: + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp +npm install # or bun install +``` + +### Step 7: Update Imports in Frontends (2 hours) + +Update components to import from shared package: + +**Before:** +```typescript +// frontend-sba/pages/game/[id].vue +import ScoreBoard from '~/components/Game/ScoreBoard.vue' +import { useWebSocket } from '~/composables/useWebSocket' +``` + +**After:** +```typescript
+// frontend-sba/pages/game/[id].vue +// ScoreBoard auto-imported from shared-ui module +// useWebSocket auto-imported from shared-ui module +``` + +### Step 8: Add Shared Tests (2 hours) + +Create `packages/shared-ui/tests/`: + +```typescript +// packages/shared-ui/tests/components/ScoreBoard.spec.ts +import { describe, it, expect } from 'vitest' +import { mount } from '@vue/test-utils' +import ScoreBoard from '../components/Game/ScoreBoard.vue' + +describe('ScoreBoard', () => { + it('renders team scores', () => { + const wrapper = mount(ScoreBoard, { + props: { + homeScore: 5, + awayScore: 3, + homeTeamName: 'Home Team', + awayTeamName: 'Away Team', + }, + }) + + expect(wrapper.text()).toContain('5') + expect(wrapper.text()).toContain('3') + }) + + it('applies theme colors', () => { + // Test theme integration + }) +}) +``` + +### Step 9: Documentation (1 hour) + +Create `packages/shared-ui/README.md`: + +```markdown +# @strat-gameplay/shared-ui + +Shared Vue 3 components for Strat Gameplay frontends. 
+ +## Usage + +Add to your Nuxt config: + +```typescript +export default defineNuxtConfig({ + modules: ['@strat-gameplay/shared-ui'], +}) +``` + +## Components + +### Game Components +- `ScoreBoard` - Displays game score, inning, count +- `GameBoard` - Diamond visualization +- `CurrentSituation` - Batter/pitcher matchup + +### UI Components +- `ActionButton` - Styled action button +- `ToggleSwitch` - Boolean toggle +- `ButtonGroup` - Button group container + +## Composables + +- `useWebSocket` - WebSocket connection management +- `useGameActions` - Type-safe game action emitters +- `useTheme` - Theme injection/consumption + +## Theming + +Provide theme at app root: + +```vue +<script setup lang="ts"> +import { provideTheme, sbaTheme } from '@strat-gameplay/shared-ui' + +provideTheme(sbaTheme) +</script> +``` +``` + +## Verification Checklist + +- [ ] Shared package builds without errors +- [ ] SBA frontend works with shared components +- [ ] PD frontend works with shared components +- [ ] Themes apply correctly per league +- [ ] Tests pass for shared components +- [ ] No duplicate component code remains + +## Migration Strategy + +1. **Phase 1**: Create package, move UI primitives +2. **Phase 2**: Move game display components +3. **Phase 3**: Move composables and types +4. **Phase 4**: Refactor league-specific components + +## Rollback Plan + +If issues arise: +1. Remove shared-ui from module list +2. Restore original component imports +3. 
Keep shared package for future use + +## Dependencies + +- None (can be implemented independently) + +## Notes + +- Consider Storybook for component documentation +- May want to publish to private npm registry eventually +- Future: Add design tokens for full design system diff --git a/.claude/plans/011-database-indexes.md b/.claude/plans/011-database-indexes.md new file mode 100644 index 0000000..6f53308 --- /dev/null +++ b/.claude/plans/011-database-indexes.md @@ -0,0 +1,287 @@ +# Plan 011: Add Database Indexes + +**Priority**: MEDIUM +**Effort**: 1 hour +**Status**: NOT STARTED +**Risk Level**: LOW - Performance optimization + +--- + +## Problem Statement + +The database schema is missing composite indexes for common query patterns: +- `(game_id, play_number)` on plays table +- `(game_id, team_id)` on lineups table +- `(game_id, is_active)` on lineups table + +This causes sequential scans on tables that could use index lookups. + +## Impact + +- **Performance**: Slower queries for game recovery +- **Scalability**: Performance degrades with more data +- **Cost**: Higher database CPU usage + +## Current Query Patterns + +### 1. Get Plays for Game (Recovery) +```python +# operations.py:467-468 +select(Play).where(Play.game_id == game_id).order_by(Play.play_number) +``` +**Current**: Index on `game_id`, sequential scan for ordering +**Needed**: Composite index `(game_id, play_number)` + +### 2. Get Lineups for Team +```python +# operations.py:488-493 +select(Lineup).where( + Lineup.game_id == game_id, + Lineup.team_id == team_id, + Lineup.is_active == True +) +``` +**Current**: Index on `game_id`, filter on `team_id` and `is_active` +**Needed**: Composite index `(game_id, team_id, is_active)` + +### 3. 
Get Active Players +```python +select(Lineup).where( + Lineup.game_id == game_id, + Lineup.is_active == True +) +``` +**Current**: Index on `game_id`, filter on `is_active` +**Needed**: Composite index `(game_id, is_active)` + +## Implementation Steps + +### Step 1: Create Migration (15 min) + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp/backend +alembic revision -m "Add composite indexes for common queries" +``` + +Create migration file: + +```python +"""Add composite indexes for common queries + +Revision ID: 005 +Revises: 004 +Create Date: 2025-01-27 + +""" +from typing import Sequence, Union +from alembic import op + +revision: str = '005' +down_revision: str = '004' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Plays table - optimize game recovery and play history + op.create_index( + 'idx_play_game_number', + 'plays', + ['game_id', 'play_number'], + unique=False + ) + + # Lineups table - optimize team lineup queries + op.create_index( + 'idx_lineup_game_team_active', + 'lineups', + ['game_id', 'team_id', 'is_active'], + unique=False + ) + + # Lineups table - optimize active player queries + op.create_index( + 'idx_lineup_game_active', + 'lineups', + ['game_id', 'is_active'], + unique=False + ) + + # Rolls table - optimize roll history queries + op.create_index( + 'idx_roll_game_type', + 'rolls', + ['game_id', 'roll_type'], + unique=False + ) + + # Games table - optimize status queries + op.create_index( + 'idx_game_status_created', + 'games', + ['status', 'created_at'], + unique=False + ) + + +def downgrade() -> None: + op.drop_index('idx_play_game_number', table_name='plays') + op.drop_index('idx_lineup_game_team_active', table_name='lineups') + op.drop_index('idx_lineup_game_active', table_name='lineups') + op.drop_index('idx_roll_game_type', table_name='rolls') + op.drop_index('idx_game_status_created', table_name='games') +``` + +### Step 2: Update 
Models (Optional - for documentation) (15 min) + +Update `backend/app/models/db_models.py` to document indexes: + +```python +class Play(Base): + __tablename__ = "plays" + + # ... columns ... + + __table_args__ = ( + Index('idx_play_game_number', 'game_id', 'play_number'), + # ... other constraints ... + ) + + +class Lineup(Base): + __tablename__ = "lineups" + + # ... columns ... + + __table_args__ = ( + Index('idx_lineup_game_team_active', 'game_id', 'team_id', 'is_active'), + Index('idx_lineup_game_active', 'game_id', 'is_active'), + # ... other constraints ... + ) +``` + +### Step 3: Apply Migration (5 min) + +```bash +cd /mnt/NV2/Development/strat-gameplay-webapp/backend + +# Apply migration +alembic upgrade head + +# Verify indexes created +psql -d strat_gameplay -c "\di" +``` + +### Step 4: Verify Query Plans (15 min) + +Test that queries use the new indexes: + +```sql +-- Check play query uses index +EXPLAIN ANALYZE +SELECT * FROM plays +WHERE game_id = 'some-uuid' +ORDER BY play_number; + +-- Should show: Index Scan using idx_play_game_number + +-- Check lineup query uses index +EXPLAIN ANALYZE +SELECT * FROM lineups +WHERE game_id = 'some-uuid' +AND team_id = 1 +AND is_active = true; + +-- Should show: Index Scan using idx_lineup_game_team_active +``` + +### Step 5: Add Performance Test (15 min) + +Create `backend/tests/integration/test_query_performance.py`: + +```python +"""Performance tests for database queries.""" + +import pytest +import time +from uuid import uuid4 + +class TestQueryPerformance: + """Tests that queries use indexes efficiently.""" + + @pytest.mark.integration + @pytest.mark.asyncio + async def test_get_plays_uses_index(self, db_ops, sample_game): + """Play retrieval should use composite index.""" + # Create 100 plays + for i in range(100): + await db_ops.save_play({ + "game_id": sample_game, + "play_number": i + 1, + # ... 
other fields + }) + + # Time the query + start = time.perf_counter() + plays = await db_ops.get_plays(sample_game) + duration = time.perf_counter() - start + + assert len(plays) == 100 + assert duration < 0.1 # Should be < 100ms with index + + @pytest.mark.integration + @pytest.mark.asyncio + async def test_get_lineups_uses_index(self, db_ops, sample_game, sample_lineups): + """Lineup retrieval should use composite index.""" + start = time.perf_counter() + lineups = await db_ops.get_lineups(sample_game, team_id=1) + duration = time.perf_counter() - start + + assert len(lineups) == 9 + assert duration < 0.05 # Should be < 50ms with index +``` + +## Index Summary + +| Index Name | Table | Columns | Purpose | +|------------|-------|---------|---------| +| `idx_play_game_number` | plays | (game_id, play_number) | Game recovery, play history | +| `idx_lineup_game_team_active` | lineups | (game_id, team_id, is_active) | Team lineup queries | +| `idx_lineup_game_active` | lineups | (game_id, is_active) | Active player queries | +| `idx_roll_game_type` | rolls | (game_id, roll_type) | Roll history queries | +| `idx_game_status_created` | games | (status, created_at) | Game listing queries | + +## Expected Performance Improvement + +| Query | Before | After | Improvement | +|-------|--------|-------|-------------| +| Get 100 plays | ~50ms | ~5ms | 10x | +| Get team lineup | ~20ms | ~2ms | 10x | +| List active games | ~100ms | ~10ms | 10x | + +## Verification Checklist + +- [ ] Migration created and reviewed +- [ ] Migration applied successfully +- [ ] EXPLAIN shows index usage +- [ ] Performance tests pass +- [ ] No regression in other queries + +## Rollback Plan + +```bash +# Revert migration +alembic downgrade -1 +``` + +Indexes are non-destructive - removing them just reverts to slower queries. 
+ +## Dependencies + +- Plan 004 (Alembic Migrations) - need migration infrastructure + +## Notes + +- Monitor index usage with `pg_stat_user_indexes` +- Consider partial indexes for very large tables +- Future: Add covering indexes if SELECT includes specific columns diff --git a/.claude/plans/012-connection-pool-monitoring.md b/.claude/plans/012-connection-pool-monitoring.md new file mode 100644 index 0000000..f05cf5e --- /dev/null +++ b/.claude/plans/012-connection-pool-monitoring.md @@ -0,0 +1,442 @@ +# Plan 012: Connection Pool Monitoring + +**Priority**: MEDIUM +**Effort**: 2 hours +**Status**: NOT STARTED +**Risk Level**: LOW - Observability + +--- + +## Problem Statement + +No monitoring exists for database connection pool usage: +- Can't detect pool exhaustion before it causes failures +- No visibility into connection health +- No alerting for connection issues + +## Impact + +- **Reliability**: Pool exhaustion causes request failures +- **Debugging**: Hard to diagnose connection issues +- **Capacity**: Can't plan for scaling needs + +## Current Configuration + +```python +# config.py +db_pool_size: int = 20 +db_max_overflow: int = 10 +# Total capacity: 30 connections +``` + +## Implementation Steps + +### Step 1: Create Pool Monitor (30 min) + +Create `backend/app/monitoring/pool_monitor.py`: + +```python +"""Database connection pool monitoring.""" + +import asyncio +import logging +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from sqlalchemy.ext.asyncio import AsyncEngine + +from app.config import settings + +logger = logging.getLogger(f"{__name__}.PoolMonitor") + + +@dataclass +class PoolStats: + """Connection pool statistics.""" + pool_size: int + max_overflow: int + checkedin: int # Available connections + checkedout: int # In-use connections + overflow: int # Overflow connections in use + total_capacity: int + usage_percent: float + timestamp: datetime + + +class PoolMonitor: + """ + Monitor 
database connection pool health. + Provides stats and alerts for pool usage. + """ + + def __init__(self, engine: AsyncEngine): + self._engine = engine + self._stats_history: list[PoolStats] = [] + self._max_history = 100 + self._alert_threshold = 0.8 # 80% usage + + def get_stats(self) -> PoolStats: + """Get current pool statistics.""" + pool = self._engine.pool + + checkedin = pool.checkedin() + checkedout = pool.checkedout() + overflow = pool.overflow() + total_capacity = settings.db_pool_size + settings.db_max_overflow + + usage_percent = checkedout / total_capacity if total_capacity > 0 else 0 + + stats = PoolStats( + pool_size=settings.db_pool_size, + max_overflow=settings.db_max_overflow, + checkedin=checkedin, + checkedout=checkedout, + overflow=overflow, + total_capacity=total_capacity, + usage_percent=usage_percent, + timestamp=datetime.utcnow() + ) + + # Record history + self._stats_history.append(stats) + if len(self._stats_history) > self._max_history: + self._stats_history.pop(0) + + # Check for alerts + if usage_percent >= self._alert_threshold: + logger.warning( + f"Connection pool usage high: {usage_percent:.1%} " + f"({checkedout}/{total_capacity})" + ) + + if overflow > 0: + logger.info(f"Pool overflow active: {overflow} overflow connections") + + return stats + + def get_health_status(self) -> dict: + """Get pool health status for monitoring endpoint.""" + stats = self.get_stats() + + if stats.usage_percent >= 0.9: + status = "critical" + elif stats.usage_percent >= 0.75: + status = "warning" + else: + status = "healthy" + + return { + "status": status, + "pool_size": stats.pool_size, + "max_overflow": stats.max_overflow, + "available": stats.checkedin, + "in_use": stats.checkedout, + "overflow_active": stats.overflow, + "total_capacity": stats.total_capacity, + "usage_percent": round(stats.usage_percent * 100, 1), + "timestamp": stats.timestamp.isoformat() + } + + def get_history(self, limit: int = 10) -> list[dict]: + """Get recent stats 
history.""" + return [ + { + "checkedout": s.checkedout, + "usage_percent": round(s.usage_percent * 100, 1), + "timestamp": s.timestamp.isoformat() + } + for s in self._stats_history[-limit:] + ] + + async def start_monitoring(self, interval_seconds: int = 60): + """ + Background task to periodically collect stats. + Useful for logging and alerting. + """ + while True: + try: + stats = self.get_stats() + logger.debug( + f"Pool stats: {stats.checkedout}/{stats.total_capacity} " + f"({stats.usage_percent:.1%})" + ) + await asyncio.sleep(interval_seconds) + except asyncio.CancelledError: + logger.info("Pool monitoring stopped") + break + except Exception as e: + logger.error(f"Pool monitoring error: {e}") + await asyncio.sleep(interval_seconds) + + +# Global instance (initialized in main.py) +pool_monitor: Optional[PoolMonitor] = None + + +def init_pool_monitor(engine: AsyncEngine) -> PoolMonitor: + """Initialize global pool monitor.""" + global pool_monitor + pool_monitor = PoolMonitor(engine) + return pool_monitor +``` + +### Step 2: Add Health Endpoint (20 min) + +Update `backend/app/api/routes.py`: + +```python +from app.monitoring.pool_monitor import pool_monitor + +@router.get("/health/database") +async def database_health(): + """ + Database connection pool health. + + Returns: + Pool statistics and health status. + """ + if not pool_monitor: + return {"status": "unknown", "message": "Pool monitor not initialized"} + + health = pool_monitor.get_health_status() + history = pool_monitor.get_history(limit=5) + + return { + **health, + "recent_history": history + } + + +@router.get("/health") +async def overall_health(): + """ + Overall application health including database. 
+ """ + db_health = pool_monitor.get_health_status() if pool_monitor else {"status": "unknown"} + + # Aggregate health status + statuses = [db_health.get("status", "unknown")] + + if "critical" in statuses: + overall = "critical" + elif "warning" in statuses: + overall = "warning" + elif "unknown" in statuses: + overall = "degraded" + else: + overall = "healthy" + + return { + "status": overall, + "components": { + "database": db_health + } + } +``` + +### Step 3: Initialize in Application (15 min) + +Update `backend/app/main.py`: + +```python +from app.database.session import engine +from app.monitoring.pool_monitor import init_pool_monitor, pool_monitor + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Initialize pool monitor + monitor = init_pool_monitor(engine) + + # Start background monitoring + monitoring_task = asyncio.create_task( + monitor.start_monitoring(interval_seconds=60) + ) + + logger.info("Pool monitoring started") + + yield + + # Stop monitoring + monitoring_task.cancel() + try: + await monitoring_task + except asyncio.CancelledError: + pass + + logger.info("Pool monitoring stopped") +``` + +### Step 4: Add Connection Recycling (15 min) + +Update `backend/app/database/session.py`: + +```python +from sqlalchemy.ext.asyncio import create_async_engine +from app.config import settings + +engine = create_async_engine( + settings.database_url, + echo=settings.debug, + pool_size=settings.db_pool_size, + max_overflow=settings.db_max_overflow, + # New: Connection health settings + pool_pre_ping=True, # Test connection before use + pool_recycle=3600, # Recycle connections after 1 hour + pool_timeout=30, # Wait max 30s for connection +) +``` + +### Step 5: Add Prometheus Metrics (Optional) (30 min) + +Create `backend/app/monitoring/metrics.py`: + +```python +"""Prometheus metrics for monitoring.""" + +from prometheus_client import Gauge, Counter, Histogram + +# Connection pool metrics +db_pool_size = Gauge( + 'db_pool_size', + 'Database 
connection pool size' +) + +db_pool_checkedout = Gauge( + 'db_pool_checkedout', + 'Database connections currently in use' +) + +db_pool_overflow = Gauge( + 'db_pool_overflow', + 'Database overflow connections in use' +) + +db_pool_usage = Gauge( + 'db_pool_usage_percent', + 'Database connection pool usage percentage' +) + +# Query metrics +db_query_duration = Histogram( + 'db_query_duration_seconds', + 'Database query duration', + buckets=[0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0] +) + +db_query_errors = Counter( + 'db_query_errors_total', + 'Total database query errors' +) + + +def update_pool_metrics(stats): + """Update Prometheus metrics with pool stats.""" + db_pool_size.set(stats.pool_size) + db_pool_checkedout.set(stats.checkedout) + db_pool_overflow.set(stats.overflow) + db_pool_usage.set(stats.usage_percent * 100) +``` + +### Step 6: Write Tests (15 min) + +Create `backend/tests/unit/monitoring/test_pool_monitor.py`: + +```python +"""Tests for connection pool monitoring.""" + +import pytest +from unittest.mock import MagicMock +from app.monitoring.pool_monitor import PoolMonitor, PoolStats + +class TestPoolMonitor: + """Tests for PoolMonitor.""" + + @pytest.fixture + def mock_engine(self): + engine = MagicMock() + pool = MagicMock() + pool.checkedin.return_value = 15 + pool.checkedout.return_value = 5 + pool.overflow.return_value = 0 + engine.pool = pool + return engine + + def test_get_stats_returns_pool_stats(self, mock_engine): + """get_stats returns PoolStats with correct values.""" + monitor = PoolMonitor(mock_engine) + stats = monitor.get_stats() + + assert isinstance(stats, PoolStats) + assert stats.checkedout == 5 + assert stats.checkedin == 15 + + def test_health_status_healthy(self, mock_engine): + """Health status is healthy when usage < 75%.""" + monitor = PoolMonitor(mock_engine) + health = monitor.get_health_status() + + assert health["status"] == "healthy" + + def test_health_status_warning(self, mock_engine): + """Health status 
is warning when usage 75-90%.""" + mock_engine.pool.checkedout.return_value = 24 # 80% + mock_engine.pool.checkedin.return_value = 6 + + monitor = PoolMonitor(mock_engine) + health = monitor.get_health_status() + + assert health["status"] == "warning" + + def test_health_status_critical(self, mock_engine): + """Health status is critical when usage >= 90%.""" + mock_engine.pool.checkedout.return_value = 28 # 93% + mock_engine.pool.checkedin.return_value = 2 + + monitor = PoolMonitor(mock_engine) + health = monitor.get_health_status() + + assert health["status"] == "critical" + + def test_history_tracked(self, mock_engine): + """Stats history is tracked.""" + monitor = PoolMonitor(mock_engine) + + # Get stats multiple times + for _ in range(5): + monitor.get_stats() + + history = monitor.get_history(limit=5) + assert len(history) == 5 +``` + +## Verification Checklist + +- [ ] Pool monitor initialized on startup +- [ ] `/health/database` endpoint returns stats +- [ ] Background monitoring logs stats periodically +- [ ] High usage triggers warning log +- [ ] Pool recycling configured +- [ ] Tests pass + +## Monitoring Dashboard (Future) + +Consider adding Grafana dashboard with: +- Pool usage over time +- Connection wait times +- Error rates +- Query latencies + +## Rollback Plan + +Pool monitoring is additive - simply remove the health endpoint if issues arise. 
| **TOTAL** | **12** | **~3 weeks** | 10/10 active (100%) |
2-3h | ✅ COMPLETE | +| 007 | Session Expiration | [007-session-expiration.md](./007-session-expiration.md) | 🟠 HIGH | 1-2h | ✅ COMPLETE | +| 008 | WebSocket Tests | [008-websocket-tests.md](./008-websocket-tests.md) | 🟠 HIGH | 3-4d | ✅ COMPLETE | +| 009 | Integration Test Fix | [009-integration-test-fix.md](./009-integration-test-fix.md) | 🟡 MEDIUM | 2-3d | ✅ COMPLETE | +| 010 | Shared Components | [010-shared-components.md](./010-shared-components.md) | ⏸️ DEFERRED | 1-2w | ⏸️ DEFERRED (post-launch) | +| 011 | Database Indexes | [011-database-indexes.md](./011-database-indexes.md) | 🟡 MEDIUM | 1h | ✅ COMPLETE | +| 012 | Pool Monitoring | [012-connection-pool-monitoring.md](./012-connection-pool-monitoring.md) | 🟡 MEDIUM | 2h | ✅ COMPLETE | + +--- + +## 🔴 CRITICAL Tasks (Production Blockers) + +These must be completed before production deployment. + +### 002 - WebSocket Handler Locking ✅ COMPLETE +**File**: [002-websocket-locking.md](./002-websocket-locking.md) +**Risk**: DATA CORRUPTION - Race conditions +**Effort**: 2-3 hours +**Completed**: 2025-01-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Expose lock context manager | Complete | +| ✅ | Identify handlers requiring locks | Complete | +| ✅ | Update decision handlers | Complete | +| ✅ | Update roll/outcome handlers | Complete | +| ✅ | Update substitution handlers | Complete | +| ✅ | Add lock timeout | Complete | +| ✅ | Write concurrency tests | Complete (97 WebSocket tests) | + +--- + +### 003 - Idle Game Eviction ✅ COMPLETE +**File**: [003-idle-game-eviction.md](./003-idle-game-eviction.md) +**Risk**: MEMORY LEAK - OOM crash +**Effort**: 1-2 hours +**Completed**: 2025-01-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Add configuration | Complete | +| ✅ | Implement eviction logic | Complete | +| ✅ | Create background task | Complete | +| ✅ | Add health endpoint | Complete | +| ✅ | Write tests | Complete (12 tests) | + +--- + +### 004 - Initialize Alembic 
Migrations ✅ COMPLETE +**File**: [004-alembic-migrations.md](./004-alembic-migrations.md) +**Risk**: SCHEMA EVOLUTION - Cannot rollback +**Effort**: 2-3 hours +**Completed**: 2025-01-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Backup current schema | Complete (schema exists in migration 001) | +| ✅ | Configure Alembic for async | Complete (psycopg2 sync driver for migrations) | +| ✅ | Create initial migration | Complete (001_initial_schema.py) | +| ✅ | Stamp existing database | Complete (stamped at revision 004) | +| ✅ | Remove create_all() | Complete (session.py updated) | +| ✅ | Update README | Complete (migration instructions added) | +| ✅ | Integrate materialized views migration | Complete (004 chains to 001) | +| ✅ | Write migration tests | Complete (8 tests passing) | + +--- + +## 🟠 HIGH Priority Tasks (Before MVP Launch) + +### 005 - Replace Broad Exception Handling ✅ COMPLETE +**File**: [005-exception-handling.md](./005-exception-handling.md) +**Risk**: DEBUGGING - Hides bugs +**Effort**: 2-3 hours +**Completed**: 2025-11-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Identify specific exceptions | Complete (handlers.py, game_engine.py, substitution_manager.py) | +| ✅ | Create custom exception classes | Complete (app/core/exceptions.py - 10 exception types) | +| ✅ | Update substitution_manager.py | Complete (SQLAlchemy-specific catches) | +| ✅ | Update game_engine.py | Complete (DatabaseError wrapping) | +| ✅ | Update WebSocket handlers | Complete (12 handlers updated) | +| ✅ | Add global error handler | Complete (FastAPI exception handlers in main.py) | +| ✅ | Write tests | Complete (37 tests for exception classes) + +--- + +### 006 - Add Rate Limiting ✅ COMPLETE +**File**: [006-rate-limiting.md](./006-rate-limiting.md) +**Risk**: DOS - Server overwhelm +**Effort**: 2-3 hours +**Completed**: 2025-11-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Add configuration | Complete (6 
settings in config.py) | +| ✅ | Create rate limiter | Complete (token bucket algorithm in app/middleware/rate_limit.py) | +| ✅ | Create decorator for handlers | Complete (rate_limited decorator) | +| ✅ | Apply to WebSocket handlers | Complete (12 handlers with rate limiting) | +| ✅ | Add API rate limiting | Complete (per-user API buckets ready) | +| ✅ | Start cleanup task | Complete (background task in main.py lifespan) | +| ✅ | Write tests | Complete (37 tests in tests/unit/middleware/test_rate_limit.py) + +--- + +### 007 - Session Expiration ✅ COMPLETE +**File**: [007-session-expiration.md](./007-session-expiration.md) +**Risk**: MEMORY - Zombie connections +**Effort**: 1-2 hours +**Completed**: 2025-01-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Configure Socket.io timeouts | Complete (ping_interval/ping_timeout) | +| ✅ | Add session tracking | Complete (SessionInfo dataclass, activity timestamps) | +| ✅ | Start expiration background task | Complete (periodic_session_expiration) | +| ✅ | Update handlers to track activity | Complete (11 handlers updated) | +| ✅ | Add health endpoint | Complete (/api/health/connections) | +| ✅ | Write tests | Complete (20 new tests, 856 total passing) + +--- + +### 008 - WebSocket Handler Tests ✅ COMPLETE +**File**: [008-websocket-tests.md](./008-websocket-tests.md) +**Risk**: INTEGRATION - Frontend can't integrate safely +**Effort**: 3-4 days +**Completed**: 2025-11-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Create test fixtures | Complete (conftest.py with 15+ fixtures) | +| ✅ | Test connect handler (10 tests) | Complete (cookie auth, IP extraction, error cases) | +| ✅ | Test disconnect handler (4 tests) | Complete (cleanup, rate limiter removal) | +| ✅ | Test join_game/leave_game (5 tests) | Complete (success, errors, roles) | +| ✅ | Test pinch_hitter handler (9 tests) | Complete (validation, errors, locking) | +| ✅ | Test defensive_replacement (10 tests) | Complete (all 
field validation, edge cases) | +| ✅ | Test pitching_change (9 tests) | Complete (validation, errors, locking) | +| ✅ | Test rate limiting (20 tests) | Complete (connection + game level limits) | +| ✅ | Run full test suite | Complete (148 WebSocket tests, 961 total unit tests) + +--- + +## ⏸️ DEFERRED Tasks + +These tasks are deferred to later phases for specific reasons. + +### 001 - WebSocket Authorization +**File**: [001-websocket-authorization.md](./001-websocket-authorization.md) +**Risk**: SECURITY - Unauthorized game access +**Effort**: 4-6 hours +**Deferred Until**: After MVP testing complete + +**Reason**: Deferred to allow testers to test both sides of a game without authentication restrictions during MVP development and testing phase. Will be implemented before production launch. + +| Checkbox | Step | Status | +|----------|------|--------| +| ⬜ | Create authorization utility | Deferred | +| ⬜ | Add user tracking to ConnectionManager | Deferred | +| ⬜ | Update join_game handler | Deferred | +| ⬜ | Update decision handlers | Deferred | +| ⬜ | Update spectator-only handlers | Deferred | +| ⬜ | Add database queries | Deferred | +| ⬜ | Write tests | Deferred | + +--- + +## 🟡 MEDIUM Priority Tasks (Before Beta) + +### 009 - Fix Integration Test Infrastructure ✅ COMPLETE +**File**: [009-integration-test-fix.md](./009-integration-test-fix.md) +**Effort**: 2-3 days +**Completed**: 2025-11-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Update pytest-asyncio config | Complete (asyncio_mode=auto, function scope) | +| ✅ | Create test database utilities | Complete (NullPool, session injection) | +| ✅ | Update integration test fixtures | Complete (tests/integration/conftest.py) | +| ✅ | Fix specific test files | Complete (test_operations.py, test_migrations.py) | +| ✅ | Refactor DatabaseOperations | Complete (session injection pattern) | +| ✅ | Update game_engine.py | Complete (transaction-scoped db_ops) | +| ✅ | Fix test data issues | 
Complete (batter_id, pitcher_id, catcher_id) | +| ✅ | Verify test suite | Complete (32/32 integration, 979 unit) + +**Solution**: Implemented session injection pattern in `DatabaseOperations`: +- Constructor accepts optional `AsyncSession` parameter +- Methods use `_get_session()` context manager +- Injected sessions share transaction (no connection conflicts) +- Non-injected sessions auto-commit (backwards compatible) +- Tests use `NullPool` to prevent connection reuse issues + +--- + +### 010 - Create Shared Component Library ⏸️ DEFERRED +**File**: [010-shared-components.md](./010-shared-components.md) +**Effort**: 1-2 weeks +**Deferred Until**: Post-launch / Release Candidate + +**Reason**: Deferred since we're building one frontend at a time. Makes more sense to refactor into shared components once we have a release candidate or launch, rather than abstracting prematurely. + +| Checkbox | Step | Status | +|----------|------|--------| +| ⏸️ | Create package structure | Deferred | +| ⏸️ | Create Nuxt module | Deferred | +| ⏸️ | Move shared components | Deferred | +| ⏸️ | Create theme system | Deferred | +| ⏸️ | Update frontends | Deferred | +| ⏸️ | Setup workspace | Deferred | +| ⏸️ | Update imports | Deferred | +| ⏸️ | Add shared tests | Deferred | +| ⏸️ | Documentation | Deferred | + +--- + +### 011 - Add Database Indexes ✅ COMPLETE +**File**: [011-database-indexes.md](./011-database-indexes.md) +**Effort**: 1 hour +**Completed**: 2025-11-27 + +| Checkbox | Step | Status | +|----------|------|--------| +| ✅ | Create migration | Complete (005_add_composite_indexes.py) | +| ✅ | Apply migration | Complete (now at revision 005) | +| ✅ | Verify indexes | Complete (5 indexes created) | + +**Indexes Created**: +- `idx_play_game_number` - plays(game_id, play_number) +- `idx_lineup_game_team_active` - lineups(game_id, team_id, is_active) +- `idx_lineup_game_active` - lineups(game_id, is_active) +- `idx_roll_game_type` - rolls(game_id, roll_type) +- 
DEFERRED: [⏸️⏸️] 2/2 deferred (001 MVP testing, 010 post-launch)
(independent) + +004 Alembic Migrations + └── 011 Database Indexes (requires migrations) + +005 Exception Handling + └── (independent) + +006 Rate Limiting + └── 007 Session Expiration (can combine) + +008 WebSocket Tests + └── 002 WebSocket Locking (tests verify locking) + └── Note: Auth tests will be skipped until 001 is implemented + +009 Integration Test Fix + └── (independent) + +010 Shared Components + └── (independent, long-term) + +012 Connection Pool Monitoring + └── (independent) +``` + +--- + +## Notes + +- Tasks can be parallelized across developers +- Critical tasks should block deployment +- High priority should complete before MVP +- Medium priority is "nice to have" for beta +- **WebSocket Authorization (001) is intentionally deferred** to allow testers to control both sides of games during MVP testing without authentication friction + +--- + +## Change Log + +| Date | Change | +|------|--------| +| 2025-01-27 | Initial tracker created from architectural review | +| 2025-01-27 | Deferred task 001 (WebSocket Authorization) until after MVP testing to allow testers to control both sides of games | +| 2025-01-27 | Completed task 004 (Alembic Migrations) - all critical tasks now complete | +| 2025-01-27 | Completed task 007 (Session Expiration) - Socket.io ping/pong, session tracking, background expiration, health endpoint | +| 2025-11-27 | Completed task 005 (Exception Handling) - Custom exception hierarchy (10 types), specific catches in handlers/engine/substitution, FastAPI global handlers, 37 tests | +| 2025-11-27 | Completed task 006 (Rate Limiting) - Token bucket algorithm, per-connection/game/user rate limits, 12 handlers protected, cleanup background task, health endpoint integration, 37 tests | +| 2025-11-27 | Completed task 008 (WebSocket Tests) - 148 WebSocket handler tests covering connect (10), disconnect (4), join/leave (5), substitutions (28), rate limiting (20), locking (11), queries (13), manual outcomes (12), connection manager (39). 
All HIGH priority tasks complete. | +| 2025-11-27 | Completed task 011 (Database Indexes) - Created migration 005 with 5 composite indexes for plays, lineups, rolls, and games tables. Optimizes game recovery, lineup queries, and status lookups. | +| 2025-11-27 | Completed task 012 (Pool Monitoring) - Created app/monitoring/pool_monitor.py with PoolMonitor class, added /health/pool and /health/full endpoints, background monitoring task, 18 unit tests. 979 total unit tests passing. | +| 2025-11-27 | Completed task 009 (Integration Test Fix) - Implemented session injection pattern in DatabaseOperations, updated game_engine.py transaction handling, rewrote integration test fixtures with NullPool, fixed test data to include required foreign keys. **All active tasks complete (10/10, 100%)** - 32 integration tests + 979 unit tests passing. | diff --git a/CLAUDE.md b/CLAUDE.md index 3c90bcf..ea8f34b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,24 +80,82 @@ strat-gameplay-webapp/ │ │ └── api/ # REST endpoints │ └── tests/ ├── frontend-sba/ # SBA League Nuxt app -├── frontend-pd/ # PD League Nuxt app -├── docker-compose.yml # Dev orchestration (all services) -├── docker-compose.prod.yml # Production overrides -└── .env.example # Root environment template +├── frontend-pd/ # PD League Nuxt app (disabled) +├── docker-compose.yml # Base service configuration +├── docker-compose.dev.yml # Development overrides (hot-reload) +├── docker-compose.prod.yml # Production overrides (optimized) +├── start.sh # Single-command startup script +└── scripts/env-switch.sh # Environment profile switcher ``` +## Quick Start (All-Docker Workflow) + +The entire stack runs in Docker with a single command. No local Python or Node.js required. 
+ +```bash +# Development (hot-reload enabled) +./start.sh dev + +# Production (optimized build) +./start.sh prod + +# Stop all services +./start.sh stop + +# View logs +./start.sh logs + +# Check status +./start.sh status + +# Force rebuild +./start.sh rebuild [dev|prod] +``` + +### What Each Mode Does + +| Mode | Backend | Frontend | Use Case | +|------|---------|----------|----------| +| `dev` | Hot-reload (uvicorn --reload) | Hot-reload (nuxt dev) | Active development | +| `prod` | Production build | SSR optimized build | Demo/deployment | + +### Service URLs + +| Service | Dev Mode | Prod Mode | +|---------|----------|-----------| +| Frontend | http://localhost:3000 | https://gameplay-demo.manticorum.com | +| Backend API | http://localhost:8000 | https://gameplay-demo.manticorum.com/api | +| API Docs | http://localhost:8000/docs | https://gameplay-demo.manticorum.com/api/docs | + +### Docker Compose Architecture + +The stack uses layered compose files: + +- **`docker-compose.yml`** - Base services (Redis, Backend, Frontend-SBA) +- **`docker-compose.dev.yml`** - Development overrides (volume mounts, hot-reload) +- **`docker-compose.prod.yml`** - Production overrides (optimized builds, restart policies) + +The `start.sh` script handles composing these correctly. + ## Environment Configuration -### Multi-Domain Support -The project is fully environment-driven for deployment to different domains. All URLs and credentials are externalized. 
+### Environment Profiles + +| Profile | Backend URL | Frontend URL | +|---------|-------------|--------------| +| `dev` | `http://localhost:8000` | `http://localhost:3000` | +| `prod` | `https://gameplay-demo.manticorum.com` | `https://gameplay-demo.manticorum.com` | ### Environment Files + | File | Purpose | |------|---------| -| `.env.example` | Root template - copy to `.env` | -| `backend/.env.example` | Backend-specific template | -| `frontend-sba/.env.example` | SBA frontend template | -| `frontend-pd/.env.example` | PD frontend template | +| `backend/.env` | Active backend config (gitignored) | +| `backend/.env.dev` | Local development settings | +| `backend/.env.prod` | Production settings | +| `frontend-sba/.env` | Active frontend config (gitignored) | +| `frontend-sba/.env.dev` | Local development settings | +| `frontend-sba/.env.prod` | Production settings | ### Key Environment Variables @@ -106,26 +164,35 @@ The project is fully environment-driven for deployment to different domains. 
All - `DISCORD_CLIENT_ID/SECRET` - OAuth credentials - `DISCORD_SERVER_REDIRECT_URI` - Server-side OAuth callback - `FRONTEND_URL` - Frontend base URL for redirects -- `CORS_ORIGINS` - Allowed origins (comma-separated) +- `CORS_ORIGINS` - Allowed origins (JSON array) - `ALLOWED_DISCORD_IDS` - User whitelist (comma-separated, empty = all) -**Frontend** (in `frontend-*/env`): -- `NUXT_PUBLIC_API_URL` - Backend API URL +**Frontend** (in `frontend-*/.env`): +- `NUXT_PUBLIC_API_URL` - Backend API URL (public, client-side) +- `NUXT_API_URL_INTERNAL` - Backend URL for SSR (Docker internal: `http://backend:8000`) - `NUXT_PUBLIC_WS_URL` - WebSocket URL - `NUXT_PUBLIC_DISCORD_CLIENT_ID` - OAuth client ID (public) - `NUXT_PUBLIC_DISCORD_REDIRECT_URI` - OAuth callback URL -- `NUXT_ALLOWED_HOSTS` - Vite dev server allowed hosts (comma-separated) -### Docker Deployment +### Discord OAuth Setup + +Both environments require redirect URIs in [Discord Developer Portal](https://discord.com/developers/applications): + +| Environment | Redirect URI | +|-------------|--------------| +| dev | `http://localhost:8000/api/auth/discord/callback/server` | +| prod | `https://gameplay-demo.manticorum.com/api/auth/discord/callback/server` | + +### Manual Environment Switching + +If you need to switch between dev/prod configs without Docker: + ```bash -# Development -docker compose up - -# Production -docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d +./scripts/env-switch.sh dev # Copy .env.dev → .env +./scripts/env-switch.sh prod # Copy .env.prod → .env ``` -See `README.md` "Multi-Domain Deployment" section for full checklist. +The `start.sh` script handles this automatically based on mode. 
## Development Guidelines diff --git a/WEBSOCKET_ARCHITECTURE_ANALYSIS.md b/WEBSOCKET_ARCHITECTURE_ANALYSIS.md new file mode 100644 index 0000000..1067cd4 --- /dev/null +++ b/WEBSOCKET_ARCHITECTURE_ANALYSIS.md @@ -0,0 +1,829 @@ +# WebSocket Implementation Analysis Report +## Paper Dynasty Real-Time Game Engine Backend + +**Date**: 2025-11-27 +**Scope**: `/mnt/NV2/Development/strat-gameplay-webapp/backend/app/websocket/` +**Status**: Comprehensive exploration of production-ready implementation + +--- + +## EXECUTIVE SUMMARY + +The WebSocket implementation is **well-architected and production-ready** with 15 event handlers, proper error handling, and recovery mechanisms. However, there are several **identified risk areas** related to state synchronization, memory management, and race conditions that require attention before handling high-concurrency scenarios. + +### Key Findings: +- ✅ **Solid Foundation**: Clean separation of concerns, proper async/await patterns +- ✅ **Good Error Handling**: 157+ error handling statements across handlers +- ⚠️ **State Mutation Risk**: Direct state object mutations without proper locking +- ⚠️ **Memory Leak Potential**: Game resource cleanup not guaranteed on all code paths +- ⚠️ **Race Condition Risk**: Multiple concurrent events could corrupt state +- ⚠️ **Reconnection Gap**: No explicit reconnection handling or session recovery strategy +- ⚠️ **Room Cleanup**: Stale game rooms may persist indefinitely + +--- + +## 1. 
ARCHITECTURE OVERVIEW + +### Directory Structure +``` +app/websocket/ +├── connection_manager.py (71 lines) # Room & session management +├── handlers.py (1,312 lines) # 15 event handlers +├── __init__.py # Package marker +└── CLAUDE.md # Documentation +``` + +### Socket.io Initialization (main.py) +```python +sio = socketio.AsyncServer( + async_mode="asgi", + cors_allowed_origins=settings.cors_origins, + logger=True, + engineio_logger=False +) +socket_app = socketio.ASGIApp(sio, app) + +connection_manager = ConnectionManager(sio) +register_handlers(sio, connection_manager) +game_engine.set_connection_manager(connection_manager) +``` + +**Critical Point**: Connection manager is a singleton instance passed to all handlers via closure. + +--- + +## 2. CONNECTION MANAGEMENT + +### ConnectionManager Structure +```python +class ConnectionManager: + def __init__(self, sio: socketio.AsyncServer): + self.sio = sio + self.user_sessions: dict[str, str] = {} # sid -> user_id + self.game_rooms: dict[str, set[str]] = {} # game_id -> set of sids +``` + +### Connection Lifecycle + +#### 1. Connect Handler +```python +@sio.event +async def connect(sid, environ, auth): + # Dual auth support: + # 1. HttpOnly cookies (from nginx proxy) + # 2. Auth object (direct JS clients) + token = extract_token(environ, auth) + user_data = verify_token(token) + user_id = user_data.get("user_id") + + await manager.connect(sid, user_id) + await sio.emit("connected", {"user_id": user_id}, room=sid) + return True # Accept connection +``` + +**Status**: ✅ Properly authenticated with fallback support + +#### 2. 
Disconnect Handler +```python +@sio.event +async def disconnect(sid): + await manager.disconnect(sid) + +async def disconnect(self, sid: str) -> None: + user_id = self.user_sessions.pop(sid, None) + if user_id: + logger.info(f"User {user_id} disconnected (session {sid})") + + # Remove from all game rooms + for game_id, sids in self.game_rooms.items(): + if sid in sids: + sids.remove(sid) + await self.broadcast_to_game( + game_id, "user_disconnected", {"user_id": user_id} + ) +``` + +**Status**: ✅ Removes user from all rooms + +### Room Management + +#### Join Game +```python +async def join_game(self, sid: str, game_id: str, role: str) -> None: + await self.sio.enter_room(sid, game_id) + + if game_id not in self.game_rooms: + self.game_rooms[game_id] = set() + self.game_rooms[game_id].add(sid) +``` + +**Issue #1**: Redundant tracking - Socket.io already maintains room membership +- `self.sio.enter_room()` registers the sid in Socket.io's internal room +- `self.game_rooms[game_id]` duplicates this information +- **Risk**: Synchronization could diverge if Socket.io rooms are accessed directly + +#### Leave Game +```python +async def leave_game(self, sid: str, game_id: str) -> None: + await self.sio.leave_room(sid, game_id) + + if game_id in self.game_rooms: + self.game_rooms[game_id].discard(sid) +``` + +**Issue #2**: Manual set cleanup but no cleanup of empty game rooms +- After last user leaves, `self.game_rooms[game_id]` remains as empty set +- **Memory Impact**: Accumulates empty sets over time +- **Recommendation**: Remove empty game rooms after discard + +--- + +## 3. 
EVENT HANDLERS (15 Total) + +### Handler Categories + +#### Connection Events (2) +- `connect` - JWT authentication +- `disconnect` - Session cleanup + +#### Game Flow Events (4) +- `join_game` - Join game room +- `leave_game` - Leave game room +- `request_game_state` - Recovery after disconnect/initial load +- `heartbeat` - Keep-alive ping + +#### Decision Submission (2) +- `submit_defensive_decision` - Defense strategy +- `submit_offensive_decision` - Offense strategy + +#### Manual Outcome Flow (2) +- `roll_dice` - Roll dice for play +- `submit_manual_outcome` - Submit card result + +#### Substitutions (3) +- `request_pinch_hitter` - Batting substitution +- `request_pitching_change` - Pitcher substitution +- `request_defensive_replacement` - Field substitution + +#### Query Events (2) +- `get_lineup` - Get team lineup +- `get_box_score` - Get statistics + +### Error Handling Pattern + +**Consistent "Emit or Return" Pattern**: +```python +try: + # Validate input + if not game_id_str: + await manager.emit_to_user(sid, "error", {"message": "Missing game_id"}) + return + + # Parse and validate + try: + game_id = UUID(game_id_str) + except (ValueError, AttributeError): + await manager.emit_to_user(sid, "error", {"message": "Invalid format"}) + return + + # Get state + state = state_manager.get_state(game_id) + if not state: + await manager.emit_to_user(sid, "error", {"message": f"Game not found"}) + return + + # Process action + result = await game_engine.some_operation(...) + + # Broadcast result + await manager.broadcast_to_game(str(game_id), "event_name", data) + +except GameValidationError as e: + await manager.emit_to_user(sid, "outcome_rejected", {"message": str(e)}) +except Exception as e: + logger.error(f"Error: {e}", exc_info=True) + await manager.emit_to_user(sid, "error", {"message": str(e)}) +``` + +**Analysis**: 157+ error handling statements providing comprehensive coverage + +--- + +## 4. 
STATE SYNCHRONIZATION PATTERNS + +### State Flow Diagram +``` +Handler gets state → Mutates state object → Updates state manager → Broadcasts +``` + +### Critical State Mutation: Dice Roll Example + +```python +# Line 216-217 in handlers.py (roll_dice) +state.pending_manual_roll = ab_roll +state_manager.update_state(game_id, state) +``` + +**Issue #3 - RACE CONDITION RISK**: State mutation without locking +- Multiple concurrent `roll_dice` calls could overwrite `pending_manual_roll` +- `state_manager.update_state()` does NOT use locks +- **Scenario**: Player 1 rolls, Player 2 rolls before Player 1 submits outcome + - `pending_manual_roll` is overwritten + - Player 1's submitted outcome rejects: "No pending dice roll" + +### Similar Risk in Other Handlers + +**All decision submission handlers (lines 271-409)**: +```python +# submit_defensive_decision +async def submit_defensive_decision(sid, data): + # ... + updated_state = await game_engine.submit_defensive_decision(game_id, decision) + # ... + await manager.broadcast_to_game(str(game_id), "defensive_decision_submitted", {...}) +``` + +**Race Condition Risk**: Two simultaneous defensive decision submissions +- Both handlers enter concurrently +- Both call `game_engine.submit_defensive_decision()` +- Both may be processed if game_engine locks are insufficient + +**Analysis of game_engine locking**: +```python +class GameEngine: + def _get_game_lock(self, game_id: UUID) -> asyncio.Lock: + if game_id not in self._game_locks: + self._game_locks[game_id] = asyncio.Lock() + return self._game_locks[game_id] +``` + +**Problem**: Locks are in game_engine, but WebSocket handlers don't use them! +- Handlers call state_manager directly without locks +- Game engine operations are locked, but handler-level mutations are not +- **Two-phase risk**: Handler mutation + Engine operation both need protection + +--- + +## 5. 
RECONNECTION AND RECOVERY + +### Game State Recovery Handler + +```python +@sio.event +async def request_game_state(sid, data): + """Client requests full game state (recovery after disconnect)""" + game_id_str = data.get("game_id") + game_id = UUID(game_id_str) + + # Try memory first + state = state_manager.get_state(game_id) + + # If not in memory, recover from database + if not state: + logger.info(f"Game {game_id} not in memory, recovering from database") + state = await state_manager.recover_game(game_id) + + if state: + await manager.emit_to_user(sid, "game_state", state.model_dump(mode="json")) + else: + await manager.emit_to_user(sid, "error", {"message": f"Game not found"}) +``` + +**Status**: ✅ Recovery mechanism exists + +**However**: + +### Issue #4 - No Explicit Reconnection Handling +- Socket.io supports client reconnection automatically, but no handler for it +- When client reconnects with same session: + - New socket connection establishes + - Old session ID may still be in `user_sessions` + - **Duplicate session risk**: Same user_id mapped to two sids + +```python +async def connect(sid, environ, auth): + # ... + await manager.connect(sid, user_id) # <- Could overwrite old session +``` + +**Missing Code**: No check for existing sessions +```python +# MISSING: +if user_id in [uid for uid in self.user_sessions.values()]: + # Existing session detected - close old session? + # Or allow multiple sessions per user? +``` + +### Issue #5 - No Session Expiration +- `user_sessions` dictionary grows indefinitely +- If disconnect event doesn't fire (network failure), sessions accumulate +- After 1000 disconnected users: 1000 orphaned entries in memory + +--- + +## 6. 
MEMORY LEAK POTENTIAL + +### Issue #6 - Game Room Cleanup + +**Current cleanup in disconnect**: +```python +for game_id, sids in self.game_rooms.items(): + if sid in sids: + sids.remove(sid) # <- Leaves empty set in dict +``` + +**Problem**: Empty sets persist indefinitely +```python +# After all players leave game 123: +game_rooms = { + "123": set(), # <- Still in dict, wastes memory + "456": {sid1, sid2} +} +``` + +**Over time**: Hundreds of dead game rooms accumulate + +**Recommendation** (note: deleting entries from `game_rooms` inside the `for ... in self.game_rooms.items()` loop above raises `RuntimeError: dictionary changed size during iteration` — iterate over `list(self.game_rooms.items())` instead): +```python +sids.discard(sid) +if not sids: # If set is now empty + del self.game_rooms[game_id] # Remove the game room +``` + +### Issue #7 - Event Listener Cleanup + +**Socket.io events are registered once at startup**: +```python +def register_handlers(sio: AsyncServer, manager: ConnectionManager) -> None: + @sio.event + async def connect(sid, environ, auth): + # Registered once, called many times +``` + +**Analysis**: ✅ No memory leak from event listeners (they're registered once as closures) + +**However**: Check if state_manager has internal leaks +- `_pending_decisions: dict[tuple, asyncio.Future]` - could hold stale futures +- `_last_access: dict[UUID, pendulum.DateTime]` - could grow indefinitely +- `_lineups: dict[UUID, dict[int, TeamLineupState]]` - needs cleanup when game ends + +--- + +## 7. GAME LIFECYCLE MANAGEMENT + +### Issue #8 - Resource Cleanup Not Guaranteed + +**Where games should cleanup**: +1. ✅ When game completes naturally (27 outs) +2. ❓ When game is manually ended +3. ❓ When user force-closes connection +4. 
❓ After idle timeout + +**Current cleanup in game_engine.py**: +```python +async def _cleanup_game_resources(game_id: UUID): + if game_id in self._rolls_this_inning: + del self._rolls_this_inning[game_id] + if game_id in self._game_locks: + del self._game_locks[game_id] +``` + +**Problem**: This cleanup is called only from game completion path +- No cleanup on unexpected disconnection +- No cleanup on timeout +- Game state remains in state_manager indefinitely + +**Recommendation**: +```python +# When game ends (any reason): +await game_engine._cleanup_game_resources(game_id) +await state_manager.remove_game(game_id) +await manager.cleanup_game_room(game_id) +``` + +### Missing endpoint: Force game cleanup + +**No REST endpoint or WebSocket event to cleanup a game** +- Useful for: admin actions, manual cleanup after crashes +- Should be admin-only +- Should emit notification to all players in room + +--- + +## 8. AUTHORIZATION GAPS + +### Issue #9 - Authorization TODOs + +Found 11 security TODOs indicating missing authorization checks: + +```python +# Line 90: join_game +# TODO: Verify user has access to game + +# Line 200: roll_dice +# TODO: Verify user is participant in this game + +# Line 293: submit_manual_outcome +# TODO: Verify user is active batter or authorized to submit + +# Lines 558, 723, 880: Substitution handlers +# TODO: Verify user is authorized to make substitution for this team +``` + +**Risk**: Currently **ANY connected user** can: +- Roll dice for any game +- Submit outcomes +- Make substitutions +- View any lineup +- View any box score + +**Should implement**: +```python +def _verify_game_access(sid: str, game_id: UUID) -> bool: + """Check if user in this session has access to game""" + user_id = manager.user_sessions.get(sid) + # Check against game participant list in DB + +def _verify_is_batter(sid: str, game_id: UUID) -> bool: + """Check if user is the current batter""" + +def _verify_is_pitcher(sid: str, game_id: UUID) -> bool: + """Check if 
user is the current pitcher""" +``` + +--- + +## 9. POTENTIAL RACE CONDITIONS + +### Race Condition Scenario #1: Concurrent Outcome Submissions + +``` +Timeline: +T1: Player1.submit_manual_outcome() - gets pending_manual_roll +T2: Player2.submit_manual_outcome() - gets SAME pending_manual_roll +T3: Player1 clears pending_manual_roll +T4: Player2 tries to clear it again - but it's already None +``` + +**Actual Problem**: Both players process the same roll! +- Play is resolved twice +- State is corrupted +- Both receive "outcome_accepted" + +**Fix**: Use lock in handlers +```python +async def submit_manual_outcome(sid, data): + lock = game_engine._get_game_lock(game_id) + async with lock: + # Safe: only one handler can be here at a time + state = state_manager.get_state(game_id) + if not state.pending_manual_roll: + # Safely reject +``` + +### Race Condition Scenario #2: Substitution During Play + +``` +Timeline: +T1: substitute_player() - modifies lineup +T2: resolve_play() - reads lineup (sees old player) +T3: substitute_player() completes - broadcasts change +T4: resolve_play() completes - broadcasts different player in play result +``` + +**Result**: Confusion about which player was involved + +### Race Condition Scenario #3: State Recovery During Active Play + +``` +Timeline: +T1: Player A submits outcome +T2: Client B reconnects +T3: Client B requests_game_state() +T4: state_manager.recover_game() loads from DB (sees old state) +T5: Client B receives outdated state while play is resolving +``` + +**Recovery timing issue**: Overlapping with active game operations + +--- + +## 10. 
BROADCAST PATTERN RISKS + +### Issue #10 - No Guarantees on Broadcast Delivery + +```python +async def broadcast_to_game(self, game_id: str, event: str, data: dict) -> None: + await self.sio.emit(event, data, room=game_id) + logger.debug(f"Broadcast {event} to game {game_id}") +``` + +**Characteristics**: +- No acknowledgment mechanism +- No retry logic +- No delivery confirmation +- Fire-and-forget + +**Risk**: Client doesn't receive "play_resolved" event +- State is updated on server +- Client never knows about it +- Client thinks outcome is still pending +- Timeout eventually kicks in + +**Mitigation**: Socket.io provides: +- Client-side event acknowledgment (ack callbacks) +- Currently not used +- Should implement for critical events + +--- + +## 11. HEARTBEAT AND KEEPALIVE + +### Heartbeat Implementation + +```python +@sio.event +async def heartbeat(sid): + """Handle heartbeat ping""" + await sio.emit("heartbeat_ack", {}, room=sid) +``` + +**Status**: ✅ Basic heartbeat exists + +**However**: +- No server-initiated heartbeat +- No timeout tracking +- No automatic cleanup of idle sessions +- Client must ping at regular intervals + +**Production Issue**: +- Disconnected clients (network issue) may not send disconnect event +- Sessions persist indefinitely +- After 1000 idle connections: significant memory waste + +**Recommendation**: +```python +# Server-side keepalive +async def maintain_heartbeats(): + while True: + await asyncio.sleep(30) # Every 30 seconds + # Check for stale sessions without recent activity + now = pendulum.now('UTC') + for sid, user_id in list(manager.user_sessions.items()): + last_activity = manager.last_activity.get(sid) + if last_activity and (now - last_activity).seconds > 300: + # 5 minutes idle - disconnect + await sio.disconnect(sid) +``` + +--- + +## 12. DATABASE INTERACTION RISKS + +### Issue #11 - Async DB Operations Without Proper Isolation + +```python +@sio.event +async def submit_manual_outcome(sid, data): + # ... 
+ result = await game_engine.resolve_manual_play( + game_id=game_id, + ab_roll=ab_roll, + outcome=outcome, + hit_location=submission.hit_location, + ) + + # Updated state might lag behind DB + updated_state = state_manager.get_state(game_id) + if updated_state: + await manager.broadcast_to_game( + str(game_id), "game_state_update", updated_state.model_dump(mode="json") + ) +``` + +**Issue**: State broadcast happens synchronously, but DB write is async +- Broadcast might happen before DB write completes +- DB connection failure silently fails (logged only) +- Client has state but DB might not have it yet + +**Better approach**: +```python +result = await game_engine.resolve_manual_play(...) # Includes DB write +# Only then broadcast +await manager.broadcast_to_game(...) +``` + +--- + +## 13. SOCKET.IO CONFIGURATION + +### Current Configuration (main.py) +```python +sio = socketio.AsyncServer( + async_mode="asgi", + cors_allowed_origins=settings.cors_origins, + logger=True, + engineio_logger=False, +) +``` + +### Missing Configuration Options + +```python +# Should consider adding: +sio = socketio.AsyncServer( + async_mode="asgi", + cors_allowed_origins=settings.cors_origins, + logger=True, + engineio_logger=False, + + # Connection options + ping_timeout=60, # Disconnect after 60s no response to ping + ping_interval=25, # Send ping every 25 seconds + max_http_buffer_size=1e6, # 1MB max message size + + # Event handling + async_handlers=True, # Already default with async + + # Namespaces + # Could partition different game types into separate namespaces +) +``` + +**Without ping_timeout**: Zombie connections can persist indefinitely + +--- + +## 14. 
TEST COVERAGE + +### Current Tests +- **File**: `tests/unit/websocket/test_manual_outcome_handlers.py` +- **Status**: Unit tests with mocks for critical handlers + +### Test Coverage Analysis + +✅ **Covered**: +- `roll_dice` success path +- `roll_dice` missing game_id +- `roll_dice` invalid game_id +- `submit_manual_outcome` (partial) + +❌ **Not Covered**: +- Concurrent submissions (race condition tests) +- Connection cleanup +- Game room cleanup after disconnect +- Authorization verification (because TODOs indicate not implemented) +- Reconnection scenarios +- State recovery from DB +- All 15 handlers not fully tested + +**Recommendation**: Add integration tests for: +```python +@pytest.mark.asyncio +async def test_concurrent_outcome_submissions(): + """Two players submit outcomes simultaneously""" + # Create game + # Player 1 starts submit_manual_outcome + # Player 2 starts submit_manual_outcome + # Both should fail or one should win + +@pytest.mark.asyncio +async def test_connection_cleanup_on_disconnect(): + """Game room should be cleaned up after all players disconnect""" + # Connect 3 players to game 123 + # Disconnect all 3 + # Assert game_rooms doesn't contain empty set for game 123 +``` + +--- + +## 15. RECOMMENDATIONS + +### Priority 1: Critical (Production Risk) + +1. **Add Locking to Handler-Level State Mutations** + - Location: All handlers that modify state before calling game_engine + - Impact: Prevents race conditions in critical paths + - Effort: Medium + - Example: Wrap state mutation in game_engine locks + ```python + async def submit_manual_outcome(sid, data): + lock = game_engine._get_game_lock(game_id) + async with lock: + # Safe state modifications + ``` + +2. **Implement Authorization Checks** + - Location: All 11 TODO comments in handlers.py + - Impact: Prevents unauthorized game access + - Effort: Medium + - Add helper functions for permission verification + +3. 
**Add Session Expiration** + - Location: connection_manager.py, heartbeat handler + - Impact: Prevent memory leak from zombie sessions + - Effort: Low + - Track last_activity per sid, cleanup stale sessions + +### Priority 2: Important (Data Integrity) + +4. **Clean Up Empty Game Rooms** + - Location: disconnect() method in connection_manager + - Impact: Prevent memory leak + - Effort: Low + - Delete empty sets from game_rooms dict + +5. **Implement Game Cleanup Lifecycle** + - Location: game_engine.py + - Impact: Proper resource cleanup + - Effort: High + - Add cleanup calls to state_manager on game completion + +6. **Add Reconnection Handling** + - Location: connect handler + - Impact: Handle clients reconnecting with same user + - Effort: Medium + - Detect duplicate sessions, manage accordingly + +### Priority 3: Recommended (Reliability) + +7. **Implement Broadcast Acknowledgments** + - Location: broadcast_to_game calls + - Impact: Ensure critical events are delivered + - Effort: Medium + - Use Socket.io ack callbacks for game_state_update + +8. **Add Server-Initiated Keepalive** + - Location: Lifespan context manager or separate task + - Impact: Detect disconnected clients promptly + - Effort: Medium + - Periodic server-side health check + +9. **Configure Socket.io Ping Timeouts** + - Location: main.py socket initialization + - Impact: Automatic cleanup of dead connections + - Effort: Low + - Set ping_timeout=60, ping_interval=25 + +10. **Add Comprehensive Integration Tests** + - Location: tests/integration/websocket/ + - Impact: Prevent regressions + - Effort: High + - Test concurrent scenarios, recovery, cleanup + +### Priority 4: Enhancement (Performance) + +11. **Implement Namespace Separation** + - Partition different game instances into separate namespaces + - Reduces broadcast overhead + - Effort: High + +12. **Add Event Compression** + - For large game_state_update broadcasts + - Reduce network overhead + - Effort: Medium + +--- + +## 16. 
SUMMARY TABLE + +| Issue | Severity | Category | Impact | Fix Effort | +|-------|----------|----------|--------|-----------| +| State mutation race conditions | CRITICAL | Concurrency | Data corruption | Medium | +| Missing authorization | CRITICAL | Security | Unauthorized access | Medium | +| Session expiration | HIGH | Memory | Unbounded memory growth | Low | +| Empty game room cleanup | HIGH | Memory | Memory leak | Low | +| Game resource cleanup | HIGH | Lifecycle | Orphaned resources | High | +| Reconnection handling | HIGH | Reliability | Duplicate sessions | Medium | +| Broadcast delivery guarantees | MEDIUM | Reliability | Missed updates | Medium | +| Integration test coverage | MEDIUM | Testing | Undetected bugs | High | +| Socket.io config optimization | MEDIUM | Reliability | Long disconnection detection | Low | +| DB operation isolation | LOW | Data Integrity | Minor timing issues | Low | + +--- + +## 17. IMPLEMENTATION CHECKLIST + +### Phase 1: Critical Fixes (Week 1) +- [ ] Add locking to handler state mutations +- [ ] Implement authorization middleware +- [ ] Add session expiration logic +- [ ] Clean up empty game rooms + +### Phase 2: Important Fixes (Week 2-3) +- [ ] Implement game cleanup lifecycle +- [ ] Add reconnection handling +- [ ] Add Socket.io ping configuration +- [ ] Implement broadcast acknowledgments + +### Phase 3: Testing & Validation (Week 4) +- [ ] Add concurrency tests +- [ ] Add recovery tests +- [ ] Add cleanup verification tests +- [ ] Load testing with 100+ concurrent connections + +--- + +**Report Generated**: 2025-11-27 +**Analysis Tool**: Claude Code (File Search Specialist) +**Scope**: 2,643 lines analyzed across 3 core files + CLAUDE.md documentation diff --git a/backend/.env.dev b/backend/.env.dev new file mode 100644 index 0000000..ef28d7f --- /dev/null +++ b/backend/.env.dev @@ -0,0 +1,24 @@ +# Application +APP_ENV=development +DEBUG=true +SECRET_KEY=5yIKYt_x20Kk-9PrQVfRzDGc-RFT06vTgIGAz13I8pU + +# Database 
+DATABASE_URL=postgresql+asyncpg://paperdynasty:Snugly9-Gem-Configure@10.10.0.42:5432/paperdynasty_dev + +# Discord OAuth +DISCORD_CLIENT_ID=1441192438055178420 +DISCORD_CLIENT_SECRET=oj1OzUo4Qksph6mq57zb-qDiMxtb1n48 +DISCORD_REDIRECT_URI=http://localhost:3000/auth/callback +DISCORD_SERVER_REDIRECT_URI=http://localhost:8000/api/auth/discord/callback/server +FRONTEND_URL=http://localhost:3000 + +# League APIs +SBA_API_URL=https://api.sba.manticorum.com +SBA_API_KEY=Tp3aO3jhYve5NJF1IqOmJTmk +PD_API_URL=https://pd-api.example.com +PD_API_KEY=placeholder-pd-api-key + +# CORS +CORS_ORIGINS=["http://localhost:3000","http://localhost:3001"] +ALLOWED_DISCORD_IDS=258104532423147520,139926308644847616 diff --git a/backend/.env.prod b/backend/.env.prod new file mode 100644 index 0000000..b0b113d --- /dev/null +++ b/backend/.env.prod @@ -0,0 +1,24 @@ +# Application +APP_ENV=production +DEBUG=false +SECRET_KEY=5yIKYt_x20Kk-9PrQVfRzDGc-RFT06vTgIGAz13I8pU + +# Database +DATABASE_URL=postgresql+asyncpg://paperdynasty:Snugly9-Gem-Configure@10.10.0.42:5432/paperdynasty_dev + +# Discord OAuth +DISCORD_CLIENT_ID=1441192438055178420 +DISCORD_CLIENT_SECRET=oj1OzUo4Qksph6mq57zb-qDiMxtb1n48 +DISCORD_REDIRECT_URI=https://gameplay-demo.manticorum.com/auth/callback # Legacy +DISCORD_SERVER_REDIRECT_URI=https://gameplay-demo.manticorum.com/api/auth/discord/callback/server +FRONTEND_URL=https://gameplay-demo.manticorum.com + +# League APIs +SBA_API_URL=https://api.sba.manticorum.com +SBA_API_KEY=Tp3aO3jhYve5NJF1IqOmJTmk +PD_API_URL=https://pd-api.example.com +PD_API_KEY=placeholder-pd-api-key + +# CORS +CORS_ORIGINS=["http://localhost:3000","http://localhost:3001","https://gameplay-demo.manticorum.com","https://gameplay-api-demo.manticorum.com"] +ALLOWED_DISCORD_IDS=258104532423147520,139926308644847616 diff --git a/backend/.python-version b/backend/.python-version deleted file mode 100644 index 24ee5b1..0000000 --- a/backend/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git 
a/backend/app/api/routes/games.py b/backend/app/api/routes/games.py index ebfede3..c79ef12 100644 --- a/backend/app/api/routes/games.py +++ b/backend/app/api/routes/games.py @@ -8,6 +8,7 @@ from app.core.game_engine import game_engine from app.core.state_manager import state_manager from app.database.operations import DatabaseOperations from app.services.lineup_service import lineup_service +from app.services.sba_api_client import sba_api_client logger = logging.getLogger(f"{__name__}.games") @@ -15,13 +16,22 @@ router = APIRouter() class GameListItem(BaseModel): - """Game list item model""" + """Game list item model with enriched team and game state info""" game_id: str league_id: str status: str home_team_id: int away_team_id: int + # Enriched fields + home_team_name: str | None = None + away_team_name: str | None = None + home_team_abbrev: str | None = None + away_team_abbrev: str | None = None + home_score: int = 0 + away_score: int = 0 + inning: int | None = None + half: str | None = None # 'top' or 'bottom' class CreateGameRequest(BaseModel): @@ -152,16 +162,14 @@ class SubmitLineupsResponse(BaseModel): @router.get("/", response_model=list[GameListItem]) async def list_games(): """ - List all games from the database + List all games from the database with enriched team and game state info. - Returns basic game information for all games in the system. + Returns game information including team names, scores, and current inning. 
TODO: Add user filtering, pagination, and more sophisticated queries """ try: logger.info("Fetching games list from database") - db_ops = DatabaseOperations() - # Get all games from database (for now - later we can add filters) from app.database.session import AsyncSessionLocal from app.models.db_models import Game @@ -173,17 +181,38 @@ async def list_games(): ) games = result.scalars().all() - # Convert to response model - game_list = [ - GameListItem( - game_id=str(game.id), - league_id=game.league_id, - status=game.status, - home_team_id=game.home_team_id, - away_team_id=game.away_team_id, + # Collect unique team IDs for batch lookup + team_ids = set() + for game in games: + team_ids.add(game.home_team_id) + team_ids.add(game.away_team_id) + + # Fetch team data (uses cache) + teams_data = await sba_api_client.get_teams_by_ids(list(team_ids)) + + # Convert to response model with enriched data + game_list = [] + for game in games: + home_team = teams_data.get(game.home_team_id, {}) + away_team = teams_data.get(game.away_team_id, {}) + + game_list.append( + GameListItem( + game_id=str(game.id), + league_id=game.league_id, + status=game.status, + home_team_id=game.home_team_id, + away_team_id=game.away_team_id, + home_team_name=home_team.get("lname"), + away_team_name=away_team.get("lname"), + home_team_abbrev=home_team.get("abbrev"), + away_team_abbrev=away_team.get("abbrev"), + home_score=game.home_score or 0, + away_score=game.away_score or 0, + inning=game.current_inning, + half=game.current_half, + ) ) - for game in games - ] logger.info(f"Retrieved {len(game_list)} games from database") return game_list diff --git a/backend/app/core/game_engine.py b/backend/app/core/game_engine.py index 86ba98f..da33a1e 100644 --- a/backend/app/core/game_engine.py +++ b/backend/app/core/game_engine.py @@ -521,6 +521,12 @@ class GameEngine: # Capture state before applying result outs_before = state.outs # Capture BEFORE _apply_play_result modifies it + # Capture runners BEFORE 
_apply_play_result modifies them + runners_before = { + "on_first_id": state.on_first.lineup_id if state.on_first else None, + "on_second_id": state.on_second.lineup_id if state.on_second else None, + "on_third_id": state.on_third.lineup_id if state.on_third else None, + } state_before = { "inning": state.inning, "half": state.half, @@ -539,7 +545,13 @@ class GameEngine: db_ops_tx = DatabaseOperations(session) # Save play to DB (uses snapshot from GameState) - await self._save_play_to_db(state, result, outs_before=outs_before, db_ops=db_ops_tx) + await self._save_play_to_db( + state, + result, + outs_before=outs_before, + runners_before=runners_before, + db_ops=db_ops_tx, + ) # Update game state in DB only if something changed if ( @@ -900,12 +912,14 @@ class GameEngine: state.away_team_batter_idx = (current_idx + 1) % 9 batting_team = state.away_team_id fielding_team = state.home_team_id + logger.debug(f"_prepare_next_play: AWAY team batting, idx {current_idx} → {state.away_team_batter_idx}") else: # Home team batting current_idx = state.home_team_batter_idx state.home_team_batter_idx = (current_idx + 1) % 9 batting_team = state.home_team_id fielding_team = state.away_team_id + logger.debug(f"_prepare_next_play: HOME team batting, idx {current_idx} → {state.home_team_batter_idx}") # Try to get lineups from cache first, only fetch from DB if not cached from app.models.game_models import LineupPlayerState @@ -1037,6 +1051,7 @@ class GameEngine: state: GameState, result: PlayResult, outs_before: int, + runners_before: dict[str, int | None], db_ops: DatabaseOperations | None = None, ) -> None: """ @@ -1048,6 +1063,7 @@ class GameEngine: state: Current game state result: Play result to save outs_before: Number of outs BEFORE this play (captured before _apply_play_result) + runners_before: Dict with runner IDs BEFORE play (on_first_id, on_second_id, on_third_id) db_ops: Optional DatabaseOperations for transaction grouping Raises: @@ -1077,10 +1093,10 @@ class 
GameEngine: f"Game {state.game_id} may need _prepare_next_play() called after recovery." ) - # Runners on base BEFORE play (from state.on_first/second/third) - on_first_id = state.on_first.lineup_id if state.on_first else None - on_second_id = state.on_second.lineup_id if state.on_second else None - on_third_id = state.on_third.lineup_id if state.on_third else None + # Runners on base BEFORE play (captured before _apply_play_result modifies state) + on_first_id = runners_before["on_first_id"] + on_second_id = runners_before["on_second_id"] + on_third_id = runners_before["on_third_id"] # Runners AFTER play (from result.runners_advanced) # Build dict of from_base -> to_base for quick lookup diff --git a/backend/app/services/sba_api_client.py b/backend/app/services/sba_api_client.py index 5097ba0..2153cc6 100644 --- a/backend/app/services/sba_api_client.py +++ b/backend/app/services/sba_api_client.py @@ -12,6 +12,7 @@ import logging from typing import Any import httpx +import pendulum from app.config import get_settings from app.models.player_models import SbaPlayer @@ -19,6 +20,11 @@ from app.models.player_models import SbaPlayer logger = logging.getLogger(f"{__name__}.SbaApiClient") settings = get_settings() +# Module-level team cache (team_id -> team_data) +_team_cache: dict[int, dict[str, Any]] = {} +_team_cache_expiry: pendulum.DateTime | None = None +_CACHE_TTL_HOURS = 1 + class SbaApiClient: """Client for SBA API player and team data lookups.""" @@ -92,6 +98,57 @@ class SbaApiClient: logger.error(f"Unexpected error fetching teams: {e}") raise + async def get_team_by_id(self, team_id: int, season: int = 3) -> dict[str, Any] | None: + """ + Get a single team by ID, using cache when available. + + Args: + team_id: Team ID to look up + season: Season number (default: 3) + + Returns: + Team dictionary with id, lname, abbrev, etc. 
or None if not found + + Example: + team = await client.get_team_by_id(35) + print(team['lname']) # "Cardinals" + """ + global _team_cache, _team_cache_expiry + + now = pendulum.now("UTC") + + # Check if cache is valid + if _team_cache_expiry is None or now > _team_cache_expiry: + # Refresh cache + try: + teams = await self.get_teams(season=season, active_only=False) + _team_cache = {t["id"]: t for t in teams} + _team_cache_expiry = now.add(hours=_CACHE_TTL_HOURS) + logger.info(f"Refreshed team cache with {len(_team_cache)} teams") + except Exception as e: + logger.warning(f"Failed to refresh team cache: {e}") + # Continue with stale cache if available + + return _team_cache.get(team_id) + + async def get_teams_by_ids(self, team_ids: list[int], season: int = 3) -> dict[int, dict[str, Any]]: + """ + Get multiple teams by ID, using cache. + + Args: + team_ids: List of team IDs to look up + season: Season number (default: 3) + + Returns: + Dictionary mapping team_id to team data + """ + result = {} + for team_id in team_ids: + team = await self.get_team_by_id(team_id, season) + if team: + result[team_id] = team + return result + async def get_player(self, player_id: int) -> SbaPlayer: """ Fetch player data from SBA API. 
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 0000000..1b4d095 --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,26 @@ +# Development overrides - hot-reload enabled +# Usage: docker compose -f docker-compose.yml -f docker-compose.dev.yml up + +services: + backend: + build: + target: development + environment: + - APP_ENV=development + - DEBUG=true + volumes: + # Mount source code for hot-reload (uvicorn --reload) + - ./backend/app:/app/app + - ./backend/logs:/app/logs + + frontend-sba: + build: + target: development + environment: + - NODE_ENV=development + volumes: + # Mount source for Nuxt hot-reload + - ./frontend-sba:/app + - /app/node_modules + - /app/.nuxt + - /app/.output diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index c87af15..2db3c5e 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -1,4 +1,4 @@ -# Production overrides for Paper Dynasty Game Engine +# Production overrides - optimized builds # Usage: docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d services: @@ -16,13 +16,11 @@ services: environment: - NODE_ENV=production restart: always - - frontend-pd: - build: - target: production - environment: - - NODE_ENV=production - restart: always + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000', (r) => {process.exit(r.statusCode === 200 ? 
0 : 1)})"] + interval: 30s + timeout: 10s + retries: 3 redis: restart: always diff --git a/docker-compose.yml b/docker-compose.yml index 12a6752..287959b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,12 @@ -# Paper Dynasty Game Engine - Full Stack Orchestration -# Use this for integration testing, demos, or when you want everything containerized -# For daily dev, see README.md for local development workflow +# Paper Dynasty Game Engine - Base Configuration +# +# Usage: +# ./start.sh dev - Development with hot-reload +# ./start.sh prod - Production build +# ./start.sh stop - Stop all services services: - # Redis cache (shared dependency) + # Redis cache (required for OAuth state) redis: image: redis:7-alpine ports: @@ -24,49 +27,13 @@ services: dockerfile: Dockerfile ports: - "8000:8000" + env_file: + - ./backend/.env environment: - # Application - - APP_ENV=development - - DEBUG=true - - SECRET_KEY=${SECRET_KEY:-dev-secret-key-change-in-production} - - # Database (using host machine's PostgreSQL) - - DATABASE_URL=${DATABASE_URL} - - # Redis - REDIS_URL=redis://redis:6379 - - # Discord OAuth - - DISCORD_CLIENT_ID=${DISCORD_CLIENT_ID} - - DISCORD_CLIENT_SECRET=${DISCORD_CLIENT_SECRET} - - DISCORD_REDIRECT_URI=${DISCORD_REDIRECT_URI:-http://localhost:3000/auth/callback} - - # League APIs - - SBA_API_URL=${SBA_API_URL} - - SBA_API_KEY=${SBA_API_KEY} - - PD_API_URL=${PD_API_URL} - - PD_API_KEY=${PD_API_KEY} - - # CORS - - CORS_ORIGINS=http://localhost:3000,http://localhost:3001 - - # Server-side OAuth - - FRONTEND_URL=${FRONTEND_URL:-http://localhost:3000} - - DISCORD_SERVER_REDIRECT_URI=${DISCORD_SERVER_REDIRECT_URI:-http://localhost:8000/api/auth/discord/callback/server} - - # Access Control - - ALLOWED_DISCORD_IDS=${ALLOWED_DISCORD_IDS:-} - - # WebSocket Settings - - WS_HEARTBEAT_INTERVAL=${WS_HEARTBEAT_INTERVAL:-30} - - WS_CONNECTION_TIMEOUT=${WS_CONNECTION_TIMEOUT:-60} depends_on: redis: condition: service_healthy - volumes: - # Mount source 
code for hot-reload during development - - ./backend/app:/app/app:ro - - ./backend/logs:/app/logs restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/api/health"] @@ -81,47 +48,16 @@ services: dockerfile: Dockerfile ports: - "3000:3000" + env_file: + - ./frontend-sba/.env environment: - NUXT_PUBLIC_LEAGUE_ID=sba - NUXT_PUBLIC_LEAGUE_NAME=Stratomatic Baseball Association - - NUXT_PUBLIC_API_URL=http://localhost:8000 - - NUXT_PUBLIC_WS_URL=http://localhost:8000 - - NUXT_PUBLIC_DISCORD_CLIENT_ID=${DISCORD_CLIENT_ID} - - NUXT_PUBLIC_DISCORD_REDIRECT_URI=http://localhost:3000/auth/callback - - NUXT_ALLOWED_HOSTS=${NUXT_ALLOWED_HOSTS:-localhost,127.0.0.1} + # Internal URL for SSR fetches within Docker network + - NUXT_API_URL_INTERNAL=http://backend:8000 depends_on: backend: condition: service_healthy - volumes: - # Mount source for hot-reload - - ./frontend-sba:/app - - /app/node_modules - - /app/.nuxt - restart: unless-stopped - - # PD League Frontend - frontend-pd: - build: - context: ./frontend-pd - dockerfile: Dockerfile - ports: - - "3001:3001" - environment: - - NUXT_PUBLIC_LEAGUE_ID=pd - - NUXT_PUBLIC_LEAGUE_NAME=Paper Dynasty - - NUXT_PUBLIC_API_URL=http://localhost:8000 - - NUXT_PUBLIC_WS_URL=http://localhost:8000 - - NUXT_PUBLIC_DISCORD_CLIENT_ID=${DISCORD_CLIENT_ID} - - NUXT_PUBLIC_DISCORD_REDIRECT_URI=http://localhost:3001/auth/callback - - NUXT_ALLOWED_HOSTS=${NUXT_ALLOWED_HOSTS:-localhost,127.0.0.1} - depends_on: - backend: - condition: service_healthy - volumes: - # Mount source for hot-reload - - ./frontend-pd:/app - - /app/node_modules - - /app/.nuxt restart: unless-stopped volumes: @@ -129,4 +65,4 @@ volumes: networks: default: - name: paperdynasty-network \ No newline at end of file + name: paperdynasty-network diff --git a/frontend-pd/Dockerfile b/frontend-pd/Dockerfile index 178862a..af88a73 100644 --- a/frontend-pd/Dockerfile +++ b/frontend-pd/Dockerfile @@ -1,7 +1,7 @@ # Frontend Dockerfile for PD League # 
Multi-stage build for optimized production image -FROM node:18-alpine as base +FROM node:22-alpine as base # Set working directory WORKDIR /app diff --git a/frontend-pd/plugins/socket.client.ts b/frontend-pd/plugins/socket.client.ts index 49d0ba4..8cbd396 100644 --- a/frontend-pd/plugins/socket.client.ts +++ b/frontend-pd/plugins/socket.client.ts @@ -7,7 +7,7 @@ export default defineNuxtPlugin((nuxtApp) => { const connect = (token: string) => { if (socket?.connected) return socket - socket = io(config.public.wsUrl, { + socket = io(config.public.wsUrl as string, { auth: { token }, reconnection: true, reconnectionDelay: 1000, diff --git a/frontend-sba/.claude/NUXT4_BREAKING_CHANGES.md b/frontend-sba/.claude/NUXT4_BREAKING_CHANGES.md index eb18d1a..19307d3 100644 --- a/frontend-sba/.claude/NUXT4_BREAKING_CHANGES.md +++ b/frontend-sba/.claude/NUXT4_BREAKING_CHANGES.md @@ -199,4 +199,41 @@ This document relates to: --- +## 🚨 CRITICAL: No `app/` Directory in Project Root + +**Added**: 2025-12-03 + +In Nuxt 4, the `app/` directory is a **reserved special directory** that takes precedence over root-level files like `app.vue`. + +### The Problem + +If an `app/app.vue` file exists, Nuxt will use it instead of the root `app.vue`, even with `srcDir: '.'` configured. This can cause unexpected behavior like the default `NuxtWelcome` component rendering instead of your actual pages. + +### What To Avoid + +``` +frontend-sba/ +├── app/ ← ❌ DO NOT CREATE THIS +│ └── app.vue ← Will override root app.vue! +├── app.vue ← ✅ Your actual app entry point +├── pages/ +│ └── index.vue +└── nuxt.config.ts +``` + +### If You See NuxtWelcome in Production + +1. Check if `app/` directory exists: `ls -la app/` +2. If it contains `app.vue` with ``, delete the entire directory: + ```bash + rm -rf app/ + ``` +3. Rebuild: `./start.sh rebuild prod` + +### Why This Happens + +Nuxt 4's initialization (`npx nuxi init`) creates an `app/app.vue` with the default welcome component. 
If you then set `srcDir: '.'` to use root-level directories, both files exist and `app/` takes priority. + +--- + **Always import your stores explicitly!** diff --git a/frontend-sba/.dockerignore b/frontend-sba/.dockerignore index 8b0d10c..9637f91 100644 --- a/frontend-sba/.dockerignore +++ b/frontend-sba/.dockerignore @@ -49,4 +49,7 @@ Thumbs.db # Nuxt specific .nuxt/ .output/ -nuxt.d.ts \ No newline at end of file +nuxt.d.ts + +# Nuxt 4: Exclude app/ directory (conflicts with root app.vue) +app/ \ No newline at end of file diff --git a/frontend-sba/.gitignore b/frontend-sba/.gitignore index 4a7f73a..a52b671 100644 --- a/frontend-sba/.gitignore +++ b/frontend-sba/.gitignore @@ -6,6 +6,10 @@ .cache dist +# Nuxt 4: Prevent accidental app/ directory (conflicts with root app.vue) +# See .claude/NUXT4_BREAKING_CHANGES.md for details +app/ + # Node dependencies node_modules diff --git a/frontend-sba/CLAUDE.md b/frontend-sba/CLAUDE.md index 634f77f..85d9229 100644 --- a/frontend-sba/CLAUDE.md +++ b/frontend-sba/CLAUDE.md @@ -10,6 +10,8 @@ Vue 3 + Nuxt 3 frontend for the SBA league. Real-time game interface with WebSoc **MUST READ**: `.claude/NUXT4_BREAKING_CHANGES.md` +### 1. Explicit Store Imports Required + All Pinia stores MUST be explicitly imported: ```typescript @@ -21,6 +23,10 @@ import { useAuthStore } from '~/store/auth' const authStore = useAuthStore() ``` +### 2. No `app/` Directory Allowed + +**CRITICAL**: Do NOT create an `app/` directory in the frontend root. Nuxt 4 treats this as a special directory that overrides `app.vue`. If NuxtWelcome appears instead of your app, delete `app/` and rebuild. 
+ ## Project Structure ``` diff --git a/frontend-sba/Dockerfile b/frontend-sba/Dockerfile index 73e75da..a430f4b 100644 --- a/frontend-sba/Dockerfile +++ b/frontend-sba/Dockerfile @@ -1,7 +1,7 @@ # Frontend Dockerfile for SBA League # Multi-stage build for optimized production image -FROM node:18-alpine as base +FROM node:22-alpine as base # Set working directory WORKDIR /app @@ -45,22 +45,21 @@ RUN npm run build # Production stage FROM base as production +# Create non-root user FIRST (before copying files) +RUN addgroup -g 1001 -S nodejs && \ + adduser -S nuxt -u 1001 + # Set production environment ENV NODE_ENV=production -# Copy package files -COPY package*.json ./ +# Set ownership during COPY (much faster than chown -R) +COPY --chown=nuxt:nodejs package*.json ./ # Install production dependencies only -RUN npm ci --omit=dev +RUN npm ci --omit=dev && chown -R nuxt:nodejs node_modules -# Copy built application from builder -COPY --from=builder /app/.output /app/.output - -# Create non-root user -RUN addgroup -g 1001 -S nodejs && \ - adduser -S nuxt -u 1001 && \ - chown -R nuxt:nodejs /app +# Copy built application with correct ownership (avoids slow chown -R) +COPY --from=builder --chown=nuxt:nodejs /app/.output /app/.output # Switch to non-root user USER nuxt diff --git a/frontend-sba/app.vue b/frontend-sba/app.vue index 2b1be09..f8eacfa 100644 --- a/frontend-sba/app.vue +++ b/frontend-sba/app.vue @@ -1,5 +1,5 @@ diff --git a/frontend-sba/app/app.vue b/frontend-sba/app/app.vue deleted file mode 100644 index 09f935b..0000000 --- a/frontend-sba/app/app.vue +++ /dev/null @@ -1,6 +0,0 @@ - diff --git a/frontend-sba/components/Game/CurrentSituation.vue b/frontend-sba/components/Game/CurrentSituation.vue index 55952e7..ce6b6fc 100644 --- a/frontend-sba/components/Game/CurrentSituation.vue +++ b/frontend-sba/components/Game/CurrentSituation.vue @@ -168,7 +168,7 @@ diff --git a/frontend-sba/plugins/socket.client.ts.disabled b/frontend-sba/plugins/socket.client.ts.disabled 
new file mode 100644 index 0000000..ba0ab4b --- /dev/null +++ b/frontend-sba/plugins/socket.client.ts.disabled @@ -0,0 +1,49 @@ +import type { Socket } from 'socket.io-client'; +import { io } from 'socket.io-client' + +export default defineNuxtPlugin((nuxtApp) => { + const config = useRuntimeConfig() + let socket: Socket | null = null + + const connect = (token: string) => { + if (socket?.connected) return socket + + socket = io(config.public.wsUrl, { + auth: { token }, + reconnection: true, + reconnectionDelay: 1000, + reconnectionAttempts: 5 + }) + + socket.on('connect', () => { + console.log('WebSocket connected') + }) + + socket.on('disconnect', () => { + console.log('WebSocket disconnected') + }) + + socket.on('connect_error', (error) => { + console.error('WebSocket connection error:', error) + }) + + return socket + } + + const disconnect = () => { + socket?.disconnect() + socket = null + } + + return { + provide: { + socket: { + connect, + disconnect, + get instance() { + return socket + } + } + } + } +}) \ No newline at end of file diff --git a/frontend-sba/store/game.ts b/frontend-sba/store/game.ts index 055c825..4003106 100644 --- a/frontend-sba/store/game.ts +++ b/frontend-sba/store/game.ts @@ -145,6 +145,15 @@ export const useGameStore = defineStore('game', () => { * Set complete game state (from server) */ function setGameState(state: GameState) { + const oldBatter = gameState.value?.current_batter + const newBatter = state?.current_batter + const oldBatterInfo = oldBatter + ? `lineup_id=${oldBatter.lineup_id}, batting_order=${oldBatter.batting_order}` + : 'None' + const newBatterInfo = newBatter + ? 
`lineup_id=${newBatter.lineup_id}, batting_order=${newBatter.batting_order}` + : 'None' + console.log('[GameStore] setGameState - current_batter:', oldBatterInfo, '->', newBatterInfo) gameState.value = state error.value = null } diff --git a/scripts/env-switch.sh b/scripts/env-switch.sh new file mode 100755 index 0000000..11080e4 --- /dev/null +++ b/scripts/env-switch.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Switch between dev and prod environments +# Usage: ./scripts/env-switch.sh [dev|prod] + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" + +ENV=${1:-dev} + +if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then + echo "Usage: $0 [dev|prod]" + echo " dev - localhost URLs for local development" + echo " prod - gameplay-demo.manticorum.com URLs" + exit 1 +fi + +echo "Switching to $ENV environment..." + +# Backend +if [[ -f "$PROJECT_ROOT/backend/.env.$ENV" ]]; then + cp "$PROJECT_ROOT/backend/.env.$ENV" "$PROJECT_ROOT/backend/.env" + echo " ✓ backend/.env -> $ENV" +else + echo " ✗ backend/.env.$ENV not found" + exit 1 +fi + +# Frontend SBA +if [[ -f "$PROJECT_ROOT/frontend-sba/.env.$ENV" ]]; then + cp "$PROJECT_ROOT/frontend-sba/.env.$ENV" "$PROJECT_ROOT/frontend-sba/.env" + echo " ✓ frontend-sba/.env -> $ENV" +else + echo " ✗ frontend-sba/.env.$ENV not found" + exit 1 +fi + +# Frontend PD (if exists) +if [[ -f "$PROJECT_ROOT/frontend-pd/.env.$ENV" ]]; then + cp "$PROJECT_ROOT/frontend-pd/.env.$ENV" "$PROJECT_ROOT/frontend-pd/.env" + echo " ✓ frontend-pd/.env -> $ENV" +fi + +echo "" +echo "Environment switched to: $ENV" + +if [[ "$ENV" == "dev" ]]; then + echo "" + echo "Local URLs:" + echo " Backend: http://localhost:8000" + echo " Frontend SBA: http://localhost:3000" + echo " Frontend PD: http://localhost:3001" + echo "" + echo "Start manually:" + echo " cd backend && uv run python -m app.main" + echo " cd frontend-sba && npm run dev" + echo "" + echo "Or with Docker:" + echo " docker compose up" +else + echo 
"" + echo "Production URLs:" + echo " https://gameplay-demo.manticorum.com" + echo "" + echo "Start manually:" + echo " cd backend && uv run python -m app.main" + echo " cd frontend-sba && npm run dev" + echo "" + echo "Or with Docker:" + echo " docker compose up -d" +fi diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..731e3ca --- /dev/null +++ b/start.sh @@ -0,0 +1,255 @@ +#!/bin/bash +# +# Paper Dynasty Game Engine - Single Command Startup +# +# Usage: +# ./start.sh dev Start in development mode (hot-reload) +# ./start.sh prod Start in production mode +# ./start.sh stop Stop all services +# ./start.sh logs Tail all logs +# ./start.sh rebuild Force rebuild and start +# ./start.sh status Show service status +# +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +print_status() { echo -e "${BLUE}[INFO]${NC} $1"; } +print_success() { echo -e "${GREEN}[OK]${NC} $1"; } +print_warning() { echo -e "${YELLOW}[WARN]${NC} $1"; } +print_error() { echo -e "${RED}[ERROR]${NC} $1"; } + +# Check Docker is running +check_docker() { + if ! docker info > /dev/null 2>&1; then + print_error "Docker is not running. Please start Docker Desktop." + exit 1 + fi + print_success "Docker is running" +} + +# Check required env files exist +check_env_files() { + local missing=0 + + if [[ ! -f "backend/.env" ]]; then + print_error "backend/.env not found" + print_status " Run: cp backend/.env.example backend/.env" + missing=1 + fi + + if [[ ! 
-f "frontend-sba/.env" ]]; then + print_error "frontend-sba/.env not found" + print_status " Run: cp frontend-sba/.env.example frontend-sba/.env" + missing=1 + fi + + if [[ $missing -eq 1 ]]; then + echo "" + print_status "Quick fix: ./scripts/env-switch.sh dev" + exit 1 + fi + + print_success "Environment files found" +} + +# Wait for service to be healthy +wait_for_health() { + local service=$1 + local max_wait=60 + local waited=0 + + print_status "Waiting for $service to be healthy..." + + while [[ $waited -lt $max_wait ]]; do + if docker compose ps "$service" 2>/dev/null | grep -q "healthy"; then + print_success "$service is healthy" + return 0 + fi + sleep 2 + waited=$((waited + 2)) + done + + print_error "$service failed to become healthy after ${max_wait}s" + docker compose logs "$service" --tail 20 + return 1 +} + +# Start in development mode +start_dev() { + print_status "Starting in DEVELOPMENT mode..." + echo "" + + check_docker + check_env_files + + # Switch to dev env if not already + if grep -q "gameplay-demo.manticorum.com" backend/.env 2>/dev/null; then + print_warning "Detected production .env - switching to dev" + ./scripts/env-switch.sh dev + fi + + print_status "Building and starting containers..." + docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --build + + echo "" + wait_for_health "redis" + wait_for_health "backend" + + echo "" + print_success "All services started!" + echo "" + echo -e "${GREEN}========================================${NC}" + echo -e "${GREEN} Development Environment Ready${NC}" + echo -e "${GREEN}========================================${NC}" + echo "" + echo " Backend API: http://localhost:8000" + echo " Frontend: http://localhost:3000" + echo " API Docs: http://localhost:8000/docs" + echo "" + echo " Hot-reload is enabled for both backend and frontend." 
+ echo "" + echo " Commands:" + echo " ./start.sh logs View logs" + echo " ./start.sh stop Stop services" + echo "" +} + +# Start in production mode +start_prod() { + print_status "Starting in PRODUCTION mode..." + echo "" + + check_docker + check_env_files + + # Switch to prod env if not already + if grep -q "localhost:8000" backend/.env 2>/dev/null; then + print_warning "Detected dev .env - switching to prod" + ./scripts/env-switch.sh prod + fi + + print_status "Building and starting containers..." + docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d --build + + echo "" + wait_for_health "redis" + wait_for_health "backend" + + echo "" + print_success "All services started!" + echo "" + echo -e "${GREEN}========================================${NC}" + echo -e "${GREEN} Production Environment Ready${NC}" + echo -e "${GREEN}========================================${NC}" + echo "" + echo " Public URL: https://gameplay-demo.manticorum.com" + echo "" + echo " Commands:" + echo " ./start.sh logs View logs" + echo " ./start.sh stop Stop services" + echo "" +} + +# Stop all services +stop_services() { + print_status "Stopping all services..." + docker compose down --remove-orphans + print_success "All services stopped" +} + +# Show logs +show_logs() { + docker compose logs -f +} + +# Force rebuild +rebuild() { + local mode=${1:-dev} + print_status "Force rebuilding containers..." 
+ docker compose down --remove-orphans + + if [[ "$mode" == "prod" ]]; then + docker compose -f docker-compose.yml -f docker-compose.prod.yml build --no-cache + start_prod + else + docker compose -f docker-compose.yml -f docker-compose.dev.yml build --no-cache + start_dev + fi +} + +# Show status +show_status() { + print_status "Service Status:" + echo "" + docker compose ps + echo "" + + # Quick health check + if curl -sf http://localhost:8000/api/health > /dev/null 2>&1; then + print_success "Backend: responding" + else + print_error "Backend: not responding" + fi + + if curl -sf http://localhost:3000 > /dev/null 2>&1; then + print_success "Frontend: responding" + else + print_error "Frontend: not responding" + fi +} + +# Show usage +show_usage() { + echo "Paper Dynasty Game Engine" + echo "" + echo "Usage: ./start.sh " + echo "" + echo "Commands:" + echo " dev Start in development mode (hot-reload enabled)" + echo " prod Start in production mode (optimized build)" + echo " stop Stop all services" + echo " logs Tail logs from all services" + echo " rebuild Force rebuild containers (add 'prod' for production)" + echo " status Show service status and health" + echo "" + echo "Examples:" + echo " ./start.sh dev # Start development environment" + echo " ./start.sh prod # Start production environment" + echo " ./start.sh rebuild prod # Force rebuild production" + echo "" +} + +# Main +case "${1:-}" in + dev) + start_dev + ;; + prod) + start_prod + ;; + stop) + stop_services + ;; + logs) + show_logs + ;; + rebuild) + rebuild "${2:-dev}" + ;; + status) + show_status + ;; + *) + show_usage + exit 1 + ;; +esac