Frontend UX improvements: - Single-click Discord OAuth from home page (no intermediate /auth page) - Auto-redirect authenticated users from home to /games - Fixed Nuxt layout system - app.vue now wraps NuxtPage with NuxtLayout - Games page now has proper card container with shadow/border styling - Layout header includes working logout with API cookie clearing Games list enhancements: - Display team names (lname) instead of just team IDs - Show current score for each team - Show inning indicator (Top/Bot X) for active games - Responsive header with wrapped buttons on mobile Backend improvements: - Added team caching to SbaApiClient (1-hour TTL) - Enhanced GameListItem with team names, scores, inning data - Games endpoint now enriches response with SBA API team data Docker optimizations: - Optimized Dockerfile using --chown flag on COPY (faster than chown -R) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
9.9 KiB
9.9 KiB
Plan 003: Idle Game Eviction
Priority: CRITICAL | Effort: 1-2 hours | Status: NOT STARTED | Risk Level: HIGH - Memory leak / OOM
Problem Statement
The StateManager tracks _last_access[game_id] for each game but never uses it for eviction. Games remain in memory indefinitely, causing unbounded memory growth.
# backend/app/core/state_manager.py
self._last_access[game_id] = pendulum.now("UTC") # Tracked but never used!
Impact
- Memory: ~50KB per game × 1000 games = 50MB+ after a month
- Stability: OOM crash after days/weeks of operation
- Performance: Degraded performance as dictionaries grow
Files to Modify
| File | Changes |
|---|---|
| backend/app/core/state_manager.py | Add eviction logic |
| backend/app/main.py | Start background eviction task |
| backend/app/config.py | Add eviction configuration |
| backend/app/api/routes.py | Add memory health endpoint |
Implementation Steps
Step 1: Add Configuration (10 min)
Update backend/app/config.py:
class Settings(BaseSettings):
    # ... existing settings ...

    # --- Idle-game eviction ---
    # Hours a game may sit untouched before it is evicted from memory.
    game_idle_timeout_hours: int = 24
    # Minutes between runs of the eviction check (60 = hourly).
    game_eviction_interval_minutes: int = 60
    # Hard limit on the number of games kept in memory.
    game_max_in_memory: int = 500
Step 2: Implement Eviction Logic (30 min)
Update backend/app/core/state_manager.py:
import pendulum
from app.config import settings
class StateManager:
    # ... existing code ...

    async def evict_idle_games(self) -> list[UUID]:
        """
        Remove games that have been idle beyond the timeout threshold.

        A game is idle when the time since its ``_last_access`` entry exceeds
        ``settings.game_idle_timeout_hours``.

        Returns list of evicted game IDs.
        """
        now = pendulum.now("UTC")
        timeout_seconds = settings.game_idle_timeout_hours * 3600

        # Find idle games. Each game's own idle time is kept next to its ID so
        # the per-game log line below reports the right duration (a single
        # shared variable would hold only the last scanned game's value).
        # The scan contains no awaits, so the dict cannot change underneath it.
        idle_games = []
        for game_id, last_access in self._last_access.items():
            idle_seconds = (now - last_access).total_seconds()
            if idle_seconds > timeout_seconds:
                idle_games.append((game_id, idle_seconds))

        # Evict them
        for game_id, idle_seconds in idle_games:
            await self._evict_game(game_id)
            logger.info(f"Evicted idle game {game_id} (idle {idle_seconds/3600:.1f} hours)")

        evicted = [game_id for game_id, _ in idle_games]
        if evicted:
            logger.info(f"Evicted {len(evicted)} idle games. Active: {len(self._games)}")
        return evicted

    async def _evict_game(self, game_id: UUID) -> None:
        """
        Remove a single game from memory.

        Attempts to persist the final state to the database before removal.
        Persistence is best-effort: a failure is logged and the game is still
        dropped from memory.
        """
        # Persist final state
        if game_id in self._games:
            game_state = self._games[game_id]
            try:
                await db_ops.save_game_state(game_id, game_state)
                logger.debug(f"Persisted game {game_id} before eviction")
            except Exception as e:
                # NOTE(review): the in-memory state is discarded even when the
                # save fails - confirm that losing it is acceptable here.
                logger.exception(f"Failed to persist game {game_id}: {e}")

        # Remove from all tracking dictionaries
        self._games.pop(game_id, None)
        self._lineups.pop(game_id, None)
        self._last_access.pop(game_id, None)
        self._game_locks.pop(game_id, None)

    async def enforce_memory_limit(self) -> list[UUID]:
        """
        Enforce hard limit on in-memory games.

        Evicts the least recently accessed games until the number of games in
        memory is back at ``settings.game_max_in_memory``.

        Returns list of force-evicted game IDs (empty when under the limit).
        """
        overflow = len(self._games) - settings.game_max_in_memory
        if overflow <= 0:
            return []

        def _staleness(game_id):
            # Games with no recorded access sort first (treated as oldest);
            # the rest sort by last access time, oldest first.
            stamp = self._last_access.get(game_id)
            return (stamp is not None, stamp)

        # Candidates come from _games itself, so every eviction actually
        # reduces the count the limit is measured against.
        candidates = sorted(self._games, key=_staleness)

        evicted = []
        for game_id in candidates[:overflow]:
            await self._evict_game(game_id)
            evicted.append(game_id)
            logger.warning(f"Force-evicted game {game_id} (memory limit)")
        return evicted

    def get_memory_stats(self) -> dict:
        """Return memory usage statistics."""
        return {
            "active_games": len(self._games),
            "max_games": settings.game_max_in_memory,
            "oldest_game_hours": self._get_oldest_game_age_hours(),
            "total_lineups_cached": sum(len(lineup) for lineup in self._lineups.values()),
        }

    def _get_oldest_game_age_hours(self) -> float:
        """Return hours since the least recent access, or 0.0 if nothing is tracked."""
        if not self._last_access:
            return 0.0
        oldest = min(self._last_access.values())
        return (pendulum.now("UTC") - oldest).total_seconds() / 3600
Step 3: Create Background Task (30 min)
Update backend/app/main.py:
import asyncio
from contextlib import asynccontextmanager
from app.core.state_manager import state_manager
from app.config import settings
# Background task handle
eviction_task: asyncio.Task | None = None


async def periodic_eviction():
    """Background task to periodically evict idle games.

    Sleeps for ``settings.game_eviction_interval_minutes`` between passes.
    Each pass evicts idle games, enforces the in-memory game limit, and logs
    the resulting memory stats. Runs until cancelled.

    Raises:
        asyncio.CancelledError: re-raised after logging when the task is
            cancelled, so the task ends in the cancelled state.
    """
    interval = settings.game_eviction_interval_minutes * 60
    while True:
        try:
            await asyncio.sleep(interval)

            # Run eviction, then enforce the hard memory limit
            await state_manager.evict_idle_games()
            await state_manager.enforce_memory_limit()

            # Log stats
            stats = state_manager.get_memory_stats()
            logger.info(f"Memory stats: {stats}")
        except asyncio.CancelledError:
            logger.info("Eviction task cancelled")
            # Propagate cancellation instead of swallowing it.
            raise
        except Exception as e:
            # Continue running despite errors; keep the traceback in the log.
            logger.exception(f"Eviction task error: {e}")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Starts the background eviction task on startup and cancels it on
    shutdown. The shutdown step runs in a ``finally`` block so the task is
    also stopped when an exception is raised into the context at ``yield``.
    """
    global eviction_task

    # Startup
    logger.info("Starting background eviction task")
    eviction_task = asyncio.create_task(periodic_eviction())

    try:
        yield
    finally:
        # Shutdown
        logger.info("Stopping background eviction task")
        if eviction_task is not None:
            eviction_task.cancel()
            try:
                # Wait for the task to finish unwinding after cancellation.
                await eviction_task
            except asyncio.CancelledError:
                pass


app = FastAPI(lifespan=lifespan)
Step 4: Add Health Endpoint (15 min)
Add to backend/app/api/routes.py:
@router.get("/health/memory")
async def memory_health():
    """Return memory usage statistics.

    The response contains the state manager's memory stats plus:
    - ``usage_percent``: active games as a percentage of the configured limit
    - ``status``: "critical" above 90%, "warning" above 75%, else "healthy"
    """
    stats = state_manager.get_memory_stats()
    active_games = stats["active_games"]
    max_games = stats["max_games"]

    # Guard against a limit of 0 (or less), which would otherwise raise
    # ZeroDivisionError: any active game then counts as full usage.
    if max_games > 0:
        usage_pct = active_games / max_games * 100
    else:
        usage_pct = 100.0 if active_games else 0.0

    # Determine health status
    if usage_pct > 90:
        status = "critical"
    elif usage_pct > 75:
        status = "warning"
    else:
        status = "healthy"

    return {
        "status": status,
        "usage_percent": round(usage_pct, 1),
        **stats
    }
Step 5: Write Tests (30 min)
Create backend/tests/unit/core/test_game_eviction.py:
import pytest
import pendulum
from uuid import uuid4
from unittest.mock import patch, AsyncMock
class TestGameEviction:
    """Tests for idle game eviction."""

    @staticmethod
    def _fake_state():
        """Return an opaque stand-in for a game state.

        These tests never look inside the state; they only need a distinct
        object to store and to compare against.
        """
        return object()

    @pytest.fixture
    def state_manager(self):
        from app.core.state_manager import StateManager
        return StateManager()

    @pytest.mark.asyncio
    async def test_evict_idle_games_removes_old_games(self, state_manager):
        """Games idle beyond threshold are evicted."""
        game_id = uuid4()

        # Create game with old timestamp
        state_manager._games[game_id] = self._fake_state()
        state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=25)

        with patch.object(state_manager, '_evict_game', new_callable=AsyncMock) as mock:
            evicted = await state_manager.evict_idle_games()

        assert game_id in evicted
        mock.assert_called_once_with(game_id)

    @pytest.mark.asyncio
    async def test_evict_idle_games_keeps_active_games(self, state_manager):
        """Recently accessed games are not evicted."""
        game_id = uuid4()
        state_manager._games[game_id] = self._fake_state()
        state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=1)

        evicted = await state_manager.evict_idle_games()

        assert game_id not in evicted
        assert game_id in state_manager._games

    @pytest.mark.asyncio
    async def test_enforce_memory_limit_evicts_oldest(self, state_manager):
        """Oldest games evicted when memory limit exceeded."""
        from app.config import settings

        # Create games at different times; a higher index means older.
        game_ids = []
        for i in range(10):
            game_id = uuid4()
            game_ids.append(game_id)
            state_manager._games[game_id] = self._fake_state()
            state_manager._last_access[game_id] = pendulum.now("UTC").subtract(hours=i)

        # Patch persistence so eviction does not touch a real database.
        with patch.object(settings, 'game_max_in_memory', 5):
            with patch('app.database.operations.db_ops.save_game_state', new_callable=AsyncMock):
                evicted = await state_manager.enforce_memory_limit()

        assert len(evicted) == 5
        assert len(state_manager._games) == 5
        # The five oldest games are the ones removed; the newest five remain.
        assert set(evicted) == set(game_ids[5:])
        assert set(state_manager._games) == set(game_ids[:5])

    @pytest.mark.asyncio
    async def test_evict_game_persists_state(self, state_manager):
        """Game state is persisted before eviction."""
        game_id = uuid4()
        game_state = self._fake_state()
        state_manager._games[game_id] = game_state

        with patch('app.database.operations.db_ops.save_game_state', new_callable=AsyncMock) as mock:
            await state_manager._evict_game(game_id)

        mock.assert_called_once_with(game_id, game_state)
        assert game_id not in state_manager._games
Verification Checklist
- Idle games are evicted after 24 hours
- Memory limit is enforced
- Game state is persisted before eviction
- Background task runs without errors
- Health endpoint shows accurate stats
- Tests pass
Monitoring
After deployment, monitor:
- `/health/memory` endpoint
- Log messages for eviction events
- Memory usage of the process
Rollback Plan
If issues arise:
- Increase `game_idle_timeout_hours` to reduce evictions
- Increase `game_max_in_memory` limit
- Disable eviction task (comment out in lifespan)
Dependencies
- None (can be implemented independently)
Notes
- Consider adding WebSocket notification before eviction
- May want to add "extend session" API for active users
- Future: Add Redis-backed state for horizontal scaling