Database Infrastructure: - Added Alembic migration system (alembic.ini, env.py) - Migration 001: Initial schema - Migration 004: Stat materialized views (enhanced) - Migration 005: Composite indexes for performance - operations.py: Session injection support for test isolation - session.py: Enhanced session management Application Updates: - main.py: Integration with new database infrastructure - health.py: Enhanced health checks with pool monitoring Integration Tests: - conftest.py: Session injection pattern for reliable tests - test_operations.py: Database operations tests - test_migrations.py: Migration verification tests Session injection pattern enables: - Production: Auto-commit per operation - Testing: Shared session with automatic rollback - Transactions: Multiple ops, single commit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
205 lines
6.0 KiB
Python
205 lines
6.0 KiB
Python
import logging
|
|
|
|
import pendulum
|
|
from fastapi import APIRouter
|
|
|
|
from app.config import get_settings
|
|
|
|
# Module logger; its name is this module's dotted name with ".health" appended.
# NOTE(review): if this module is itself called "health", the logger name will
# end in ".health.health" - confirm that is intended.
logger = logging.getLogger(f"{__name__}.health")
|
|
|
|
# Router on which the endpoints in this module register via @router.get(...).
router = APIRouter()
|
|
# Settings object obtained once, when this module is imported; the handlers
# read attributes such as app_env and the game eviction settings from it.
settings = get_settings()
|
|
|
|
|
|
@router.get("/health")
async def health_check():
    """Basic liveness probe.

    Performs no dependency checks. Returns a dict with a fixed "healthy"
    status, the current UTC time as an ISO 8601 string, the configured
    application environment and a hard-coded version string.
    """
    now_iso = pendulum.now("UTC").to_iso8601_string()

    payload = dict(status="healthy", timestamp=now_iso)
    payload["environment"] = settings.app_env
    payload["version"] = "1.0.0"
    return payload
|
|
|
|
|
|
@router.get("/health/db")
async def database_health():
    """Probe database connectivity with a trivial query.

    Opens a connection from the shared engine and runs ``SELECT 1``.

    Returns:
        On success, a dict with status "healthy" and database "connected".
        On any exception, the failure is logged and a dict with status
        "unhealthy", database "disconnected" and the exception text under
        "error" is returned instead of raising. Both shapes carry a UTC
        ISO 8601 "timestamp".
    """
    # Imported lazily inside the handler, as in the rest of this module.
    from sqlalchemy import text

    from app.database.session import engine

    try:
        async with engine.connect() as connection:
            await connection.execute(text("SELECT 1"))

        outcome = {"status": "healthy", "database": "connected"}
        outcome["timestamp"] = pendulum.now("UTC").to_iso8601_string()
        return outcome
    except Exception as exc:
        logger.error(f"Database health check failed: {exc}")

        failure = {
            "status": "unhealthy",
            "database": "disconnected",
            "error": str(exc),
        }
        failure["timestamp"] = pendulum.now("UTC").to_iso8601_string()
        return failure
|
|
|
|
|
|
@router.get("/health/memory")
async def memory_health():
    """Report in-memory game state usage.

    Reads the statistics exposed by ``state_manager.get_memory_stats()`` and
    derives a usage percentage from ``active_games / max_games`` (0 when
    ``max_games`` is not positive).

    Status thresholds, exactly as coded:
    - "critical": usage strictly above 90%
    - "warning":  usage strictly above 75% (and at most 90%)
    - "healthy":  everything else, including exactly 75%

    Returns:
        A dict with the status, the rounded usage percentage, the raw
        counters taken from the stats mapping (active_games, max_games,
        oldest_game_hours rounded to 2 places, total_lineups_cached,
        total_locks), the idle-timeout and eviction-interval settings, and
        a UTC ISO 8601 timestamp.
    """
    from app.core.state_manager import state_manager

    stats = state_manager.get_memory_stats()

    # Guard against a zero (or negative) limit before dividing.
    limit = stats["max_games"]
    load_pct = (stats["active_games"] / limit) * 100 if limit > 0 else 0

    # Walk the thresholds from most to least severe; first match wins.
    level = "healthy"
    for floor, label in ((90, "critical"), (75, "warning")):
        if load_pct > floor:
            level = label
            break

    report = {"status": level, "usage_percent": round(load_pct, 1)}
    report["active_games"] = stats["active_games"]
    report["max_games"] = limit
    report["oldest_game_hours"] = round(stats["oldest_game_hours"], 2)
    report.update(
        total_lineups_cached=stats["total_lineups_cached"],
        total_locks=stats["total_locks"],
        idle_timeout_hours=settings.game_idle_timeout_hours,
        eviction_interval_minutes=settings.game_eviction_interval_minutes,
        timestamp=pendulum.now("UTC").to_iso8601_string(),
    )
    return report
|
|
|
|
|
|
@router.get("/health/pool")
async def connection_pool_health():
    """Report database connection pool health.

    When the pool monitor is falsy (not initialized), returns a dict with
    status "unknown", an explanatory message and a UTC ISO 8601 timestamp.

    Otherwise returns everything from ``pool_monitor.get_health_status()``
    plus a "recent_history" entry holding ``pool_monitor.get_history(limit=5)``.

    NOTE(review): the individual pool fields (pool_size, max_overflow,
    available, in_use, overflow_active, usage_percent, status) are defined
    by the monitor, not by this handler - confirm against pool_monitor.
    """
    from app.monitoring.pool_monitor import pool_monitor

    if pool_monitor:
        snapshot = pool_monitor.get_health_status()
        recent = pool_monitor.get_history(limit=5)

        # Copy the monitor's mapping, then attach the history to the copy.
        report = dict(snapshot)
        report["recent_history"] = recent
        return report

    return {
        "status": "unknown",
        "message": "Pool monitor not initialized",
        "timestamp": pendulum.now("UTC").to_iso8601_string(),
    }
|
|
|
|
|
|
@router.get("/health/full")
async def full_health():
    """Aggregate the database, memory and connection pool checks.

    Each subsystem is probed independently; a failure in one is logged and
    recorded in its component entry rather than aborting the request.

    Overall status is chosen by precedence over the component statuses:
    any "critical" -> "critical", else any "warning" -> "warning",
    else any "unknown" -> "degraded", else "healthy".

    Returns:
        A dict with the overall "status", a "components" mapping keyed by
        "database", "memory" and "pool", and a UTC ISO 8601 "timestamp".
    """
    from sqlalchemy import text

    from app.core.state_manager import state_manager
    from app.database.session import engine
    from app.monitoring.pool_monitor import pool_monitor

    # --- Database connectivity: a failed probe counts as critical. ---
    try:
        async with engine.connect() as connection:
            await connection.execute(text("SELECT 1"))
    except Exception as exc:
        logger.error(f"Database health check failed: {exc}")
        db_report = {"status": "critical", "error": str(exc)}
    else:
        db_report = {"status": "healthy"}

    # --- Memory / game state: a failure here is reported as unknown. ---
    try:
        stats = state_manager.get_memory_stats()

        limit = stats["max_games"]
        load_pct = (stats["active_games"] / limit) * 100 if limit > 0 else 0

        # Most severe threshold first; the first one exceeded wins.
        mem_level = "healthy"
        for floor, label in ((90, "critical"), (75, "warning")):
            if load_pct > floor:
                mem_level = label
                break

        memory_report = {
            "status": mem_level,
            "usage_percent": round(load_pct, 1),
        }
    except Exception as exc:
        logger.error(f"Memory health check failed: {exc}")
        memory_report = {"status": "unknown", "error": str(exc)}

    # --- Connection pool: missing monitor or failure is unknown. ---
    if not pool_monitor:
        pool_report = {"status": "unknown", "message": "Not initialized"}
    else:
        try:
            snapshot = pool_monitor.get_health_status()
            pool_report = {
                key: snapshot[key] for key in ("status", "usage_percent")
            }
        except Exception as exc:
            logger.error(f"Pool health check failed: {exc}")
            pool_report = {"status": "unknown", "error": str(exc)}

    components = {
        "database": db_report,
        "memory": memory_report,
        "pool": pool_report,
    }

    # Worst case wins: scan the precedence table from most to least severe.
    seen = [entry.get("status", "unknown") for entry in components.values()]
    overall = "healthy"
    precedence = (
        ("critical", "critical"),
        ("warning", "warning"),
        ("unknown", "degraded"),
    )
    for component_status, verdict in precedence:
        if component_status in seen:
            overall = verdict
            break

    return {
        "status": overall,
        "components": components,
        "timestamp": pendulum.now("UTC").to_iso8601_string(),
    }
|