"""Health-check endpoints: liveness, database, game-state memory, connection pool."""

import logging

import pendulum
from fastapi import APIRouter

from app.config import get_settings

logger = logging.getLogger(f"{__name__}.health")

router = APIRouter()
settings = get_settings()

# Usage thresholds (percent). A value strictly above a threshold escalates the
# status; a value exactly equal to a threshold stays in the lower tier.
_WARNING_USAGE_PCT = 75
_CRITICAL_USAGE_PCT = 90


def _utc_timestamp() -> str:
    """Return the current UTC time as an ISO 8601 string."""
    return pendulum.now("UTC").to_iso8601_string()


def _usage_percent(active, limit):
    """Return ``active`` as a percentage of ``limit``.

    A non-positive ``limit`` yields ``0`` so callers never divide by zero.
    """
    if limit > 0:
        return (active / limit) * 100
    return 0


def _usage_status(usage_pct) -> str:
    """Classify a usage percentage as ``healthy``, ``warning`` or ``critical``."""
    if usage_pct > _CRITICAL_USAGE_PCT:
        return "critical"
    if usage_pct > _WARNING_USAGE_PCT:
        return "warning"
    return "healthy"


async def _ping_database(engine) -> None:
    """Open a connection on ``engine`` and run ``SELECT 1``.

    Returns nothing on success; any exception raised while connecting or
    executing propagates to the caller, which decides how to report it.
    """
    from sqlalchemy import text

    async with engine.connect() as conn:
        await conn.execute(text("SELECT 1"))


@router.get("/health")
async def health_check():
    """Report basic liveness along with the environment name and version."""
    return {
        "status": "healthy",
        "timestamp": _utc_timestamp(),
        "environment": settings.app_env,
        "version": "1.0.0",
    }


@router.get("/health/db")
async def database_health():
    """Check database connectivity by running a trivial query.

    Returns a payload with ``status`` ``healthy``/``unhealthy``. On failure the
    exception is logged and its string form is included under ``error``.
    """
    # Imported at call time, as in the rest of this module.
    # NOTE(review): presumably to avoid import cycles at startup - confirm.
    from app.database.session import engine

    try:
        await _ping_database(engine)
    except Exception as e:
        logger.error("Database health check failed: %s", e)
        # NOTE(review): the failure is returned as a regular response body (no
        # explicit HTTP status is set here) and exposes str(e) to the caller.
        # Confirm that both are intended for this endpoint's consumers.
        return {
            "status": "unhealthy",
            "database": "disconnected",
            "error": str(e),
            "timestamp": _utc_timestamp(),
        }

    return {
        "status": "healthy",
        "database": "connected",
        "timestamp": _utc_timestamp(),
    }


@router.get("/health/memory")
async def memory_health():
    """
    Memory usage health check for game state management.

    Returns memory statistics including:
    - active_games: Number of games currently in memory
    - max_games: Configured limit
    - usage_percent: active_games as a percentage of max_games (0 when
      max_games is not positive)
    - oldest_game_hours: Age of oldest game in hours
    - total_lineups_cached / total_locks: Counters reported by the state manager
    - idle_timeout_hours / eviction_interval_minutes: Values from settings
    - status: healthy (<=75%), warning (>75% up to 90%), critical (>90%)
    """
    from app.core.state_manager import state_manager

    stats = state_manager.get_memory_stats()
    usage_pct = _usage_percent(stats["active_games"], stats["max_games"])

    return {
        "status": _usage_status(usage_pct),
        "usage_percent": round(usage_pct, 1),
        "active_games": stats["active_games"],
        "max_games": stats["max_games"],
        "oldest_game_hours": round(stats["oldest_game_hours"], 2),
        "total_lineups_cached": stats["total_lineups_cached"],
        "total_locks": stats["total_locks"],
        "idle_timeout_hours": settings.game_idle_timeout_hours,
        "eviction_interval_minutes": settings.game_eviction_interval_minutes,
        "timestamp": _utc_timestamp(),
    }


@router.get("/health/pool")
async def connection_pool_health():
    """
    Database connection pool health check.

    Returns whatever ``pool_monitor.get_health_status()`` reports, plus
    ``recent_history`` holding the result of ``get_history(limit=5)``.
    If the monitor is not set, returns ``status: unknown`` with a message.

    NOTE(review): the monitor's payload is presumably pool_size, max_overflow,
    available, in_use, overflow_active, usage_percent and status - verify
    against app.monitoring.pool_monitor, which is not visible from here.
    """
    # Imported at call time so the current value of the module attribute is
    # read on every request.
    # NOTE(review): presumably it is assigned during app startup - confirm.
    from app.monitoring.pool_monitor import pool_monitor

    if not pool_monitor:
        return {
            "status": "unknown",
            "message": "Pool monitor not initialized",
            "timestamp": _utc_timestamp(),
        }

    health = pool_monitor.get_health_status()
    history = pool_monitor.get_history(limit=5)

    return {
        **health,
        "recent_history": history,
    }


@router.get("/health/full")
async def full_health():
    """
    Comprehensive health check aggregating all subsystems.

    Components checked: database connectivity, memory usage, connection pool.
    Each component check is isolated: a failure is logged and recorded on that
    component instead of aborting the whole report.

    Overall status is the worst component status, in this order of precedence:
    ``critical``, ``warning``, ``degraded`` (when any component is
    ``unknown``), otherwise ``healthy``.
    """
    from app.core.state_manager import state_manager
    from app.database.session import engine
    from app.monitoring.pool_monitor import pool_monitor

    components = {}

    # Database connectivity
    try:
        await _ping_database(engine)
    except Exception as e:
        logger.error("Database health check failed: %s", e)
        components["database"] = {"status": "critical", "error": str(e)}
    else:
        components["database"] = {"status": "healthy"}

    # Memory/game state
    try:
        stats = state_manager.get_memory_stats()
        usage_pct = _usage_percent(stats["active_games"], stats["max_games"])
        components["memory"] = {
            "status": _usage_status(usage_pct),
            "usage_percent": round(usage_pct, 1),
        }
    except Exception as e:
        logger.error("Memory health check failed: %s", e)
        components["memory"] = {"status": "unknown", "error": str(e)}

    # Connection pool
    if pool_monitor:
        try:
            pool_health = pool_monitor.get_health_status()
            components["pool"] = {
                "status": pool_health["status"],
                "usage_percent": pool_health["usage_percent"],
            }
        except Exception as e:
            logger.error("Pool health check failed: %s", e)
            components["pool"] = {"status": "unknown", "error": str(e)}
    else:
        components["pool"] = {"status": "unknown", "message": "Not initialized"}

    # Aggregate status (worst case wins)
    statuses = [c.get("status", "unknown") for c in components.values()]

    if "critical" in statuses:
        overall = "critical"
    elif "warning" in statuses:
        overall = "warning"
    elif "unknown" in statuses:
        overall = "degraded"
    else:
        overall = "healthy"

    return {
        "status": overall,
        "components": components,
        "timestamp": _utc_timestamp(),
    }