""" Database connection pool monitoring. Monitors SQLAlchemy async connection pool health and provides statistics for observability and alerting. Key features: - Real-time pool statistics (checked in/out, overflow) - Health status classification (healthy/warning/critical) - Historical stats tracking - Background monitoring with configurable interval - Warning logs when pool usage exceeds threshold Author: Claude Date: 2025-11-27 """ import asyncio import logging from dataclasses import dataclass, field from typing import Optional import pendulum from sqlalchemy.ext.asyncio import AsyncEngine from app.config import get_settings logger = logging.getLogger(f"{__name__}.PoolMonitor") @dataclass class PoolStats: """ Connection pool statistics snapshot. Captures the current state of the database connection pool for monitoring and alerting purposes. """ pool_size: int max_overflow: int checkedin: int # Available connections checkedout: int # In-use connections overflow: int # Overflow connections in use total_capacity: int usage_percent: float timestamp: pendulum.DateTime = field(default_factory=lambda: pendulum.now("UTC")) class PoolMonitor: """ Monitor database connection pool health. Provides real-time statistics and health status for the SQLAlchemy connection pool. Useful for detecting pool exhaustion before it causes request failures. Usage: monitor = PoolMonitor(engine) stats = monitor.get_stats() health = monitor.get_health_status() """ def __init__( self, engine: AsyncEngine, alert_threshold: float = 0.8, max_history: int = 100, ): """ Initialize pool monitor. Args: engine: SQLAlchemy async engine to monitor alert_threshold: Usage percentage to trigger warning (0.8 = 80%) max_history: Maximum stats snapshots to keep in history """ self._engine = engine self._stats_history: list[PoolStats] = [] self._max_history = max_history self._alert_threshold = alert_threshold self._settings = get_settings() def get_stats(self) -> PoolStats: """ Get current pool statistics. Returns: PoolStats with current pool state """ pool = self._engine.pool checkedin = pool.checkedin() checkedout = pool.checkedout() overflow = pool.overflow() total_capacity = self._settings.db_pool_size + self._settings.db_max_overflow usage_percent = checkedout / total_capacity if total_capacity > 0 else 0 stats = PoolStats( pool_size=self._settings.db_pool_size, max_overflow=self._settings.db_max_overflow, checkedin=checkedin, checkedout=checkedout, overflow=overflow, total_capacity=total_capacity, usage_percent=usage_percent, ) # Record history self._stats_history.append(stats) if len(self._stats_history) > self._max_history: self._stats_history.pop(0) # Check for alerts if usage_percent >= self._alert_threshold: logger.warning( f"Connection pool usage high: {usage_percent:.1%} " f"({checkedout}/{total_capacity})" ) if overflow > 0: logger.info(f"Pool overflow active: {overflow} overflow connections") return stats def get_health_status(self) -> dict: """ Get pool health status for monitoring endpoint. 

class PoolMonitor:
    """
    Monitor database connection pool health.

    Provides real-time statistics and health status for the SQLAlchemy
    connection pool. Useful for detecting pool exhaustion before it
    causes request failures.

    Usage:
        monitor = PoolMonitor(engine)
        stats = monitor.get_stats()
        health = monitor.get_health_status()
    """

    def __init__(
        self,
        engine: AsyncEngine,
        alert_threshold: float = 0.8,
        max_history: int = 100,
    ):
        """
        Initialize pool monitor.

        Args:
            engine: SQLAlchemy async engine to monitor
            alert_threshold: Usage fraction that triggers a warning log
                (0.8 = 80%)
            max_history: Maximum stats snapshots to keep in history
        """
        self._engine = engine
        self._stats_history: list[PoolStats] = []
        self._max_history = max_history
        self._alert_threshold = alert_threshold
        self._settings = get_settings()

    def get_stats(self) -> PoolStats:
        """
        Get current pool statistics.

        Returns:
            PoolStats with current pool state
        """
        pool = self._engine.pool
        checkedin = pool.checkedin()
        checkedout = pool.checkedout()
        overflow = pool.overflow()
        total_capacity = self._settings.db_pool_size + self._settings.db_max_overflow
        usage_percent = checkedout / total_capacity if total_capacity > 0 else 0.0

        stats = PoolStats(
            pool_size=self._settings.db_pool_size,
            max_overflow=self._settings.db_max_overflow,
            checkedin=checkedin,
            checkedout=checkedout,
            overflow=overflow,
            total_capacity=total_capacity,
            usage_percent=usage_percent,
        )

        # Record history, evicting the oldest snapshot once full
        self._stats_history.append(stats)
        if len(self._stats_history) > self._max_history:
            self._stats_history.pop(0)

        # Check for alerts
        if usage_percent >= self._alert_threshold:
            logger.warning(
                f"Connection pool usage high: {usage_percent:.1%} "
                f"({checkedout}/{total_capacity})"
            )

        if overflow > 0:
            logger.info(f"Pool overflow active: {overflow} overflow connections")

        return stats

    def get_health_status(self) -> dict:
        """
        Get pool health status for monitoring endpoint.

        Status thresholds are fixed (critical at >=90% usage, warning at
        >=75%) and are independent of the ``alert_threshold`` used for
        warning logs in ``get_stats``.

        Returns:
            Dict with status, statistics, and timestamp
        """
        stats = self.get_stats()

        if stats.usage_percent >= 0.9:
            status = "critical"
        elif stats.usage_percent >= 0.75:
            status = "warning"
        else:
            status = "healthy"

        return {
            "status": status,
            "pool_size": stats.pool_size,
            "max_overflow": stats.max_overflow,
            "available": stats.checkedin,
            "in_use": stats.checkedout,
            "overflow_active": stats.overflow,
            "total_capacity": stats.total_capacity,
            "usage_percent": round(stats.usage_percent * 100, 1),
            "timestamp": stats.timestamp.isoformat(),
        }

    def get_history(self, limit: int = 10) -> list[dict]:
        """
        Get recent stats history.

        Args:
            limit: Maximum number of history entries to return

        Returns:
            List of stats snapshots, oldest first
        """
        return [
            {
                "checkedout": s.checkedout,
                "usage_percent": round(s.usage_percent * 100, 1),
                "timestamp": s.timestamp.isoformat(),
            }
            for s in self._stats_history[-limit:]
        ]

    async def start_monitoring(self, interval_seconds: int = 60):
        """
        Background task to periodically collect stats.

        Useful for continuous logging and alerting. Runs until cancelled.

        Args:
            interval_seconds: Seconds between stat collections
        """
        logger.info(f"Starting pool monitoring (interval: {interval_seconds}s)")

        while True:
            try:
                stats = self.get_stats()
                logger.debug(
                    f"Pool stats: {stats.checkedout}/{stats.total_capacity} "
                    f"({stats.usage_percent:.1%})"
                )
                await asyncio.sleep(interval_seconds)
            except asyncio.CancelledError:
                logger.info("Pool monitoring stopped")
                break
            except Exception as e:
                logger.error(f"Pool monitoring error: {e}")
                await asyncio.sleep(interval_seconds)


# Global instance (initialized in main.py)
pool_monitor: Optional[PoolMonitor] = None


def init_pool_monitor(engine: AsyncEngine) -> PoolMonitor:
    """
    Initialize global pool monitor.

    Should be called during application startup.

    Args:
        engine: SQLAlchemy async engine to monitor

    Returns:
        Initialized PoolMonitor instance
    """
    global pool_monitor
    pool_monitor = PoolMonitor(engine)
    logger.info("Pool monitor initialized")
    return pool_monitor
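

# Example wiring (a sketch, not part of this module): assuming main.py uses a
# FastAPI-style async lifespan and creates the engine at startup, the monitor
# and its background task could be hooked up roughly like this. The name
# `create_engine_from_settings` is a hypothetical placeholder for whatever
# engine factory the application actually uses.
#
#   from contextlib import asynccontextmanager
#
#   @asynccontextmanager
#   async def lifespan(app):
#       engine = create_engine_from_settings()  # your engine factory
#       monitor = init_pool_monitor(engine)
#       task = asyncio.create_task(monitor.start_monitoring(interval_seconds=60))
#       try:
#           yield
#       finally:
#           task.cancel()  # the monitor loop catches CancelledError and exits
#           await asyncio.gather(task, return_exceptions=True)
#           await engine.dispose()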