- Add rate_limit.py middleware with per-client throttling and cleanup task - Add pool_monitor.py for database connection pool health monitoring - Add custom exceptions module (GameEngineError, DatabaseError, etc.) - Add config settings for eviction intervals, session timeouts, memory limits - Add unit tests for rate limiting and pool monitoring 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
221 lines
6.2 KiB
Python
221 lines
6.2 KiB
Python
"""
|
|
Database connection pool monitoring.
|
|
|
|
Monitors SQLAlchemy async connection pool health and provides
|
|
statistics for observability and alerting.
|
|
|
|
Key features:
|
|
- Real-time pool statistics (checked in/out, overflow)
|
|
- Health status classification (healthy/warning/critical)
|
|
- Historical stats tracking
|
|
- Background monitoring with configurable interval
|
|
- Warning logs when pool usage exceeds threshold
|
|
|
|
Author: Claude
|
|
Date: 2025-11-27
|
|
"""
|
|
|
|
import asyncio
import logging
from collections import deque
from dataclasses import dataclass, field
from typing import Optional

import pendulum
from sqlalchemy.ext.asyncio import AsyncEngine

from app.config import get_settings
|
|
|
|
# Module logger, registered under the "<module name>.PoolMonitor" channel.
logger = logging.getLogger(".".join((__name__, "PoolMonitor")))
|
|
|
|
|
|
@dataclass
class PoolStats:
    """
    Point-in-time snapshot of the database connection pool.

    Attributes:
        pool_size: Configured base pool size.
        max_overflow: Configured overflow allowance on top of pool_size.
        checkedin: Connections currently available in the pool.
        checkedout: Connections currently in use.
        overflow: Overflow connections in use.
        total_capacity: Capacity figure that usage is measured against.
        usage_percent: Usage ratio supplied by the creator of the snapshot.
        timestamp: When the snapshot was taken; defaults to the current
            UTC time at construction.
    """

    pool_size: int
    max_overflow: int
    checkedin: int
    checkedout: int
    overflow: int
    total_capacity: int
    usage_percent: float
    timestamp: pendulum.DateTime = field(
        default_factory=lambda: pendulum.now("UTC"),
    )
|
|
|
|
|
|
class PoolMonitor:
    """
    Monitor database connection pool health.

    Reads counters from the engine's connection pool, compares them with
    the configured capacity (``db_pool_size + db_max_overflow`` from the
    application settings) and keeps a bounded history of snapshots.

    Usage:
        monitor = PoolMonitor(engine)
        stats = monitor.get_stats()
        health = monitor.get_health_status()
    """

    # Usage fractions (0.0-1.0) at which get_health_status() escalates.
    WARNING_THRESHOLD = 0.75
    CRITICAL_THRESHOLD = 0.9

    def __init__(
        self,
        engine: AsyncEngine,
        alert_threshold: float = 0.8,
        max_history: int = 100,
    ):
        """
        Initialize pool monitor.

        Args:
            engine: SQLAlchemy async engine to monitor
            alert_threshold: Usage fraction that triggers a warning log
                (0.8 = 80%)
            max_history: Maximum stats snapshots to keep in history;
                zero or a negative value keeps no history
        """
        self._engine = engine
        self._max_history = max_history
        # A bounded deque drops the oldest snapshot in O(1) on append.
        # The clamp keeps a non-positive max_history meaning "no history"
        # instead of making deque() raise ValueError.
        self._stats_history: deque[PoolStats] = deque(maxlen=max(max_history, 0))
        self._alert_threshold = alert_threshold
        self._settings = get_settings()

    def get_stats(self) -> PoolStats:
        """
        Get current pool statistics.

        Takes a snapshot, appends it to the history, and logs a warning
        when usage reaches the alert threshold (plus an info line while
        overflow connections are open).

        Returns:
            PoolStats with current pool state
        """
        pool = self._engine.pool

        checkedin = pool.checkedin()
        checkedout = pool.checkedout()
        # QueuePool.overflow() counts up from -pool_size, so it is negative
        # until the base pool is full. Clamp so the reported value is the
        # number of overflow connections actually open.
        overflow = max(pool.overflow(), 0)

        pool_size = self._settings.db_pool_size
        max_overflow = self._settings.db_max_overflow
        # NOTE(review): a negative db_max_overflow (SQLAlchemy's "unlimited")
        # is not special-cased; it simply lowers total_capacity.
        total_capacity = pool_size + max_overflow

        # Guard against a zero/negative configured capacity.
        usage_percent = checkedout / total_capacity if total_capacity > 0 else 0.0

        stats = PoolStats(
            pool_size=pool_size,
            max_overflow=max_overflow,
            checkedin=checkedin,
            checkedout=checkedout,
            overflow=overflow,
            total_capacity=total_capacity,
            usage_percent=usage_percent,
        )

        # Record history (the deque evicts the oldest entry when full).
        self._stats_history.append(stats)

        # Check for alerts
        if usage_percent >= self._alert_threshold:
            logger.warning(
                f"Connection pool usage high: {usage_percent:.1%} "
                f"({checkedout}/{total_capacity})"
            )

        if overflow > 0:
            logger.info(f"Pool overflow active: {overflow} overflow connections")

        return stats

    def get_health_status(self) -> dict:
        """
        Get pool health status for monitoring endpoint.

        Calls get_stats(), so each call also records a history entry and
        may emit the usage logs.

        Returns:
            Dict with status ("healthy", "warning" or "critical"), the
            pool counters, usage as a percentage rounded to one decimal,
            and the snapshot timestamp in ISO format
        """
        stats = self.get_stats()

        if stats.usage_percent >= self.CRITICAL_THRESHOLD:
            status = "critical"
        elif stats.usage_percent >= self.WARNING_THRESHOLD:
            status = "warning"
        else:
            status = "healthy"

        return {
            "status": status,
            "pool_size": stats.pool_size,
            "max_overflow": stats.max_overflow,
            "available": stats.checkedin,
            "in_use": stats.checkedout,
            "overflow_active": stats.overflow,
            "total_capacity": stats.total_capacity,
            "usage_percent": round(stats.usage_percent * 100, 1),
            "timestamp": stats.timestamp.isoformat(),
        }

    def get_history(self, limit: int = 10) -> list[dict]:
        """
        Get recent stats history.

        Args:
            limit: Maximum number of history entries to return; zero or
                a negative value returns an empty list

        Returns:
            List of stats snapshots, oldest first
        """
        # A plain [-limit:] slice would return the WHOLE history for
        # limit == 0 (because -0 == 0), so handle it explicitly.
        if limit <= 0:
            return []

        # Copy to a list first: deques do not support slicing.
        recent = list(self._stats_history)[-limit:]
        return [
            {
                "checkedout": s.checkedout,
                "usage_percent": round(s.usage_percent * 100, 1),
                "timestamp": s.timestamp.isoformat(),
            }
            for s in recent
        ]

    async def start_monitoring(self, interval_seconds: int = 60):
        """
        Background task to periodically collect stats.

        Useful for continuous logging and alerting. Runs until cancelled;
        on cancellation it logs and returns normally (the CancelledError
        is not re-raised).

        Args:
            interval_seconds: Seconds between stat collections
        """
        logger.info(f"Starting pool monitoring (interval: {interval_seconds}s)")

        # One outer handler so cancellation is treated the same whether it
        # arrives after a successful collection or after a failed one.
        try:
            while True:
                try:
                    stats = self.get_stats()
                    logger.debug(
                        f"Pool stats: {stats.checkedout}/{stats.total_capacity} "
                        f"({stats.usage_percent:.1%})"
                    )
                except Exception as e:
                    # Keep the loop alive on collection errors; exception()
                    # records the traceback alongside the message.
                    logger.exception("Pool monitoring error: %s", e)
                await asyncio.sleep(interval_seconds)
        except asyncio.CancelledError:
            logger.info("Pool monitoring stopped")
|
|
|
|
|
|
# Module-level monitor instance; stays None until init_pool_monitor() runs
# (initialized in main.py).
pool_monitor: Optional[PoolMonitor] = None


def init_pool_monitor(engine: AsyncEngine) -> PoolMonitor:
    """
    Create the global pool monitor and publish it on this module.

    Intended to be called during application startup. Each call builds a
    new PoolMonitor and replaces whatever ``pool_monitor`` held before.

    Args:
        engine: SQLAlchemy async engine to monitor

    Returns:
        The newly created PoolMonitor instance
    """
    global pool_monitor

    monitor = PoolMonitor(engine)
    pool_monitor = monitor
    logger.info("Pool monitor initialized")
    return monitor
|