Run Black formatter across 83 files and fix 1514 ruff violations: - E722: bare except → typed exceptions (17 fixes) - E711/E712/E721: comparison style fixes with noqa for SQLAlchemy (44 fixes) - F841: unused variable assignments (70 fixes) - F541/F401: f-string and import cleanup (1383 auto-fixes) Remaining 925 errors are all F403/F405 (star imports) — structural, requires converting to explicit imports in a separate effort. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
168 lines
5.3 KiB
Python
168 lines
5.3 KiB
Python
"""
|
|
HTTP health check server for Paper Dynasty Discord bot.
|
|
|
|
Provides health and readiness endpoints for container monitoring and orchestration.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Optional
|
|
from aiohttp import web
|
|
from discord.ext import commands
|
|
|
|
# Module-level logger used by the health server for start/stop messages.
logger = logging.getLogger("discord_app.health")
|
|
|
|
|
|
class HealthServer:
    """HTTP server exposing health, readiness, metrics and diagnostics endpoints.

    Routes registered on the aiohttp application:

    * ``/health``      - liveness; answers 200 whenever the server responds.
    * ``/ready``       - 200 when ``bot.is_ready()`` is true, otherwise 503.
    * ``/metrics``     - bot state plus guild, user and cog counts.
    * ``/diagnostics`` - bot state plus a snapshot of the asyncio tasks.
    """

    def __init__(self, bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080) -> None:
        """
        Initialize health server.

        Args:
            bot: Discord bot instance to monitor
            host: Host to bind to (default: 0.0.0.0 for container access)
            port: Port to listen on (default: 8080)
        """
        self.bot = bot
        self.host = host
        self.port = port
        self.app = web.Application()
        # Both stay None until start() succeeds and are reset to None by stop().
        self.runner: Optional[web.AppRunner] = None
        self.site: Optional[web.TCPSite] = None

        # Setup routes
        self.app.router.add_get("/health", self.health_check)
        self.app.router.add_get("/ready", self.readiness_check)
        self.app.router.add_get("/metrics", self.metrics)
        self.app.router.add_get("/diagnostics", self.diagnostics)

    def _latency_ms(self) -> Optional[float]:
        """
        Return the bot latency in milliseconds rounded to 2 places, or None.

        None is returned when the latency is falsy (missing or zero) or is not
        a finite number. NaN and infinity are truthy, so a plain truthiness
        check lets them through, and the JSON encoder would then emit
        ``NaN``/``Infinity``, which strict JSON parsers reject.
        NOTE(review): discord.py appears to report such values while the
        gateway connection is not established - confirm for the pinned version.
        """
        # Function-scope import, like the ``import sys`` in diagnostics().
        import math

        latency = self.bot.latency
        if not latency or not math.isfinite(latency):
            return None
        return round(latency * 1000, 2)

    async def health_check(self, request: web.Request) -> web.Response:
        """
        Basic liveness check - is the process running?

        Returns 200 if the server is responsive.
        """
        return web.json_response(
            {"status": "healthy", "service": "paper-dynasty-discord-bot"}
        )

    async def readiness_check(self, request: web.Request) -> web.Response:
        """
        Readiness check - is the bot ready to serve requests?

        Returns:
            200 if ``bot.is_ready()`` is true (body includes ``latency_ms``)
            503 if bot is not ready
        """
        if self.bot.is_ready():
            return web.json_response(
                {
                    "status": "ready",
                    "discord_connected": True,
                    "latency_ms": self._latency_ms(),
                }
            )
        else:
            return web.json_response(
                {"status": "not_ready", "discord_connected": False}, status=503
            )

    async def metrics(self, request: web.Request) -> web.Response:
        """
        Return bot metrics for monitoring.

        The JSON body reports ready/closed state and latency, the guild count
        and ids, the user count, and the names and count of loaded cogs.
        """
        metrics_data = {
            "bot": {
                "is_ready": self.bot.is_ready(),
                "is_closed": self.bot.is_closed(),
                "latency_ms": self._latency_ms(),
            },
            "guilds": {
                "count": len(self.bot.guilds),
                "guild_ids": [g.id for g in self.bot.guilds],
            },
            "users": {"count": len(self.bot.users)},
            "cogs": {"loaded": list(self.bot.cogs.keys()), "count": len(self.bot.cogs)},
        }

        return web.json_response(metrics_data)

    async def diagnostics(self, request: web.Request) -> web.Response:
        """
        Detailed diagnostics for troubleshooting frozen bot.

        Captures state before container restart: bot ready/closed state and
        latency, asyncio tasks (total count, details for at most the first
        20), loaded cogs, and the Python version string.
        """
        import sys

        try:
            tasks_info = [
                {
                    "name": task.get_name(),
                    "done": task.done(),
                    "cancelled": task.cancelled(),
                }
                for task in asyncio.all_tasks()
            ]
        except Exception as e:
            # Best effort: report the failure in the payload instead of
            # failing the whole diagnostics request.
            tasks_info = [f"Error capturing tasks: {e}"]

        diagnostics_data = {
            "bot": {
                "is_ready": self.bot.is_ready(),
                "is_closed": self.bot.is_closed(),
                "latency_ms": self._latency_ms(),
            },
            # "count" is the full number of entries; the list itself is capped at 20.
            "tasks": {"count": len(tasks_info), "tasks": tasks_info[:20]},
            "cogs": {"loaded": list(self.bot.cogs.keys()), "count": len(self.bot.cogs)},
            "python_version": sys.version,
        }

        return web.json_response(diagnostics_data)

    async def start(self) -> None:
        """
        Start the health check server.

        Sets up an AppRunner for the application and starts a TCPSite on
        ``host``:``port``. If the site cannot be started, the runner is
        cleaned up again and the original exception propagates.
        """
        runner = web.AppRunner(self.app)
        await runner.setup()
        site = web.TCPSite(runner, self.host, self.port)
        try:
            await site.start()
        except BaseException:
            # Includes cancellation: do not leave a set-up runner behind when
            # the listening socket could not be opened.
            await runner.cleanup()
            raise

        # Publish the handles only once the server is really listening.
        self.runner = runner
        self.site = site
        logger.info("Health check server started on %s:%s", self.host, self.port)

    async def stop(self) -> None:
        """
        Stop the health check server.

        Safe to call when the server was never started and safe to call more
        than once; the runner is cleaned up even if stopping the site fails.
        """
        site, runner = self.site, self.runner
        # Drop the references first so a repeated stop() finds nothing to do.
        self.site = None
        self.runner = None
        try:
            if site:
                await site.stop()
        finally:
            if runner:
                await runner.cleanup()
        logger.info("Health check server stopped")
|
|
|
|
|
|
async def run_health_server(bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080):
    """
    Run health server until the bot is closed.

    Creates a HealthServer, starts it, then checks ``bot.is_closed()`` once per
    second. The server is stopped when the bot closes, when this coroutine is
    cancelled, or when startup fails. Meant to be run as a background task.

    Args:
        bot: Discord bot instance
        host: Host to bind to
        port: Port to listen on
    """
    server = HealthServer(bot, host, port)

    try:
        # start() is inside the try so that a failed or cancelled startup
        # still goes through server.stop() and releases what was set up.
        await server.start()

        # Keep the server running until bot is closed
        while not bot.is_closed():
            await asyncio.sleep(1)
    finally:
        await server.stop()
|