paper-dynasty-discord/health_server.py
Cal Corum ee80cd72ae fix: apply Black formatting and resolve ruff lint violations
Run Black formatter across 83 files and fix 1514 ruff violations:
- E722: bare except → typed exceptions (17 fixes)
- E711/E712/E721: comparison style fixes with noqa for SQLAlchemy (44 fixes)
- F841: unused variable assignments (70 fixes)
- F541/F401: f-string and import cleanup (1383 auto-fixes)

Remaining 925 errors are all F403/F405 (star imports) — structural,
requires converting to explicit imports in a separate effort.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 11:37:46 -05:00

168 lines
5.3 KiB
Python

"""
HTTP health check server for Paper Dynasty Discord bot.
Provides health and readiness endpoints for container monitoring and orchestration.
"""
import asyncio
import logging
import math
from typing import Optional

from aiohttp import web
from discord.ext import commands
# Module-level logger; all records from this file are emitted under the
# "discord_app.health" logger name.
logger = logging.getLogger("discord_app.health")
class HealthServer:
    """HTTP server for health checks and metrics.

    Routes registered at construction time:
        GET /health       -> health_check      (liveness)
        GET /ready        -> readiness_check   (readiness, 200 or 503)
        GET /metrics      -> metrics           (bot, guild, user and cog counters)
        GET /diagnostics  -> diagnostics       (bot state plus asyncio task list)
    """

    def __init__(self, bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080):
        """
        Initialize health server.

        Args:
            bot: Discord bot instance to monitor
            host: Host to bind to (default: 0.0.0.0 for container access)
            port: Port to listen on (default: 8080)
        """
        self.bot = bot
        self.host = host
        self.port = port
        self.app = web.Application()
        # Both stay None until start() succeeds and are reset to None by stop().
        self.runner: Optional[web.AppRunner] = None
        self.site: Optional[web.TCPSite] = None

        # Setup routes
        self.app.router.add_get("/health", self.health_check)
        self.app.router.add_get("/ready", self.readiness_check)
        self.app.router.add_get("/metrics", self.metrics)
        self.app.router.add_get("/diagnostics", self.diagnostics)

    def _latency_ms(self) -> Optional[float]:
        """
        Return the bot latency in milliseconds, rounded to two decimals.

        Returns:
            The latency in milliseconds, or None when the latency is missing,
            zero, NaN or infinite.
        """
        latency = self.bot.latency
        # NaN and infinity are truthy, so a plain truthiness test lets them
        # through; json.dumps would then emit the bare tokens NaN / Infinity,
        # which strict JSON parsers reject. Report "unknown" as null instead.
        if not latency or not math.isfinite(latency):
            return None
        return round(latency * 1000, 2)

    def _bot_state(self) -> dict:
        """Return the connection-state dictionary shared by metrics and diagnostics."""
        return {
            "is_ready": self.bot.is_ready(),
            "is_closed": self.bot.is_closed(),
            "latency_ms": self._latency_ms(),
        }

    def _cogs_summary(self) -> dict:
        """Return the names and number of cogs currently attached to the bot."""
        cogs = self.bot.cogs
        return {"loaded": list(cogs.keys()), "count": len(cogs)}

    async def health_check(self, request: web.Request) -> web.Response:
        """
        Basic liveness check - is the process running?

        Returns 200 if the server is responsive; the bot state is not consulted.
        """
        return web.json_response(
            {"status": "healthy", "service": "paper-dynasty-discord-bot"}
        )

    async def readiness_check(self, request: web.Request) -> web.Response:
        """
        Readiness check - is the bot ready to serve requests?

        Returns:
            200 if bot is connected to Discord
            503 if bot is not ready
        """
        if not self.bot.is_ready():
            return web.json_response(
                {"status": "not_ready", "discord_connected": False}, status=503
            )
        return web.json_response(
            {
                "status": "ready",
                "discord_connected": True,
                "latency_ms": self._latency_ms(),
            }
        )

    async def metrics(self, request: web.Request) -> web.Response:
        """
        Return bot metrics for monitoring.

        Provides detailed information about bot state for external monitoring systems:
        connection state, guild count and ids, cached user count and loaded cogs.
        """
        guilds = self.bot.guilds
        metrics_data = {
            "bot": self._bot_state(),
            "guilds": {
                "count": len(guilds),
                "guild_ids": [g.id for g in guilds],
            },
            "users": {"count": len(self.bot.users)},
            "cogs": self._cogs_summary(),
        }
        return web.json_response(metrics_data)

    async def diagnostics(self, request: web.Request) -> web.Response:
        """
        Detailed diagnostics for troubleshooting frozen bot.

        Captures state before container restart: bot connection state, the
        asyncio tasks known to the running loop, loaded cogs and the Python
        version. "count" is the total number of captured tasks, while the
        "tasks" list is capped at the first 20 entries.
        """
        # Imported locally because only this endpoint needs it.
        import sys

        try:
            tasks_info = [
                {
                    "name": task.get_name(),
                    "done": task.done(),
                    "cancelled": task.cancelled(),
                }
                for task in asyncio.all_tasks()
            ]
        except Exception as e:
            # Deliberately broad: this endpoint is best-effort and must still
            # answer even when task introspection itself is what is broken.
            tasks_info = [f"Error capturing tasks: {e}"]

        diagnostics_data = {
            "bot": self._bot_state(),
            "tasks": {"count": len(tasks_info), "tasks": tasks_info[:20]},
            "cogs": self._cogs_summary(),
            "python_version": sys.version,
        }
        return web.json_response(diagnostics_data)

    async def start(self):
        """
        Start the health check server.

        Raises:
            Whatever the site start raised (presumably OSError when the port
            cannot be bound - verify against aiohttp). The runner is cleaned
            up before the exception is re-raised, and `runner` / `site` are
            left as None.
        """
        self.runner = web.AppRunner(self.app)
        await self.runner.setup()
        try:
            self.site = web.TCPSite(self.runner, self.host, self.port)
            await self.site.start()
        except BaseException:
            # The runner is already set up at this point; release it so a
            # failed (or cancelled) start does not leave it dangling.
            await self.runner.cleanup()
            self.runner = None
            self.site = None
            raise
        logger.info(f"Health check server started on {self.host}:{self.port}")

    async def stop(self):
        """
        Stop the health check server.

        Safe to call when the server was never started and safe to call more
        than once: the stored site and runner are cleared before shutdown, so
        a repeated call finds nothing left to stop.
        """
        site, runner = self.site, self.runner
        self.site = None
        self.runner = None
        try:
            if site:
                await site.stop()
        finally:
            # Clean the runner up even if stopping the site failed.
            if runner:
                await runner.cleanup()
        logger.info("Health check server stopped")
async def run_health_server(bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080):
    """
    Run health server as a background task.

    Starts a HealthServer, then polls once per second until the bot reports
    that it is closed, and finally stops the server.

    Args:
        bot: Discord bot instance
        host: Host to bind to
        port: Port to listen on

    Raises:
        Any exception raised while starting the server; the server is stopped
        before the exception propagates.
    """
    server = HealthServer(bot, host, port)
    try:
        # Starting inside the try block means a failed start (e.g. the port
        # cannot be bound) still reaches server.stop(), which releases whatever
        # was created and skips whatever was not.
        await server.start()
        # Keep the server running until bot is closed
        while not bot.is_closed():
            await asyncio.sleep(1)
    finally:
        # Also runs when this task is cancelled during shutdown.
        await server.stop()