Implements a comprehensive health check system using aiohttp to support container orchestration and external monitoring systems.

Features:
- /health endpoint: Basic liveness check (is process running?)
- /ready endpoint: Readiness check (is bot connected to Discord?)
- /metrics endpoint: Detailed bot metrics (guilds, users, cogs, latency)

Changes:
- Add aiohttp to requirements.txt
- Create health_server.py module with HTTP server
- Update paperdynasty.py to run health server alongside bot
- Update docker-compose.yml with HTTP-based healthcheck
- Fix deploy.sh Docker image name

Benefits:
- Auto-restart on bot hangs/deadlocks
- Foundation for external monitoring (Prometheus, Grafana, etc.)
- Detailed diagnostics for troubleshooting
- Industry-standard health check pattern

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
133 lines
4.0 KiB
Python
133 lines
4.0 KiB
Python
"""
|
|
HTTP health check server for Paper Dynasty Discord bot.
|
|
|
|
Provides health and readiness endpoints for container monitoring and orchestration.
|
|
"""
|
|
import asyncio
import logging
import math
from typing import Optional

import discord
from aiohttp import web
from discord.ext import commands
|
|
|
|
# Module-level logger for the health server; records are emitted under the
# 'discord_app.health' logger name.
logger = logging.getLogger('discord_app.health')
|
|
|
|
|
|
class HealthServer:
    """
    HTTP server for health checks and metrics.

    Registers three GET routes on an aiohttp application:
        /health   liveness check (always 200 while the server responds)
        /ready    readiness check (200 if the bot reports ready, else 503)
        /metrics  JSON snapshot of bot, guild, user and cog state
    """

    def __init__(self, bot: commands.Bot, host: str = '0.0.0.0', port: int = 8080):
        """
        Initialize health server.

        Args:
            bot: Discord bot instance to monitor
            host: Host to bind to (default: 0.0.0.0 for container access)
            port: Port to listen on (default: 8080)
        """
        self.bot = bot
        self.host = host
        self.port = port
        self.app = web.Application()
        # Both handles stay None until start() succeeds; stop() resets them.
        self.runner: Optional[web.AppRunner] = None
        self.site: Optional[web.TCPSite] = None

        # Setup routes
        self.app.router.add_get('/health', self.health_check)
        self.app.router.add_get('/ready', self.readiness_check)
        self.app.router.add_get('/metrics', self.metrics)

    @staticmethod
    def _latency_ms(latency: Optional[float]) -> Optional[float]:
        """
        Convert a latency in seconds to milliseconds rounded to 2 decimals.

        Args:
            latency: Latency in seconds, as read from ``bot.latency``

        Returns:
            The latency in milliseconds, or None when no usable value exists
            (None, zero, NaN or infinite).
        """
        # NaN and infinity are truthy, so a plain truthiness test lets them
        # through; the JSON encoder would then emit bare NaN/Infinity tokens,
        # which are not valid JSON and break strict clients.
        if not latency or not math.isfinite(latency):
            return None
        return round(latency * 1000, 2)

    async def health_check(self, request: web.Request) -> web.Response:
        """
        Basic liveness check - is the process running?

        Returns 200 if the server is responsive.
        """
        return web.json_response({
            'status': 'healthy',
            'service': 'paper-dynasty-discord-bot',
        })

    async def readiness_check(self, request: web.Request) -> web.Response:
        """
        Readiness check - is the bot ready to serve requests?

        Returns:
            200 if bot is connected to Discord
            503 if bot is not ready
        """
        if not self.bot.is_ready():
            return web.json_response({
                'status': 'not_ready',
                'discord_connected': False,
            }, status=503)

        return web.json_response({
            'status': 'ready',
            'discord_connected': True,
            'latency_ms': self._latency_ms(self.bot.latency),
        })

    async def metrics(self, request: web.Request) -> web.Response:
        """
        Return bot metrics for monitoring.

        Provides detailed information about bot state for external monitoring systems.
        """
        guilds = self.bot.guilds
        cogs = self.bot.cogs
        metrics_data = {
            'bot': {
                'is_ready': self.bot.is_ready(),
                'is_closed': self.bot.is_closed(),
                'latency_ms': self._latency_ms(self.bot.latency),
            },
            'guilds': {
                'count': len(guilds),
                'guild_ids': [g.id for g in guilds],
            },
            'users': {
                'count': len(self.bot.users),
            },
            'cogs': {
                'loaded': list(cogs),
                'count': len(cogs),
            },
        }

        return web.json_response(metrics_data)

    async def start(self):
        """
        Start the health check server.

        If binding the listening socket fails, the already set-up runner is
        cleaned up before the error propagates, and ``self.runner`` /
        ``self.site`` are left unset.
        """
        runner = web.AppRunner(self.app)
        await runner.setup()
        site = web.TCPSite(runner, self.host, self.port)
        try:
            await site.start()
        except BaseException:
            # Covers bind errors and task cancellation alike: release the
            # runner's resources, then let the caller see the original error.
            await runner.cleanup()
            raise

        # Publish the handles only once the server is actually listening.
        self.runner = runner
        self.site = site
        logger.info('Health check server started on %s:%s', self.host, self.port)

    async def stop(self):
        """
        Stop the health check server.

        Safe to call when the server was never started, and safe to call
        more than once: the handles are cleared before shutting down.
        """
        site, runner = self.site, self.runner
        self.site = None
        self.runner = None

        try:
            if site is not None:
                await site.stop()
        finally:
            # Always release the runner, even if stopping the site failed.
            if runner is not None:
                await runner.cleanup()
        logger.info('Health check server stopped')
|
|
|
|
|
|
async def run_health_server(bot: commands.Bot, host: str = '0.0.0.0', port: int = 8080):
    """
    Serve the health endpoints for as long as the bot stays open.

    Builds a HealthServer, starts it, then waits until ``bot.is_closed()``
    reports True. The server is stopped on the way out, whether the wait
    ends normally or an exception is raised while waiting.

    Args:
        bot: Discord bot instance
        host: Host to bind to
        port: Port to listen on
    """
    health = HealthServer(bot, host, port)
    await health.start()

    try:
        # Check the bot's closed flag once per second.
        while True:
            if bot.is_closed():
                break
            await asyncio.sleep(1)
    finally:
        await health.stop()
|