Implement play locking to prevent concurrent command processing
Adds an idempotency guard to prevent race conditions when multiple users submit commands for the same play simultaneously.

Changes:
- Add PlayLockedException for locked play detection
- Implement lock check in checks_log_interaction()
- Acquire lock (play.locked = True) before processing commands
- Release lock (play.locked = False) after play completion
- Add warning logs for rejected duplicate submissions
- Add /diagnostics endpoint to health server for debugging

This prevents database corruption and duplicate processing when users spam commands like "log xcheck" while the first is still processing.

Tested successfully in Discord - duplicate commands now properly return PlayLockedException with instructions to wait.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
00ed42befd
commit
90d7345850
File diff suppressed because it is too large
Load Diff
@ -129,3 +129,14 @@ class LegalityCheckNotRequired(GameException):
|
||||
|
||||
class InvalidResponder(GameException):
|
||||
pass
|
||||
|
||||
|
||||
class PlayLockedException(GameException):
    """Signal that the targeted play is already locked by another interaction.

    Rejecting the second caller keeps two interactions from modifying the
    same play record concurrently, which could otherwise cause database
    deadlocks or data corruption.
    """
|
||||
|
||||
106
health_server.py
106
health_server.py
@ -3,6 +3,7 @@ HTTP health check server for Paper Dynasty Discord bot.
|
||||
|
||||
Provides health and readiness endpoints for container monitoring and orchestration.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Optional
|
||||
@ -10,13 +11,13 @@ from aiohttp import web
|
||||
import discord
|
||||
from discord.ext import commands
|
||||
|
||||
logger = logging.getLogger('discord_app.health')
|
||||
logger = logging.getLogger("discord_app.health")
|
||||
|
||||
|
||||
class HealthServer:
|
||||
"""HTTP server for health checks and metrics."""
|
||||
|
||||
def __init__(self, bot: commands.Bot, host: str = '0.0.0.0', port: int = 8080):
|
||||
def __init__(self, bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080):
|
||||
"""
|
||||
Initialize health server.
|
||||
|
||||
@ -33,9 +34,10 @@ class HealthServer:
|
||||
self.site: Optional[web.TCPSite] = None
|
||||
|
||||
# Setup routes
|
||||
self.app.router.add_get('/health', self.health_check)
|
||||
self.app.router.add_get('/ready', self.readiness_check)
|
||||
self.app.router.add_get('/metrics', self.metrics)
|
||||
self.app.router.add_get("/health", self.health_check)
|
||||
self.app.router.add_get("/ready", self.readiness_check)
|
||||
self.app.router.add_get("/metrics", self.metrics)
|
||||
self.app.router.add_get("/diagnostics", self.diagnostics)
|
||||
|
||||
async def health_check(self, request: web.Request) -> web.Response:
|
||||
"""
|
||||
@ -43,10 +45,9 @@ class HealthServer:
|
||||
|
||||
Returns 200 if the server is responsive.
|
||||
"""
|
||||
return web.json_response({
|
||||
'status': 'healthy',
|
||||
'service': 'paper-dynasty-discord-bot'
|
||||
})
|
||||
return web.json_response(
|
||||
{"status": "healthy", "service": "paper-dynasty-discord-bot"}
|
||||
)
|
||||
|
||||
async def readiness_check(self, request: web.Request) -> web.Response:
|
||||
"""
|
||||
@ -57,16 +58,19 @@ class HealthServer:
|
||||
503 if bot is not ready
|
||||
"""
|
||||
if self.bot.is_ready():
|
||||
return web.json_response({
|
||||
'status': 'ready',
|
||||
'discord_connected': True,
|
||||
'latency_ms': round(self.bot.latency * 1000, 2) if self.bot.latency else None
|
||||
})
|
||||
return web.json_response(
|
||||
{
|
||||
"status": "ready",
|
||||
"discord_connected": True,
|
||||
"latency_ms": round(self.bot.latency * 1000, 2)
|
||||
if self.bot.latency
|
||||
else None,
|
||||
}
|
||||
)
|
||||
else:
|
||||
return web.json_response({
|
||||
'status': 'not_ready',
|
||||
'discord_connected': False
|
||||
}, status=503)
|
||||
return web.json_response(
|
||||
{"status": "not_ready", "discord_connected": False}, status=503
|
||||
)
|
||||
|
||||
async def metrics(self, request: web.Request) -> web.Response:
|
||||
"""
|
||||
@ -75,33 +79,65 @@ class HealthServer:
|
||||
Provides detailed information about bot state for external monitoring systems.
|
||||
"""
|
||||
metrics_data = {
|
||||
'bot': {
|
||||
'is_ready': self.bot.is_ready(),
|
||||
'is_closed': self.bot.is_closed(),
|
||||
'latency_ms': round(self.bot.latency * 1000, 2) if self.bot.latency else None,
|
||||
"bot": {
|
||||
"is_ready": self.bot.is_ready(),
|
||||
"is_closed": self.bot.is_closed(),
|
||||
"latency_ms": round(self.bot.latency * 1000, 2)
|
||||
if self.bot.latency
|
||||
else None,
|
||||
},
|
||||
'guilds': {
|
||||
'count': len(self.bot.guilds),
|
||||
'guild_ids': [g.id for g in self.bot.guilds]
|
||||
"guilds": {
|
||||
"count": len(self.bot.guilds),
|
||||
"guild_ids": [g.id for g in self.bot.guilds],
|
||||
},
|
||||
'users': {
|
||||
'count': len(self.bot.users)
|
||||
},
|
||||
'cogs': {
|
||||
'loaded': list(self.bot.cogs.keys()),
|
||||
'count': len(self.bot.cogs)
|
||||
}
|
||||
"users": {"count": len(self.bot.users)},
|
||||
"cogs": {"loaded": list(self.bot.cogs.keys()), "count": len(self.bot.cogs)},
|
||||
}
|
||||
|
||||
return web.json_response(metrics_data)
|
||||
|
||||
async def diagnostics(self, request: web.Request) -> web.Response:
    """
    Return a JSON snapshot of bot and event-loop state.

    Meant for troubleshooting a frozen bot, so its state can be captured
    before the container is restarted. The payload contains:

    - ``bot``: ready/closed flags and latency in milliseconds (``None``
      when the latency value is falsy)
    - ``tasks``: number of captured task entries plus at most the first
      20 entries (name, done, cancelled)
    - ``cogs``: names and count of the loaded cogs
    - ``python_version``: the interpreter's ``sys.version`` string
    """
    import sys

    try:
        task_entries = [
            {
                "name": job.get_name(),
                "done": job.done(),
                "cancelled": job.cancelled(),
            }
            for job in asyncio.all_tasks()
        ]
    except Exception as exc:
        # Best effort: a failure while inspecting tasks must not break the
        # endpoint, so the error text is reported in place of the entries.
        task_entries = [f"Error capturing tasks: {exc}"]

    bot_state = {
        "is_ready": self.bot.is_ready(),
        "is_closed": self.bot.is_closed(),
    }
    if self.bot.latency:
        bot_state["latency_ms"] = round(self.bot.latency * 1000, 2)
    else:
        bot_state["latency_ms"] = None

    task_state = {"count": len(task_entries), "tasks": task_entries[:20]}
    cog_state = {"loaded": list(self.bot.cogs.keys()), "count": len(self.bot.cogs)}

    return web.json_response(
        {
            "bot": bot_state,
            "tasks": task_state,
            "cogs": cog_state,
            "python_version": sys.version,
        }
    )
|
||||
|
||||
async def start(self):
    """
    Start the health check server.

    Wraps ``self.app`` in an ``AppRunner``, sets it up, binds a ``TCPSite``
    to ``self.host``/``self.port`` and starts listening. The runner and site
    are stored on the instance as ``self.runner`` and ``self.site``. The
    bound address is logged once after the site has started.
    """
    self.runner = web.AppRunner(self.app)
    await self.runner.setup()
    self.site = web.TCPSite(self.runner, self.host, self.port)
    await self.site.start()
    logger.info(f"Health check server started on {self.host}:{self.port}")
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the health check server."""
|
||||
@ -109,10 +145,10 @@ class HealthServer:
|
||||
await self.site.stop()
|
||||
if self.runner:
|
||||
await self.runner.cleanup()
|
||||
logger.info('Health check server stopped')
|
||||
logger.info("Health check server stopped")
|
||||
|
||||
|
||||
async def run_health_server(bot: commands.Bot, host: str = '0.0.0.0', port: int = 8080):
|
||||
async def run_health_server(bot: commands.Bot, host: str = "0.0.0.0", port: int = 8080):
|
||||
"""
|
||||
Run health server as a background task.
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user