API authentication:
- Add X-API-Secret shared-secret header validation on /chat and /stats
- /health remains public for monitoring
- Auth is a no-op when API_SECRET is empty (dev mode)

Rate limiting:
- Add per-user sliding-window rate limiter on /chat (10 req/60s default)
- Returns 429 with clear message when exceeded
- Self-cleaning memory (prunes expired entries on each check)

Exception sanitization:
- Discord bot no longer exposes raw exception text to users
- Error embeds show generic "Something went wrong" message
- Full exception details logged server-side with context
- query_chat_api RuntimeError no longer includes response body

Async correctness:
- Wrap synchronous RuleRepository.search() in run_in_executor() to prevent
  blocking the event loop during SentenceTransformer inference
- Port contract stays synchronous; service owns the async boundary

Test coverage: 101 passed, 1 skipped (11 new tests for auth + rate limiting)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
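As a rough illustration of the header check described above (the actual handler is not shown on this page), here is a minimal sketch assuming the HTTP layer is FastAPI; the verify_api_secret name and the 401 detail are invented, while the X-API-Secret header, the API_SECRET setting, and the empty-secret no-op come straight from the notes:

import hmac
import os

from fastapi import Header, HTTPException

API_SECRET = os.environ.get("API_SECRET", "")


async def verify_api_secret(x_api_secret: str = Header(default="")) -> None:
    """Shared-secret gate for /chat and /stats; /health declares no dependency."""
    if not API_SECRET:
        return  # dev mode: auth is a no-op when API_SECRET is empty
    # compare_digest avoids leaking the secret through timing differences
    if not hmac.compare_digest(x_api_secret, API_SECRET):
        raise HTTPException(status_code=401, detail="Invalid API secret")

Protected routes would take Depends(verify_api_secret); /health simply omits it.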
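The per-user sliding window admits an equally small sketch; the class name and in-memory layout are assumptions, but the 10 requests per 60 seconds default and the prune-on-every-check behaviour match the description:

import time
from collections import defaultdict


class SlidingWindowRateLimiter:
    """Allow at most `limit` requests per `window_seconds` per user."""

    def __init__(self, limit: int = 10, window_seconds: float = 60.0):
        self.limit = limit
        self.window_seconds = window_seconds
        self._hits: dict[str, list[float]] = defaultdict(list)

    def allow(self, user_id: str) -> bool:
        now = time.monotonic()
        cutoff = now - self.window_seconds
        # Self-cleaning: keep only timestamps still inside the window.
        hits = [t for t in self._hits[user_id] if t > cutoff]
        allowed = len(hits) < self.limit
        if allowed:
            hits.append(now)
        self._hits[user_id] = hits
        return allowed

The /chat handler would call allow(user_id) before doing any work and answer 429 with a clear message when it returns False.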
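For the exception sanitization, the pattern is: log the full traceback with context server-side, send the user only a generic embed. A sketch assuming a discord.py bot; query_chat_api is the helper named in the notes above (not defined here), the rest is illustrative:

import logging

import discord

logger = logging.getLogger(__name__)


async def handle_chat(interaction: discord.Interaction, question: str) -> None:
    try:
        answer = await query_chat_api(question)  # helper from the notes, assumed to exist
        await interaction.followup.send(answer)
    except Exception:
        # Real details (user id + traceback) go to the server logs only.
        logger.exception("chat command failed for user %s", interaction.user.id)
        await interaction.followup.send(
            embed=discord.Embed(description="Something went wrong")
        )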
"""Domain services — core business logic with no framework dependencies.
|
|
|
|
ChatService orchestrates the Q&A flow using only domain ports.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Optional
|
|
|
|
from .models import ChatResult
|
|
from .ports import RuleRepository, LLMPort, ConversationStore, IssueTracker
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
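# Answers below this confidence are escalated to the issue tracker.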
CONFIDENCE_THRESHOLD = 0.4


class ChatService:
    """Orchestrates the rules Q&A use case.

    All external dependencies are injected via ports — this class has zero
    knowledge of ChromaDB, OpenRouter, SQLite, or Gitea.
    """

    def __init__(
        self,
        rules: RuleRepository,
        llm: LLMPort,
        conversations: ConversationStore,
        issues: Optional[IssueTracker] = None,
        top_k_rules: int = 10,
    ):
        self.rules = rules
        self.llm = llm
        self.conversations = conversations
        self.issues = issues
        self.top_k_rules = top_k_rules

    async def answer_question(
        self,
        message: str,
        user_id: str,
        channel_id: str,
        conversation_id: Optional[str] = None,
        parent_message_id: Optional[str] = None,
    ) -> ChatResult:
        """Full Q&A flow: search rules → get history → call LLM → persist → maybe create issue."""
        # Get or create conversation
        conv_id = await self.conversations.get_or_create_conversation(
            user_id=user_id,
            channel_id=channel_id,
            conversation_id=conversation_id,
        )

        # Save user message
        user_msg_id = await self.conversations.add_message(
            conversation_id=conv_id,
            content=message,
            is_user=True,
            parent_id=parent_message_id,
        )

        # Search for relevant rules — offload the synchronous (CPU-bound)
        # RuleRepository.search() to a thread so the event loop is not blocked
        # while SentenceTransformer encodes the query.
        loop = asyncio.get_running_loop()
        search_results = await loop.run_in_executor(
            None,
            lambda: self.rules.search(query=message, top_k=self.top_k_rules),
        )

        # Get conversation history for context
        history = await self.conversations.get_conversation_history(conv_id, limit=10)

        # Generate response from LLM
        llm_response = await self.llm.generate_response(
            question=message,
            rules=search_results,
            conversation_history=history,
        )

        # Save assistant message
        assistant_msg_id = await self.conversations.add_message(
            conversation_id=conv_id,
            content=llm_response.answer,
            is_user=False,
            parent_id=user_msg_id,
        )

        # Create issue if confidence is low or human review needed
        if self.issues and (
            llm_response.needs_human or llm_response.confidence < CONFIDENCE_THRESHOLD
        ):
            try:
                await self.issues.create_unanswered_issue(
                    question=message,
                    user_id=user_id,
                    channel_id=channel_id,
                    attempted_rules=[r.rule_id for r in search_results],
                    conversation_id=conv_id,
                )
            except Exception:
                logger.exception("Failed to create issue for unanswered question")

        return ChatResult(
            response=llm_response.answer,
            conversation_id=conv_id,
            message_id=assistant_msg_id,
            parent_message_id=user_msg_id,
            cited_rules=llm_response.cited_rules,
            confidence=llm_response.confidence,
            needs_human=llm_response.needs_human,
        )