strat-chatbot/domain/services.py
Cal Corum 43d36ce439 fix: resolve HIGH-severity issues from code review
API authentication:
- Add X-API-Secret shared-secret header validation on /chat and /stats
- /health remains public for monitoring
- Auth is a no-op when API_SECRET is empty (dev mode)

Rate limiting:
- Add per-user sliding-window rate limiter on /chat (10 req/60s default)
- Returns 429 with clear message when exceeded
- Self-cleaning memory (prunes expired entries on each check)

Exception sanitization:
- Discord bot no longer exposes raw exception text to users
- Error embeds show generic "Something went wrong" message
- Full exception details logged server-side with context
- query_chat_api RuntimeError no longer includes response body

Async correctness:
- Wrap synchronous RuleRepository.search() in run_in_executor()
  to prevent blocking the event loop during SentenceTransformer inference
- Port contract stays synchronous; service owns the async boundary

Test coverage: 101 passed, 1 skipped (11 new tests for auth + rate limiting)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 16:00:26 -05:00

114 lines
3.7 KiB
Python

"""Domain services — core business logic with no framework dependencies.
ChatService orchestrates the Q&A flow using only domain ports.
"""
import asyncio
import logging
from typing import Optional
from .models import ChatResult
from .ports import RuleRepository, LLMPort, ConversationStore, IssueTracker
logger = logging.getLogger(__name__)
# LLM responses whose confidence is strictly below this value cause
# ChatService to file an "unanswered question" issue (only when an issue
# tracker has been injected).
CONFIDENCE_THRESHOLD = 0.4
class ChatService:
    """Orchestrates the rules Q&A use case.

    All external dependencies are injected via ports — this class has zero
    knowledge of ChromaDB, OpenRouter, SQLite, or Gitea.
    """

    def __init__(
        self,
        rules: RuleRepository,
        llm: LLMPort,
        conversations: ConversationStore,
        issues: Optional[IssueTracker] = None,
        top_k_rules: int = 10,
    ):
        """Store the injected ports and the search configuration.

        Args:
            rules: Rule search port. Its ``search()`` method is invoked
                synchronously from a worker thread.
            llm: Port used to generate the answer.
            conversations: Port used to create conversations and to persist
                and read back messages.
            issues: Optional issue tracker. When ``None``, no issue is ever
                filed, regardless of the answer's confidence.
            top_k_rules: Forwarded as ``top_k`` to every rule search.
        """
        self.rules = rules
        self.llm = llm
        self.conversations = conversations
        self.issues = issues
        self.top_k_rules = top_k_rules

    async def answer_question(
        self,
        message: str,
        user_id: str,
        channel_id: str,
        conversation_id: Optional[str] = None,
        parent_message_id: Optional[str] = None,
    ) -> ChatResult:
        """Run the full Q&A flow for one user message.

        Steps, in order: get or create the conversation, save the user
        message, search rules, load history, call the LLM, save the
        assistant message, and file an issue when the answer is flagged for
        human review or its confidence is below ``CONFIDENCE_THRESHOLD``.

        Args:
            message: The user's question; also used as the search query.
            user_id: Identifier of the asking user.
            channel_id: Identifier of the channel the question came from.
            conversation_id: Existing conversation to continue, if any;
                passed through to the conversation store.
            parent_message_id: Stored as the parent of the user message.

        Returns:
            A ``ChatResult`` carrying the LLM answer, the conversation id,
            the assistant message id (``message_id``), the user message id
            (``parent_message_id``), and the LLM's cited rules, confidence
            and needs-human flag.

        Raises:
            Exception: Anything raised by the conversation store, the rule
                search, or the LLM propagates unchanged. Failures while
                filing an issue are logged and suppressed.
        """
        conv_id = await self.conversations.get_or_create_conversation(
            user_id=user_id,
            channel_id=channel_id,
            conversation_id=conversation_id,
        )

        user_msg_id = await self.conversations.add_message(
            conversation_id=conv_id,
            content=message,
            is_user=True,
            parent_id=parent_message_id,
        )

        search_results = await self._search_rules(message)

        # NOTE(review): the user message was saved above, so the history may
        # already contain the current question — confirm the LLM adapter
        # expects that.
        history = await self.conversations.get_conversation_history(conv_id, limit=10)

        llm_response = await self.llm.generate_response(
            question=message,
            rules=search_results,
            conversation_history=history,
        )

        # The assistant message is threaded under the user message it answers.
        assistant_msg_id = await self.conversations.add_message(
            conversation_id=conv_id,
            content=llm_response.answer,
            is_user=False,
            parent_id=user_msg_id,
        )

        await self._maybe_create_issue(
            llm_response=llm_response,
            message=message,
            user_id=user_id,
            channel_id=channel_id,
            conv_id=conv_id,
            search_results=search_results,
        )

        return ChatResult(
            response=llm_response.answer,
            conversation_id=conv_id,
            message_id=assistant_msg_id,
            parent_message_id=user_msg_id,
            cited_rules=llm_response.cited_rules,
            confidence=llm_response.confidence,
            needs_human=llm_response.needs_human,
        )

    async def _search_rules(self, message: str):
        """Run the synchronous rule search in a worker thread and await it."""
        # Offload the synchronous (CPU-bound) RuleRepository.search() to a
        # thread so the event loop is not blocked while SentenceTransformer
        # encodes the query. Passing None selects the loop's default executor.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            lambda: self.rules.search(query=message, top_k=self.top_k_rules),
        )

    async def _maybe_create_issue(
        self,
        llm_response,
        message: str,
        user_id: str,
        channel_id: str,
        conv_id,
        search_results,
    ) -> None:
        """File an issue for a low-confidence or needs-human answer, best effort."""
        # Compare against None explicitly: a configured tracker must not be
        # skipped merely because the object happens to evaluate as falsy.
        if self.issues is None:
            return
        if not (
            llm_response.needs_human
            or llm_response.confidence < CONFIDENCE_THRESHOLD
        ):
            return

        # Issue creation is deliberately best-effort: the user still gets
        # their answer even if the tracker is unavailable.
        try:
            await self.issues.create_unanswered_issue(
                question=message,
                user_id=user_id,
                channel_id=channel_id,
                attempted_rules=[r.rule_id for r in search_results],
                conversation_id=conv_id,
            )
        except Exception:
            logger.exception("Failed to create issue for unanswered question")