strat-chatbot/app/llm.py
Cal Corum c42fea66ba feat: initial chatbot implementation with FastAPI, ChromaDB, Discord bot, and Gitea integration
- Add vector store with sentence-transformers for semantic search
- FastAPI backend with /chat and /health endpoints
- Conversation state persistence via SQLite
- OpenRouter integration with structured JSON responses
- Discord bot with /ask slash command and reply-based follow-ups
- Automated Gitea issue creation for unanswered questions
- Docker support with docker-compose for easy deployment
- Example rule file and ingestion script
- Comprehensive documentation in README
2026-03-08 15:19:26 -05:00

180 lines
6.5 KiB
Python

"""OpenRouter LLM integration for answering rules questions."""
import json
import re
from typing import Optional

import httpx

from .config import settings
from .models import RuleSearchResult, ChatResponse
# System prompt sent with every request. It instructs the model to answer
# only from the supplied rule excerpts and to reply in the JSON shape that
# OpenRouterClient.generate_response parses (answer / cited_rules /
# confidence / needs_human). Do not edit casually: the confidence bands and
# the "I don't have a rule..." sentence are part of the downstream contract.
SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league.
Your job is to answer questions about league rules and procedures using the provided rule excerpts.
CRITICAL RULES:
1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly.
2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...")
3. If multiple rules are relevant, cite all of them.
4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator.
5. Keep responses concise but complete. Use examples when helpful from the rules.
6. Do NOT make up rules or infer beyond what's explicitly stated.
When answering:
- Start with a direct answer to the question
- Support with rule citations
- Include relevant details from the rules
- If no relevant rules found, explicitly state: "I don't have a rule that addresses this question."
Response format (JSON):
{
"answer": "Your response text",
"cited_rules": ["rule_id_1", "rule_id_2"],
"confidence": 0.0-1.0,
"needs_human": boolean
}
Higher confidence (0.8-1.0) when rules clearly answer the question.
Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous.
Very low confidence (0.0-0.2) when rules don't address the question at all.
"""
class OpenRouterClient:
"""Client for OpenRouter API."""
def __init__(self):
"""Initialize the client."""
self.api_key = settings.openrouter_api_key
if not self.api_key:
raise ValueError("OPENROUTER_API_KEY is required")
self.model = settings.openrouter_model
self.base_url = "https://openrouter.ai/api/v1/chat/completions"
async def generate_response(
self,
question: str,
rules: list[RuleSearchResult],
conversation_history: Optional[list[dict]] = None,
) -> ChatResponse:
"""Generate a response using the LLM with retrieved rules as context."""
# Build context from rules
rules_context = "\n\n".join(
[f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules]
)
if rules:
context_msg = (
f"Here are the relevant rules for the question:\n\n{rules_context}"
)
else:
context_msg = "No relevant rules were found in the knowledge base."
# Build conversation history
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
if conversation_history:
# Add last few turns of conversation (limit to avoid token overflow)
messages.extend(
conversation_history[-6:]
) # Last 3 exchanges (user+assistant)
# Add current question with context
user_message = f"{context_msg}\n\nUser question: {question}\n\nAnswer the question based on the rules provided."
messages.append({"role": "user", "content": user_message})
# Call OpenRouter API
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
self.base_url,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
},
json={
"model": self.model,
"messages": messages,
"temperature": 0.3,
"max_tokens": 1000,
"top_p": 0.9,
},
)
if response.status_code != 200:
error_detail = response.text
raise RuntimeError(
f"OpenRouter API error: {response.status_code} - {error_detail}"
)
result = response.json()
content = result["choices"][0]["message"]["content"]
# Parse the JSON response
try:
# Extract JSON from response (LLM might add markdown formatting)
if "```json" in content:
json_str = content.split("```json")[1].split("```")[0].strip()
else:
json_str = content.strip()
parsed = json.loads(json_str)
cited_rules = parsed.get("cited_rules", [])
if not cited_rules and rules:
# Fallback: extract rule IDs from the text if not properly returned
import re
rule_ids = re.findall(
r"Rule\s+([\d\.\(\)a-b]+)", parsed.get("answer", "")
)
cited_rules = list(set(rule_ids))
return ChatResponse(
response=parsed["answer"],
conversation_id="", # Will be set by caller
message_id="", # Will be set by caller
cited_rules=cited_rules,
confidence=float(parsed.get("confidence", 0.5)),
needs_human=bool(parsed.get("needs_human", False)),
)
except (json.JSONDecodeError, KeyError) as e:
# If parsing fails, return what we can extract
return ChatResponse(
response=content,
conversation_id="",
message_id="",
cited_rules=[],
confidence=0.5,
needs_human=False,
)
class MockLLMClient:
    """Stand-in LLM client that fabricates answers without any API calls.

    Useful for tests and for running the app without an OpenRouter key.
    """

    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict]] = None,
    ) -> ChatResponse:
        """Return a canned response derived only from the retrieved rules."""
        cited = [r.rule_id for r in rules]
        if cited:
            answer = (
                f"Based on rule(s) {', '.join(cited)}, "
                "here's what you need to know..."
            )
        else:
            answer = (
                "I don't have a rule that addresses this question. "
                "You should ask a league administrator."
            )
        # Full confidence when any rule matched; escalate to a human when none did.
        return ChatResponse(
            response=answer,
            conversation_id="",
            message_id="",
            cited_rules=cited,
            confidence=1.0 if cited else 0.0,
            needs_human=not cited,
        )
def get_llm_client(use_mock: bool = False):
    """Factory to get the appropriate LLM client.

    Returns the real OpenRouter client only when the caller did not ask
    for a mock AND an API key is configured; otherwise falls back to
    MockLLMClient.
    """
    if not use_mock and settings.openrouter_api_key:
        return OpenRouterClient()
    return MockLLMClient()