- Add vector store with sentence-transformers for semantic search
- FastAPI backend with /chat and /health endpoints
- Conversation state persistence via SQLite
- OpenRouter integration with structured JSON responses
- Discord bot with /ask slash command and reply-based follow-ups
- Automated Gitea issue creation for unanswered questions
- Docker support with docker-compose for easy deployment
- Example rule file and ingestion script
- Comprehensive documentation in README
180 lines · 6.5 KiB · Python
"""OpenRouter LLM integration for answering rules questions."""
|
|
|
|
from typing import Optional
|
|
import json
|
|
import httpx
|
|
from .config import settings
|
|
from .models import RuleSearchResult, ChatResponse
|
|
|
|
# Prompt that pins the LLM to rule-grounded, citation-heavy answers and asks
# for a machine-readable JSON payload; OpenRouterClient.generate_response
# parses the "answer"/"cited_rules"/"confidence"/"needs_human" fields below.
SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league.
Your job is to answer questions about league rules and procedures using the provided rule excerpts.

CRITICAL RULES:
1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly.
2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...")
3. If multiple rules are relevant, cite all of them.
4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator.
5. Keep responses concise but complete. Use examples when helpful from the rules.
6. Do NOT make up rules or infer beyond what's explicitly stated.

When answering:
- Start with a direct answer to the question
- Support with rule citations
- Include relevant details from the rules
- If no relevant rules found, explicitly state: "I don't have a rule that addresses this question."

Response format (JSON):
{
"answer": "Your response text",
"cited_rules": ["rule_id_1", "rule_id_2"],
"confidence": 0.0-1.0,
"needs_human": boolean
}

Higher confidence (0.8-1.0) when rules clearly answer the question.
Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous.
Very low confidence (0.0-0.2) when rules don't address the question at all.
"""
|
class OpenRouterClient:
    """Client for the OpenRouter chat-completions API."""

    def __init__(self) -> None:
        """Initialize the client from application settings.

        Raises:
            ValueError: If OPENROUTER_API_KEY is not configured.
        """
        self.api_key = settings.openrouter_api_key
        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY is required")
        self.model = settings.openrouter_model
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"

    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict]] = None,
    ) -> ChatResponse:
        """Generate a response using the LLM with retrieved rules as context.

        Args:
            question: The user's rules question.
            rules: Retrieved rule excerpts used to ground the answer; an
                empty list tells the model no rules were found.
            conversation_history: Prior turns as role/content dicts; only
                the last 6 entries (3 user+assistant exchanges) are sent
                to limit token usage.

        Returns:
            A ChatResponse with empty conversation_id/message_id — the
            caller is expected to fill those in.

        Raises:
            RuntimeError: If the OpenRouter API returns a non-200 status.
        """
        # Build the rules context shown to the model.
        if rules:
            rules_context = "\n\n".join(
                f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules
            )
            context_msg = (
                f"Here are the relevant rules for the question:\n\n{rules_context}"
            )
        else:
            context_msg = "No relevant rules were found in the knowledge base."

        # Build the message list: system prompt, recent history, question.
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        if conversation_history:
            # Last 3 exchanges (user+assistant) to avoid token overflow.
            messages.extend(conversation_history[-6:])

        user_message = f"{context_msg}\n\nUser question: {question}\n\nAnswer the question based on the rules provided."
        messages.append({"role": "user", "content": user_message})

        # Call the OpenRouter API.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(
                self.base_url,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.model,
                    "messages": messages,
                    "temperature": 0.3,
                    "max_tokens": 1000,
                    "top_p": 0.9,
                },
            )

        if response.status_code != 200:
            raise RuntimeError(
                f"OpenRouter API error: {response.status_code} - {response.text}"
            )

        result = response.json()
        content = result["choices"][0]["message"]["content"]

        # Parse the structured JSON the system prompt asked for.
        try:
            # The LLM may wrap its JSON in markdown fences; strip either a
            # ```json fence or a bare ``` fence.  (Fix: the original only
            # handled the ```json variant, so plain-fenced JSON failed.)
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()

            parsed = json.loads(json_str)

            cited_rules = parsed.get("cited_rules", [])
            if not cited_rules and rules:
                # Fallback: pull rule IDs out of the answer text when the
                # model didn't populate cited_rules properly.
                import re

                # Fix: the original class [\d\.\(\)a-b] matched only the
                # letters 'a' and 'b', dropping IDs like "5.2.1(c)".
                rule_ids = re.findall(
                    r"Rule\s+([\d.()a-z]+)", parsed.get("answer", ""), re.IGNORECASE
                )
                # Dedupe while keeping first-seen order (list(set(...))
                # produced a nondeterministic citation order).
                cited_rules = list(dict.fromkeys(rule_ids))

            return ChatResponse(
                response=parsed["answer"],
                conversation_id="",  # Will be set by caller
                message_id="",  # Will be set by caller
                cited_rules=cited_rules,
                confidence=float(parsed.get("confidence", 0.5)),
                needs_human=bool(parsed.get("needs_human", False)),
            )
        except (json.JSONDecodeError, KeyError):
            # Model didn't follow the JSON contract; fall back to returning
            # the raw text with middling confidence rather than crashing.
            return ChatResponse(
                response=content,
                conversation_id="",
                message_id="",
                cited_rules=[],
                confidence=0.5,
                needs_human=False,
            )
|
|
|
|
|
|
class MockLLMClient:
    """Stand-in LLM client that fabricates answers without any API calls."""

    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict]] = None,
    ) -> ChatResponse:
        """Return a canned ChatResponse derived solely from the given rules."""
        matched_ids = [rule.rule_id for rule in rules]

        if matched_ids:
            answer = (
                f"Based on rule(s) {', '.join(matched_ids)}, "
                "here's what you need to know..."
            )
        else:
            answer = (
                "I don't have a rule that addresses this question. "
                "You should ask a league administrator."
            )

        return ChatResponse(
            response=answer,
            conversation_id="",
            message_id="",
            cited_rules=matched_ids,
            confidence=1.0 if matched_ids else 0.0,
            needs_human=not matched_ids,
        )
|
|
|
|
|
|
def get_llm_client(use_mock: bool = False):
    """Factory to get the appropriate LLM client.

    Falls back to the mock client whenever no OpenRouter API key is
    configured, so the app stays usable in offline/test environments.
    """
    real_backend_available = bool(settings.openrouter_api_key)
    if real_backend_available and not use_mock:
        return OpenRouterClient()
    return MockLLMClient()
|