strat-chatbot/app/llm.py
Cal Corum c42fea66ba feat: initial chatbot implementation with FastAPI, ChromaDB, Discord bot, and Gitea integration
- Add vector store with sentence-transformers for semantic search
- FastAPI backend with /chat and /health endpoints
- Conversation state persistence via SQLite
- OpenRouter integration with structured JSON responses
- Discord bot with /ask slash command and reply-based follow-ups
- Automated Gitea issue creation for unanswered questions
- Docker support with docker-compose for easy deployment
- Example rule file and ingestion script
- Comprehensive documentation in README
2026-03-08 15:19:26 -05:00

180 lines
6.5 KiB
Python

"""OpenRouter LLM integration for answering rules questions."""
import json
import re
from typing import Optional

import httpx

from .config import settings
from .models import RuleSearchResult, ChatResponse
# System prompt sent with every request. It instructs the model to answer
# only from the supplied rule excerpts and to reply in the JSON shape that
# OpenRouterClient.generate_response parses (answer / cited_rules /
# confidence / needs_human). Do not edit casually: the confidence bands and
# the "I don't have a rule..." sentence are part of the downstream contract.
SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league.
Your job is to answer questions about league rules and procedures using the provided rule excerpts.
CRITICAL RULES:
1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly.
2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...")
3. If multiple rules are relevant, cite all of them.
4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator.
5. Keep responses concise but complete. Use examples when helpful from the rules.
6. Do NOT make up rules or infer beyond what's explicitly stated.
When answering:
- Start with a direct answer to the question
- Support with rule citations
- Include relevant details from the rules
- If no relevant rules found, explicitly state: "I don't have a rule that addresses this question."
Response format (JSON):
{
"answer": "Your response text",
"cited_rules": ["rule_id_1", "rule_id_2"],
"confidence": 0.0-1.0,
"needs_human": boolean
}
Higher confidence (0.8-1.0) when rules clearly answer the question.
Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous.
Very low confidence (0.0-0.2) when rules don't address the question at all.
"""
class OpenRouterClient:
"""Client for OpenRouter API."""
def __init__(self):
"""Initialize the client."""
self.api_key = settings.openrouter_api_key
if not self.api_key:
raise ValueError("OPENROUTER_API_KEY is required")
self.model = settings.openrouter_model
self.base_url = "https://openrouter.ai/api/v1/chat/completions"
async def generate_response(
self,
question: str,
rules: list[RuleSearchResult],
conversation_history: Optional[list[dict]] = None,
) -> ChatResponse:
"""Generate a response using the LLM with retrieved rules as context."""
# Build context from rules
rules_context = "\n\n".join(
[f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules]
)
if rules:
context_msg = (
f"Here are the relevant rules for the question:\n\n{rules_context}"
)
else:
context_msg = "No relevant rules were found in the knowledge base."
# Build conversation history
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
if conversation_history:
# Add last few turns of conversation (limit to avoid token overflow)
messages.extend(
conversation_history[-6:]
) # Last 3 exchanges (user+assistant)
# Add current question with context
user_message = f"{context_msg}\n\nUser question: {question}\n\nAnswer the question based on the rules provided."
messages.append({"role": "user", "content": user_message})
# Call OpenRouter API
async with httpx.AsyncClient(timeout=120.0) as client:
response = await client.post(
self.base_url,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
},
json={
"model": self.model,
"messages": messages,
"temperature": 0.3,
"max_tokens": 1000,
"top_p": 0.9,
},
)
if response.status_code != 200:
error_detail = response.text
raise RuntimeError(
f"OpenRouter API error: {response.status_code} - {error_detail}"
)
result = response.json()
content = result["choices"][0]["message"]["content"]
# Parse the JSON response
try:
# Extract JSON from response (LLM might add markdown formatting)
if "```json" in content:
json_str = content.split("```json")[1].split("```")[0].strip()
else:
json_str = content.strip()
parsed = json.loads(json_str)
cited_rules = parsed.get("cited_rules", [])
if not cited_rules and rules:
# Fallback: extract rule IDs from the text if not properly returned
import re
rule_ids = re.findall(
r"Rule\s+([\d\.\(\)a-b]+)", parsed.get("answer", "")
)
cited_rules = list(set(rule_ids))
return ChatResponse(
response=parsed["answer"],
conversation_id="", # Will be set by caller
message_id="", # Will be set by caller
cited_rules=cited_rules,
confidence=float(parsed.get("confidence", 0.5)),
needs_human=bool(parsed.get("needs_human", False)),
)
except (json.JSONDecodeError, KeyError) as e:
# If parsing fails, return what we can extract
return ChatResponse(
response=content,
conversation_id="",
message_id="",
cited_rules=[],
confidence=0.5,
needs_human=False,
)
class MockLLMClient:
    """Stand-in LLM client that fabricates answers without any API calls.

    Useful for tests and for running the app without an OpenRouter key.
    """

    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict]] = None,
    ) -> ChatResponse:
        """Return a canned response derived only from the retrieved rules."""
        cited = [r.rule_id for r in rules]
        if cited:
            answer = (
                f"Based on rule(s) {', '.join(cited)}, "
                "here's what you need to know..."
            )
        else:
            answer = (
                "I don't have a rule that addresses this question. "
                "You should ask a league administrator."
            )
        # Full confidence when any rule matched; escalate to a human when none did.
        return ChatResponse(
            response=answer,
            conversation_id="",
            message_id="",
            cited_rules=cited,
            confidence=1.0 if cited else 0.0,
            needs_human=not cited,
        )
def get_llm_client(use_mock: bool = False):
    """Factory to get the appropriate LLM client.

    Returns the real OpenRouter client only when the caller did not ask
    for a mock AND an API key is configured; otherwise falls back to
    MockLLMClient.
    """
    if not use_mock and settings.openrouter_api_key:
        return OpenRouterClient()
    return MockLLMClient()