"""OpenRouter outbound adapter — implements LLMPort via the OpenRouter API. This module is the sole owner of: - The SYSTEM_PROMPT for the Strat-O-Matic rules assistant - All JSON parsing / extraction logic for LLM responses - The persistent httpx.AsyncClient connection pool It returns domain.models.LLMResponse exclusively; no legacy app.* types leak through this boundary. """ from __future__ import annotations import json import logging import re from typing import Optional import httpx from domain.models import LLMResponse, RuleSearchResult from domain.ports import LLMPort logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # System prompt # --------------------------------------------------------------------------- SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league. Your job is to answer questions about league rules and procedures using the provided rule excerpts. CRITICAL RULES: 1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly. 2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...") 3. If multiple rules are relevant, cite all of them. 4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator. 5. Keep responses concise but complete. Use examples when helpful from the rules. 6. Do NOT make up rules or infer beyond what's explicitly stated. When answering: - Start with a direct answer to the question - Support with rule citations - Include relevant details from the rules - If no relevant rules found, explicitly state: "I don't have a rule that addresses this question." Response format (JSON): { "answer": "Your response text", "cited_rules": ["rule_id_1", "rule_id_2"], "confidence": 0.0-1.0, "needs_human": boolean } Higher confidence (0.8-1.0) when rules clearly answer the question. Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous. Very low confidence (0.0-0.2) when rules don't address the question at all. """ # Regex for extracting rule IDs from free-text answers when cited_rules is empty. # Matches patterns like "Rule 5.2.1(b)" or "Rule 7.4". # The character class includes '.' so a sentence-ending period may be captured # (e.g. "Rule 7.4." → raw match "7.4."). Matches are stripped of a trailing # dot at the extraction site to normalise IDs like "7.4." → "7.4". _RULE_ID_PATTERN = re.compile(r"Rule\s+([\d\.\(\)a-b]+)") # --------------------------------------------------------------------------- # Adapter # --------------------------------------------------------------------------- class OpenRouterLLM(LLMPort): """Outbound adapter that calls the OpenRouter chat completions API. A single httpx.AsyncClient is reused across all calls (connection pooling). Call ``await adapter.close()`` when tearing down to release the pool. Args: api_key: Bearer token for the OpenRouter API. model: OpenRouter model identifier, e.g. ``"openai/gpt-4o-mini"``. base_url: Full URL for the chat completions endpoint. http_client: Optional pre-built httpx.AsyncClient (useful for testing). When *None* a new client is created with a 120-second timeout. """ def __init__( self, api_key: str, model: str, base_url: str = "https://openrouter.ai/api/v1/chat/completions", http_client: Optional[httpx.AsyncClient] = None, ) -> None: if not api_key: raise ValueError("api_key must not be empty") self._api_key = api_key self._model = model self._base_url = base_url self._http: httpx.AsyncClient = http_client or httpx.AsyncClient(timeout=120.0) # ------------------------------------------------------------------ # LLMPort implementation # ------------------------------------------------------------------ async def generate_response( self, question: str, rules: list[RuleSearchResult], conversation_history: Optional[list[dict[str, str]]] = None, ) -> LLMResponse: """Call the OpenRouter API and return a structured LLMResponse. Args: question: The user's natural-language question. rules: Relevant rule excerpts retrieved from the knowledge base. conversation_history: Optional list of prior ``{"role": ..., "content": ...}`` dicts. At most the last 6 messages are forwarded to stay within token budgets. Returns: LLMResponse with ``answer``, ``cited_rules``, ``confidence``, and ``needs_human`` populated from the LLM's JSON reply. On parse failure ``confidence=0.0`` and ``needs_human=True`` signal that the raw response could not be structured reliably. Raises: RuntimeError: When the API returns a non-200 HTTP status. """ messages = self._build_messages(question, rules, conversation_history) logger.debug( "Sending request to OpenRouter model=%s messages=%d", self._model, len(messages), ) response = await self._http.post( self._base_url, headers={ "Authorization": f"Bearer {self._api_key}", "Content-Type": "application/json", }, json={ "model": self._model, "messages": messages, "temperature": 0.3, "max_tokens": 1000, "top_p": 0.9, }, ) if response.status_code != 200: raise RuntimeError( f"OpenRouter API error: {response.status_code} - {response.text}" ) result = response.json() content: str = result["choices"][0]["message"]["content"] logger.debug("Received response content length=%d", len(content)) return self._parse_content(content, rules) async def close(self) -> None: """Release the underlying HTTP connection pool. Should be called when the adapter is no longer needed (e.g. on application shutdown) to avoid resource leaks. """ await self._http.aclose() # ------------------------------------------------------------------ # Private helpers # ------------------------------------------------------------------ def _build_messages( self, question: str, rules: list[RuleSearchResult], conversation_history: Optional[list[dict[str, str]]], ) -> list[dict[str, str]]: """Assemble the messages list for the API request.""" if rules: rules_context = "\n\n".join( f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules ) context_msg = ( f"Here are the relevant rules for the question:\n\n{rules_context}" ) else: context_msg = "No relevant rules were found in the knowledge base." messages: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}] if conversation_history: # Limit to last 6 messages (3 exchanges) to avoid token overflow messages.extend(conversation_history[-6:]) user_message = ( f"{context_msg}\n\nUser question: {question}\n\n" "Answer the question based on the rules provided." ) messages.append({"role": "user", "content": user_message}) return messages def _parse_content( self, content: str, rules: list[RuleSearchResult] ) -> LLMResponse: """Parse the raw LLM content string into an LLMResponse. Handles three cases in order: 1. JSON wrapped in a ```json ... ``` markdown fence. 2. Bare JSON string. 3. Plain text (fallback) — sets confidence=0.0, needs_human=True. """ try: json_str = self._extract_json_string(content) parsed = json.loads(json_str) except (json.JSONDecodeError, KeyError, IndexError) as exc: logger.warning("Failed to parse LLM response as JSON: %s", exc) return LLMResponse( answer=content, cited_rules=[], confidence=0.0, needs_human=True, ) cited_rules: list[str] = parsed.get("cited_rules", []) # Regex fallback: if the model omitted cited_rules but mentioned rule # IDs inline, extract them from the answer text so callers have # attribution without losing information. if not cited_rules and rules: answer_text: str = parsed.get("answer", "") # Strip a trailing dot from each match to handle sentence-ending # punctuation (e.g. "Rule 7.4." → "7.4"). matches = [m.rstrip(".") for m in _RULE_ID_PATTERN.findall(answer_text)] cited_rules = list(dict.fromkeys(matches)) # deduplicate, preserve order return LLMResponse( answer=parsed["answer"], cited_rules=cited_rules, confidence=float(parsed.get("confidence", 0.5)), needs_human=bool(parsed.get("needs_human", False)), ) @staticmethod def _extract_json_string(content: str) -> str: """Strip optional markdown fences and return the raw JSON string.""" if "```json" in content: return content.split("```json")[1].split("```")[0].strip() return content.strip()