strat-chatbot/adapters/outbound/openrouter.py
Cal Corum c3218f70c4 refactor: hexagonal architecture with ports & adapters, DI, and test-first development
Domain layer (zero framework imports):
- domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult,
  Conversation, ChatMessage, LLMResponse, ChatResult)
- domain/ports.py: ABC interfaces (RuleRepository, LLMPort,
  ConversationStore, IssueTracker)
- domain/services.py: ChatService orchestrates Q&A flow using only ports

Outbound adapters (implement domain ports):
- adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx
  client, robust JSON parsing, regex citation fallback
- adapters/outbound/sqlite_convos.py: SQLiteConversationStore with
  async_sessionmaker, timezone-aware datetimes, cleanup support
- adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown
  injection protection (fenced code blocks)
- adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped
  similarity scores

Inbound adapter:
- adapters/inbound/api.py: thin FastAPI router with input validation
  (max_length constraints), proper HTTP status codes (503 for missing LLM)

Configuration & wiring:
- config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton)
- config/container.py: create_app() factory with lifespan-managed DI
- main.py: minimal entry point

Test infrastructure (90 tests, all passing):
- tests/fakes/: in-memory implementations of all 4 ports
- tests/domain/: 26 tests for models and ChatService
- tests/adapters/: 64 tests for all adapters using fakes/mocks
- No real API calls, no model downloads, no disk I/O in fast tests

Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build
targets for new package layout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 15:51:16 -05:00

254 lines
9.5 KiB
Python

"""OpenRouter outbound adapter — implements LLMPort via the OpenRouter API.
This module is the sole owner of:
- The SYSTEM_PROMPT for the Strat-O-Matic rules assistant
- All JSON parsing / extraction logic for LLM responses
- The persistent httpx.AsyncClient connection pool
It returns domain.models.LLMResponse exclusively; no legacy app.* types leak
through this boundary.
"""
from __future__ import annotations
import json
import logging
import re
from typing import Optional
import httpx
from domain.models import LLMResponse, RuleSearchResult
from domain.ports import LLMPort
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------
# NOTE: this prompt instructs the model to reply with a JSON object of the
# shape {"answer", "cited_rules", "confidence", "needs_human"}.  The adapter's
# response parsing reads exactly those four keys, so prompt and parser must be
# kept in sync; non-JSON replies are handled by a low-confidence fallback.
SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league.
Your job is to answer questions about league rules and procedures using the provided rule excerpts.
CRITICAL RULES:
1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly.
2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...")
3. If multiple rules are relevant, cite all of them.
4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator.
5. Keep responses concise but complete. Use examples when helpful from the rules.
6. Do NOT make up rules or infer beyond what's explicitly stated.
When answering:
- Start with a direct answer to the question
- Support with rule citations
- Include relevant details from the rules
- If no relevant rules found, explicitly state: "I don't have a rule that addresses this question."
Response format (JSON):
{
"answer": "Your response text",
"cited_rules": ["rule_id_1", "rule_id_2"],
"confidence": 0.0-1.0,
"needs_human": boolean
}
Higher confidence (0.8-1.0) when rules clearly answer the question.
Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous.
Very low confidence (0.0-0.2) when rules don't address the question at all.
"""
# Regex for extracting rule IDs from free-text answers when cited_rules is empty.
# Matches patterns like "Rule 5.2.1(b)" or "Rule 7.4".
# The character class includes '.' so a sentence-ending period may be captured
# (e.g. "Rule 7.4." → raw match "7.4."). Matches are stripped of a trailing
# dot at the extraction site to normalise IDs like "7.4." → "7.4".
_RULE_ID_PATTERN = re.compile(r"Rule\s+([\d\.\(\)a-b]+)")
# ---------------------------------------------------------------------------
# Adapter
# ---------------------------------------------------------------------------
class OpenRouterLLM(LLMPort):
    """Outbound adapter that calls the OpenRouter chat completions API.

    A single httpx.AsyncClient is reused across all calls (connection pooling).
    Call ``await adapter.close()`` when tearing down to release the pool.

    Args:
        api_key: Bearer token for the OpenRouter API.
        model: OpenRouter model identifier, e.g. ``"openai/gpt-4o-mini"``.
        base_url: Full URL for the chat completions endpoint.
        http_client: Optional pre-built httpx.AsyncClient (useful for testing).
            When *None* a new client is created with a 120-second timeout.
    """

    def __init__(
        self,
        api_key: str,
        model: str,
        base_url: str = "https://openrouter.ai/api/v1/chat/completions",
        http_client: Optional[httpx.AsyncClient] = None,
    ) -> None:
        if not api_key:
            raise ValueError("api_key must not be empty")
        self._api_key = api_key
        self._model = model
        self._base_url = base_url
        # One persistent client for the adapter's lifetime; released in close().
        self._http: httpx.AsyncClient = http_client or httpx.AsyncClient(timeout=120.0)

    # ------------------------------------------------------------------
    # LLMPort implementation
    # ------------------------------------------------------------------
    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict[str, str]]] = None,
    ) -> LLMResponse:
        """Call the OpenRouter API and return a structured LLMResponse.

        Args:
            question: The user's natural-language question.
            rules: Relevant rule excerpts retrieved from the knowledge base.
            conversation_history: Optional list of prior ``{"role": ..., "content": ...}``
                dicts. At most the last 6 messages are forwarded to stay within
                token budgets.

        Returns:
            LLMResponse with ``answer``, ``cited_rules``, ``confidence``, and
            ``needs_human`` populated from the LLM's JSON reply. On parse
            failure ``confidence=0.0`` and ``needs_human=True`` signal that
            the raw response could not be structured reliably.

        Raises:
            RuntimeError: When the API returns a non-200 HTTP status.
        """
        messages = self._build_messages(question, rules, conversation_history)
        logger.debug(
            "Sending request to OpenRouter model=%s messages=%d",
            self._model,
            len(messages),
        )
        response = await self._http.post(
            self._base_url,
            headers={
                "Authorization": f"Bearer {self._api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": self._model,
                "messages": messages,
                "temperature": 0.3,
                "max_tokens": 1000,
                "top_p": 0.9,
            },
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"OpenRouter API error: {response.status_code} - {response.text}"
            )
        result = response.json()
        content: str = result["choices"][0]["message"]["content"]
        logger.debug("Received response content length=%d", len(content))
        return self._parse_content(content, rules)

    async def close(self) -> None:
        """Release the underlying HTTP connection pool.

        Should be called when the adapter is no longer needed (e.g. on
        application shutdown) to avoid resource leaks.
        """
        await self._http.aclose()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _build_messages(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict[str, str]]],
    ) -> list[dict[str, str]]:
        """Assemble the messages list for the API request."""
        if rules:
            rules_context = "\n\n".join(
                f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules
            )
            context_msg = (
                f"Here are the relevant rules for the question:\n\n{rules_context}"
            )
        else:
            context_msg = "No relevant rules were found in the knowledge base."
        messages: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
        if conversation_history:
            # Limit to last 6 messages (3 exchanges) to avoid token overflow
            messages.extend(conversation_history[-6:])
        user_message = (
            f"{context_msg}\n\nUser question: {question}\n\n"
            "Answer the question based on the rules provided."
        )
        messages.append({"role": "user", "content": user_message})
        return messages

    def _parse_content(
        self, content: str, rules: list[RuleSearchResult]
    ) -> LLMResponse:
        """Parse the raw LLM content string into an LLMResponse.

        Handles these cases in order:
        1. JSON wrapped in a markdown fence (```json or a bare ```).
        2. Bare JSON string.
        3. Anything unusable (non-JSON, non-object JSON, or an object with a
           missing/non-string ``answer``) — falls back to the raw content with
           confidence=0.0 and needs_human=True.
        """
        try:
            json_str = self._extract_json_string(content)
            parsed = json.loads(json_str)
        except (json.JSONDecodeError, KeyError, IndexError) as exc:
            logger.warning("Failed to parse LLM response as JSON: %s", exc)
            return self._fallback_response(content)
        # The model can emit valid JSON that is not the expected object
        # (e.g. a bare string or a list); treat that like a parse failure.
        if not isinstance(parsed, dict):
            logger.warning("LLM JSON response is not an object: %s", type(parsed))
            return self._fallback_response(content)
        answer = parsed.get("answer")
        if not isinstance(answer, str) or not answer:
            # Previously `parsed["answer"]` raised an uncaught KeyError here;
            # degrade gracefully instead and flag for human review.
            logger.warning("LLM JSON response missing usable 'answer' field")
            return self._fallback_response(content)
        raw_cited = parsed.get("cited_rules")
        cited_rules: list[str] = (
            [str(r) for r in raw_cited] if isinstance(raw_cited, list) else []
        )
        # Regex fallback: if the model omitted cited_rules but mentioned rule
        # IDs inline, extract them from the answer text so callers have
        # attribution without losing information.
        if not cited_rules and rules:
            # Strip a trailing dot from each match to handle sentence-ending
            # punctuation (e.g. "Rule 7.4." → "7.4").
            matches = [m.rstrip(".") for m in _RULE_ID_PATTERN.findall(answer)]
            cited_rules = list(dict.fromkeys(matches))  # deduplicate, preserve order
        try:
            confidence = float(parsed.get("confidence", 0.5))
        except (TypeError, ValueError):
            # Non-numeric confidence from the model: fall back to neutral.
            confidence = 0.5
        # Clamp to the documented [0.0, 1.0] range (matching the clamped
        # similarity scores used elsewhere in this project).
        confidence = max(0.0, min(1.0, confidence))
        return LLMResponse(
            answer=answer,
            cited_rules=cited_rules,
            confidence=confidence,
            needs_human=bool(parsed.get("needs_human", False)),
        )

    @staticmethod
    def _fallback_response(content: str) -> LLMResponse:
        """Build the degraded response used when the LLM reply is unusable."""
        return LLMResponse(
            answer=content,
            cited_rules=[],
            confidence=0.0,
            needs_human=True,
        )

    @staticmethod
    def _extract_json_string(content: str) -> str:
        """Strip optional markdown fences and return the raw JSON string."""
        if "```json" in content:
            return content.split("```json")[1].split("```")[0].strip()
        stripped = content.strip()
        # Also tolerate a bare ``` fence without a language tag.
        if stripped.startswith("```") and stripped.endswith("```") and len(stripped) > 6:
            return stripped[3:-3].strip()
        return stripped