Domain layer (zero framework imports): - domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult, Conversation, ChatMessage, LLMResponse, ChatResult) - domain/ports.py: ABC interfaces (RuleRepository, LLMPort, ConversationStore, IssueTracker) - domain/services.py: ChatService orchestrates Q&A flow using only ports Outbound adapters (implement domain ports): - adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx client, robust JSON parsing, regex citation fallback - adapters/outbound/sqlite_convos.py: SQLiteConversationStore with async_sessionmaker, timezone-aware datetimes, cleanup support - adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown injection protection (fenced code blocks) - adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped similarity scores Inbound adapter: - adapters/inbound/api.py: thin FastAPI router with input validation (max_length constraints), proper HTTP status codes (503 for missing LLM) Configuration & wiring: - config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton) - config/container.py: create_app() factory with lifespan-managed DI - main.py: minimal entry point Test infrastructure (90 tests, all passing): - tests/fakes/: in-memory implementations of all 4 ports - tests/domain/: 26 tests for models and ChatService - tests/adapters/: 64 tests for all adapters using fakes/mocks - No real API calls, no model downloads, no disk I/O in fast tests Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build targets for new package layout. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
254 lines
9.5 KiB
Python
254 lines
9.5 KiB
Python
"""OpenRouter outbound adapter — implements LLMPort via the OpenRouter API.
|
|
|
|
This module is the sole owner of:
|
|
- The SYSTEM_PROMPT for the Strat-O-Matic rules assistant
|
|
- All JSON parsing / extraction logic for LLM responses
|
|
- The persistent httpx.AsyncClient connection pool
|
|
|
|
It returns domain.models.LLMResponse exclusively; no legacy app.* types leak
|
|
through this boundary.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
|
|
from domain.models import LLMResponse, RuleSearchResult
|
|
from domain.ports import LLMPort
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# System prompt
|
|
# ---------------------------------------------------------------------------
|
|
|
|
SYSTEM_PROMPT = """You are a helpful assistant for a Strat-O-Matic baseball league.
|
|
Your job is to answer questions about league rules and procedures using the provided rule excerpts.
|
|
|
|
CRITICAL RULES:
|
|
1. ONLY use information from the provided rules. If the rules don't contain the answer, say so clearly.
|
|
2. ALWAYS cite rule IDs when referencing a rule (e.g., "Rule 5.2.1(b) states that...")
|
|
3. If multiple rules are relevant, cite all of them.
|
|
4. If you're uncertain or the rules are ambiguous, say so and suggest asking a league administrator.
|
|
5. Keep responses concise but complete. Use examples when helpful from the rules.
|
|
6. Do NOT make up rules or infer beyond what's explicitly stated.
|
|
|
|
When answering:
|
|
- Start with a direct answer to the question
|
|
- Support with rule citations
|
|
- Include relevant details from the rules
|
|
- If no relevant rules found, explicitly state: "I don't have a rule that addresses this question."
|
|
|
|
Response format (JSON):
|
|
{
|
|
"answer": "Your response text",
|
|
"cited_rules": ["rule_id_1", "rule_id_2"],
|
|
"confidence": 0.0-1.0,
|
|
"needs_human": boolean
|
|
}
|
|
|
|
Higher confidence (0.8-1.0) when rules clearly answer the question.
|
|
Lower confidence (0.3-0.7) when rules partially address the question or are ambiguous.
|
|
Very low confidence (0.0-0.2) when rules don't address the question at all.
|
|
"""
|
|
|
|
# Regex for extracting rule IDs from free-text answers when cited_rules is empty.
# Matches patterns like "Rule 5.2.1(b)" or "Rule 7.4".
# The character class includes '.' so a sentence-ending period may be captured
# (e.g. "Rule 7.4." → raw match "7.4."). Matches are stripped of a trailing
# dot at the extraction site to normalise IDs like "7.4." → "7.4".
# NOTE: the class previously read ``a-b``, which matched only the literal
# letters 'a' and 'b' and truncated citations such as "Rule 5.3(c)".
# Widened to ``a-z`` so any lowercase subsection letter is captured; every
# string the old pattern matched is still matched by this one.
_RULE_ID_PATTERN = re.compile(r"Rule\s+([\d.()a-z]+)")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Adapter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class OpenRouterLLM(LLMPort):
    """Outbound adapter that calls the OpenRouter chat completions API.

    A single httpx.AsyncClient is reused across all calls (connection pooling).
    Call ``await adapter.close()`` when tearing down to release the pool.

    Args:
        api_key: Bearer token for the OpenRouter API.
        model: OpenRouter model identifier, e.g. ``"openai/gpt-4o-mini"``.
        base_url: Full URL for the chat completions endpoint.
        http_client: Optional pre-built httpx.AsyncClient (useful for testing).
            When *None* a new client is created with a 120-second timeout.
    """

    def __init__(
        self,
        api_key: str,
        model: str,
        base_url: str = "https://openrouter.ai/api/v1/chat/completions",
        http_client: Optional[httpx.AsyncClient] = None,
    ) -> None:
        if not api_key:
            raise ValueError("api_key must not be empty")
        self._api_key = api_key
        self._model = model
        self._base_url = base_url
        # 120 s accommodates slow model generations; an injected client keeps
        # whatever timeout it was built with.
        self._http: httpx.AsyncClient = http_client or httpx.AsyncClient(timeout=120.0)

    # ------------------------------------------------------------------
    # LLMPort implementation
    # ------------------------------------------------------------------

    async def generate_response(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict[str, str]]] = None,
    ) -> LLMResponse:
        """Call the OpenRouter API and return a structured LLMResponse.

        Args:
            question: The user's natural-language question.
            rules: Relevant rule excerpts retrieved from the knowledge base.
            conversation_history: Optional list of prior ``{"role": ..., "content": ...}``
                dicts. At most the last 6 messages are forwarded to stay within
                token budgets.

        Returns:
            LLMResponse with ``answer``, ``cited_rules``, ``confidence``, and
            ``needs_human`` populated from the LLM's JSON reply. On parse
            failure ``confidence=0.0`` and ``needs_human=True`` signal that
            the raw response could not be structured reliably.

        Raises:
            RuntimeError: When the API returns a non-200 HTTP status.
        """
        messages = self._build_messages(question, rules, conversation_history)

        logger.debug(
            "Sending request to OpenRouter model=%s messages=%d",
            self._model,
            len(messages),
        )

        response = await self._http.post(
            self._base_url,
            headers={
                "Authorization": f"Bearer {self._api_key}",
                "Content-Type": "application/json",
            },
            json={
                "model": self._model,
                "messages": messages,
                # Low temperature: rule Q&A should be deterministic-ish.
                "temperature": 0.3,
                "max_tokens": 1000,
                "top_p": 0.9,
            },
        )

        if response.status_code != 200:
            raise RuntimeError(
                f"OpenRouter API error: {response.status_code} - {response.text}"
            )

        result = response.json()
        content: str = result["choices"][0]["message"]["content"]

        logger.debug("Received response content length=%d", len(content))

        return self._parse_content(content, rules)

    async def close(self) -> None:
        """Release the underlying HTTP connection pool.

        Should be called when the adapter is no longer needed (e.g. on
        application shutdown) to avoid resource leaks.
        """
        await self._http.aclose()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_messages(
        self,
        question: str,
        rules: list[RuleSearchResult],
        conversation_history: Optional[list[dict[str, str]]],
    ) -> list[dict[str, str]]:
        """Assemble the messages list for the API request."""
        if rules:
            rules_context = "\n\n".join(
                f"Rule {r.rule_id}: {r.title}\n{r.content}" for r in rules
            )
            context_msg = (
                f"Here are the relevant rules for the question:\n\n{rules_context}"
            )
        else:
            context_msg = "No relevant rules were found in the knowledge base."

        messages: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]

        if conversation_history:
            # Limit to last 6 messages (3 exchanges) to avoid token overflow
            messages.extend(conversation_history[-6:])

        user_message = (
            f"{context_msg}\n\nUser question: {question}\n\n"
            "Answer the question based on the rules provided."
        )
        messages.append({"role": "user", "content": user_message})

        return messages

    def _parse_content(
        self, content: str, rules: list[RuleSearchResult]
    ) -> LLMResponse:
        """Parse the raw LLM content string into an LLMResponse.

        Handles these cases in order:
        1. JSON wrapped in a ```json ... ``` markdown fence.
        2. Bare JSON string.
        3. Anything that is not a JSON *object* carrying an ``"answer"`` key
           (plain text, a bare JSON list/number, a JSON object missing the
           field) — fallback with confidence=0.0, needs_human=True.
        """
        try:
            json_str = self._extract_json_string(content)
            parsed = json.loads(json_str)
        except (json.JSONDecodeError, KeyError, IndexError) as exc:
            logger.warning("Failed to parse LLM response as JSON: %s", exc)
            return self._fallback_response(content)

        # The model can emit syntactically valid JSON that is not the expected
        # shape (e.g. a bare list or string, or an object without "answer").
        # Previously that escaped as AttributeError/KeyError; treat it as a
        # parse failure so callers always receive a well-formed LLMResponse.
        if not isinstance(parsed, dict) or "answer" not in parsed:
            logger.warning("LLM JSON reply is not an object with an 'answer' field")
            return self._fallback_response(content)

        raw_cited = parsed.get("cited_rules", [])
        # Tolerate a non-list cited_rules value rather than crashing downstream.
        cited_rules: list[str] = (
            [str(r) for r in raw_cited] if isinstance(raw_cited, list) else []
        )

        # Regex fallback: if the model omitted cited_rules but mentioned rule
        # IDs inline, extract them from the answer text so callers have
        # attribution without losing information.
        if not cited_rules and rules:
            answer_text = str(parsed.get("answer", ""))
            # Strip a trailing dot from each match to handle sentence-ending
            # punctuation (e.g. "Rule 7.4." → "7.4").
            matches = [m.rstrip(".") for m in _RULE_ID_PATTERN.findall(answer_text)]
            cited_rules = list(dict.fromkeys(matches))  # deduplicate, preserve order

        try:
            confidence = float(parsed.get("confidence", 0.5))
        except (TypeError, ValueError):
            # Non-numeric confidence (e.g. "high"); fall back to the neutral default.
            confidence = 0.5

        return LLMResponse(
            answer=parsed["answer"],
            cited_rules=cited_rules,
            confidence=confidence,
            needs_human=bool(parsed.get("needs_human", False)),
        )

    @staticmethod
    def _fallback_response(content: str) -> LLMResponse:
        """Build the 'could not structure this reply' response from raw text."""
        return LLMResponse(
            answer=content,
            cited_rules=[],
            confidence=0.0,
            needs_human=True,
        )

    @staticmethod
    def _extract_json_string(content: str) -> str:
        """Strip optional markdown fences and return the raw JSON string."""
        if "```json" in content:
            return content.split("```json")[1].split("```")[0].strip()
        return content.strip()
|