Domain layer (zero framework imports): - domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult, Conversation, ChatMessage, LLMResponse, ChatResult) - domain/ports.py: ABC interfaces (RuleRepository, LLMPort, ConversationStore, IssueTracker) - domain/services.py: ChatService orchestrates Q&A flow using only ports Outbound adapters (implement domain ports): - adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx client, robust JSON parsing, regex citation fallback - adapters/outbound/sqlite_convos.py: SQLiteConversationStore with async_sessionmaker, timezone-aware datetimes, cleanup support - adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown injection protection (fenced code blocks) - adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped similarity scores Inbound adapter: - adapters/inbound/api.py: thin FastAPI router with input validation (max_length constraints), proper HTTP status codes (503 for missing LLM) Configuration & wiring: - config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton) - config/container.py: create_app() factory with lifespan-managed DI - main.py: minimal entry point Test infrastructure (90 tests, all passing): - tests/fakes/: in-memory implementations of all 4 ports - tests/domain/: 26 tests for models and ChatService - tests/adapters/: 64 tests for all adapters using fakes/mocks - No real API calls, no model downloads, no disk I/O in fast tests Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build targets for new package layout. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
393 lines
14 KiB
Python
"""Tests for the OpenRouterLLM outbound adapter.
|
|
|
|
Tests cover:
|
|
- Successful JSON response parsing from the LLM
|
|
- JSON embedded in markdown code fences (```json ... ```)
|
|
- Plain-text fallback when JSON parsing fails completely
|
|
- HTTP error status codes raising RuntimeError
|
|
- Regex fallback for cited_rules when the LLM omits them but mentions rules in text
|
|
- Conversation history is forwarded correctly to the API
|
|
- The adapter returns domain.models.LLMResponse, not any legacy type
|
|
- close() shuts down the underlying httpx client
|
|
|
|
All HTTP calls are intercepted via unittest.mock so no real API key is needed.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from domain.models import LLMResponse, RuleSearchResult
|
|
from domain.ports import LLMPort
|
|
from adapters.outbound.openrouter import OpenRouterLLM
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_rules(*rule_ids: str) -> list[RuleSearchResult]:
    """Build one minimal RuleSearchResult fixture per given rule id."""
    fixtures: list[RuleSearchResult] = []
    for rid in rule_ids:
        fixtures.append(
            RuleSearchResult(
                rule_id=rid,
                title=f"Title for {rid}",
                content=f"Content for rule {rid}.",
                section="General",
                similarity=0.9,
            )
        )
    return fixtures
|
|
|
|
|
|
def _api_payload(content: str) -> dict:
|
|
"""Wrap a content string in the OpenRouter / OpenAI response envelope."""
|
|
return {"choices": [{"message": {"content": content}}]}
|
|
|
|
|
|
def _mock_http_response(
|
|
status_code: int = 200, body: dict | str | None = None
|
|
) -> MagicMock:
|
|
"""Build a mock httpx.Response with the given status and JSON body."""
|
|
resp = MagicMock()
|
|
resp.status_code = status_code
|
|
if isinstance(body, dict):
|
|
resp.json.return_value = body
|
|
resp.text = json.dumps(body)
|
|
else:
|
|
resp.json.side_effect = ValueError("not JSON")
|
|
resp.text = body or ""
|
|
return resp
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture()
def adapter() -> OpenRouterLLM:
    """Return an OpenRouterLLM wired to a mocked httpx.AsyncClient.

    httpx.AsyncClient is patched for the duration of __init__ so the adapter
    never opens a real connection pool; tests drive behaviour through the
    mock exposed as ``adapter._http``.
    """
    fake_http = AsyncMock()
    patch_target = "adapters.outbound.openrouter.httpx.AsyncClient"
    with patch(patch_target, return_value=fake_http):
        llm = OpenRouterLLM(api_key="test-key", model="test-model")
        llm._http = fake_http
    return llm
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Interface compliance
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_openrouter_llm_implements_port():
    """OpenRouterLLM must be a concrete implementation of LLMPort.

    issubclass reflects missing abstract-method overrides at
    class-definition time, not just at instantiation time.
    """
    is_port_impl = issubclass(OpenRouterLLM, LLMPort)
    assert is_port_impl
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Successful JSON response
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_successful_json_response(adapter: OpenRouterLLM):
    """A well-formed JSON body from the LLM is parsed into LLMResponse.

    answer, cited_rules, confidence, and needs_human must all be mapped
    straight from the parsed JSON fields.
    """
    model_output = json.dumps(
        {
            "answer": "The runner advances one base.",
            "cited_rules": ["5.2.1(b)", "5.2.2"],
            "confidence": 0.9,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Can the runner advance?", _make_rules("5.2.1(b)", "5.2.2")
    )

    assert isinstance(result, LLMResponse)
    assert result.answer == "The runner advances one base."
    assert "5.2.1(b)" in result.cited_rules
    assert "5.2.2" in result.cited_rules
    assert result.confidence == pytest.approx(0.9)
    assert result.needs_human is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Markdown-fenced JSON response
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_markdown_fenced_json_response(adapter: OpenRouterLLM):
    """LLMs often wrap JSON in ```json ... ``` fences.

    The adapter must strip the fences before parsing so responses formatted
    this way are handled identically to bare JSON.
    """
    model_output = json.dumps(
        {
            "answer": "No, the batter is out.",
            "cited_rules": ["3.1"],
            "confidence": 0.85,
            "needs_human": False,
        }
    )
    fenced = f"```json\n{model_output}\n```"
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(fenced))
    )

    result = await adapter.generate_response("Is the batter out?", _make_rules("3.1"))

    assert isinstance(result, LLMResponse)
    assert result.answer == "No, the batter is out."
    assert result.cited_rules == ["3.1"]
    assert result.confidence == pytest.approx(0.85)
    assert result.needs_human is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Plain-text fallback (JSON parse failure)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plain_text_fallback_on_parse_failure(adapter: OpenRouterLLM):
    """Plain text that cannot be parsed as JSON triggers a graceful fallback:

    - answer = raw content string
    - cited_rules = []
    - confidence = 0.0 (not 0.5, signalling unreliable parse)
    - needs_human = True (not False, signalling human review needed)
    """
    raw_answer = "I'm not sure which rule covers this situation."
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(raw_answer))
    )

    result = await adapter.generate_response("Which rule applies?", [])

    assert isinstance(result, LLMResponse)
    assert result.answer == raw_answer
    assert result.cited_rules == []
    assert result.confidence == pytest.approx(0.0)
    assert result.needs_human is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP error codes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_http_error_raises_runtime_error(adapter: OpenRouterLLM):
    """Non-200 HTTP status codes from the API must raise RuntimeError.

    This ensures upstream callers (the service layer) can catch a predictable
    exception type and decide whether to retry or surface an error message.
    """
    rate_limited = _mock_http_response(429, "Rate limit exceeded")
    adapter._http.post = AsyncMock(return_value=rate_limited)

    with pytest.raises(RuntimeError, match="429"):
        await adapter.generate_response("Any question", [])
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_http_500_raises_runtime_error(adapter: OpenRouterLLM):
    """A 500 Internal Server Error from OpenRouter also raises RuntimeError."""
    server_error = _mock_http_response(500, "Internal server error")
    adapter._http.post = AsyncMock(return_value=server_error)

    with pytest.raises(RuntimeError, match="500"):
        await adapter.generate_response("Any question", [])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# cited_rules regex fallback
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cited_rules_regex_fallback(adapter: OpenRouterLLM):
    """An empty cited_rules list in otherwise-valid JSON triggers the regex
    fallback: rule IDs mentioned in the answer text are extracted and used to
    populate cited_rules.

    This preserves rule attribution even when the model forgets the field.
    """
    model_output = json.dumps(
        {
            "answer": "According to Rule 5.2.1(b) the runner must advance. See also Rule 7.4.",
            "cited_rules": [],
            "confidence": 0.75,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)", "7.4")
    )

    assert isinstance(result, LLMResponse)
    # Regex should have extracted both rule IDs from the answer text.
    assert "5.2.1(b)" in result.cited_rules
    assert "7.4" in result.cited_rules
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cited_rules_regex_not_triggered_when_rules_present(
    adapter: OpenRouterLLM,
):
    """A cited_rules list already populated by the LLM must be kept as-is —
    the regex fallback must not override it, to avoid double-adding or
    mangling IDs.
    """
    model_output = json.dumps(
        {
            "answer": "Rule 5.2.1(b) says the runner advances.",
            "cited_rules": ["5.2.1(b)"],
            "confidence": 0.8,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)")
    )

    assert result.cited_rules == ["5.2.1(b)"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Conversation history forwarded correctly
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_conversation_history_included_in_request(adapter: OpenRouterLLM):
    """When conversation_history is provided it must appear in the messages list
    sent to the API, interleaved between the system prompt and the new user turn.

    We inspect the captured POST body to assert ordering and content.
    """
    history = [
        {"role": "user", "content": "Who bats first?"},
        {"role": "assistant", "content": "The home team bats last."},
    ]

    llm_json = {
        "answer": "Yes, that is correct.",
        "cited_rules": [],
        "confidence": 0.8,
        "needs_human": False,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response(
        "Follow-up question?", [], conversation_history=history
    )

    # httpx passes the request body via the keyword argument ``json`` (it is
    # keyword-only on AsyncClient.post), so read it from kwargs directly.
    # The previous conditional-expression extraction had a precedence bug —
    # ``a or b if c else d`` parses as ``(a or b) if c else d`` — and also
    # indexed ``call_args.args[1]``, a positional slot the body never occupies.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    messages = sent_json["messages"]

    roles = [m["role"] for m in messages]
    # system prompt first, history next, new user message last
    assert roles[0] == "system"
    assert {"role": "user", "content": "Who bats first?"} in messages
    assert {"role": "assistant", "content": "The home team bats last."} in messages
    # final message should be the new user turn
    assert messages[-1]["role"] == "user"
    assert "Follow-up question?" in messages[-1]["content"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_no_conversation_history_omitted_from_request(adapter: OpenRouterLLM):
    """When conversation_history is None or empty the messages list must only
    contain the system prompt and the new user message — no history entries.
    """
    llm_json = {
        "answer": "Yes.",
        "cited_rules": [],
        "confidence": 0.9,
        "needs_human": False,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response("Simple question?", [], conversation_history=None)

    # The body is always passed as the keyword argument ``json``; the old
    # ``kwargs.get("json") or kwargs["json"]`` was a redundant double lookup.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    messages = sent_json["messages"]

    assert len(messages) == 2
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# No rules context
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_no_rules_uses_not_found_message(adapter: OpenRouterLLM):
    """When rules is an empty list the user message sent to the API should
    contain a clear indication that no relevant rules were found, rather than
    an empty or misleading context block.
    """
    llm_json = {
        "answer": "I don't have a rule for this.",
        "cited_rules": [],
        "confidence": 0.1,
        "needs_human": True,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response("Unknown rule question?", [])

    # The body is always passed as the keyword argument ``json``; the old
    # ``kwargs.get("json") or kwargs["json"]`` was a redundant double lookup.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    user_message = next(
        m["content"] for m in sent_json["messages"] if m["role"] == "user"
    )
    assert "No relevant rules" in user_message
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# close()
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_close_shuts_down_http_client(adapter: OpenRouterLLM):
    """close() must await the underlying httpx.AsyncClient.aclose() so that
    connection pools are released cleanly without leaving open sockets.
    """
    aclose_spy = AsyncMock()
    adapter._http.aclose = aclose_spy

    await adapter.close()

    aclose_spy.assert_awaited_once()
|