strat-chatbot/tests/adapters/test_openrouter.py
Cal Corum c3218f70c4 refactor: hexagonal architecture with ports & adapters, DI, and test-first development
Domain layer (zero framework imports):
- domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult,
  Conversation, ChatMessage, LLMResponse, ChatResult)
- domain/ports.py: ABC interfaces (RuleRepository, LLMPort,
  ConversationStore, IssueTracker)
- domain/services.py: ChatService orchestrates Q&A flow using only ports

Outbound adapters (implement domain ports):
- adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx
  client, robust JSON parsing, regex citation fallback
- adapters/outbound/sqlite_convos.py: SQLiteConversationStore with
  async_sessionmaker, timezone-aware datetimes, cleanup support
- adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown
  injection protection (fenced code blocks)
- adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped
  similarity scores

Inbound adapter:
- adapters/inbound/api.py: thin FastAPI router with input validation
  (max_length constraints), proper HTTP status codes (503 for missing LLM)

Configuration & wiring:
- config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton)
- config/container.py: create_app() factory with lifespan-managed DI
- main.py: minimal entry point

Test infrastructure (90 tests, all passing):
- tests/fakes/: in-memory implementations of all 4 ports
- tests/domain/: 26 tests for models and ChatService
- tests/adapters/: 64 tests for all adapters using fakes/mocks
- No real API calls, no model downloads, no disk I/O in fast tests

Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build
targets for new package layout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 15:51:16 -05:00

393 lines
14 KiB
Python

"""Tests for the OpenRouterLLM outbound adapter.
Tests cover:
- Successful JSON response parsing from the LLM
- JSON embedded in markdown code fences (```json ... ```)
- Plain-text fallback when JSON parsing fails completely
- HTTP error status codes raising RuntimeError
- Regex fallback for cited_rules when the LLM omits them but mentions rules in text
- Conversation history is forwarded correctly to the API
- The adapter returns domain.models.LLMResponse, not any legacy type
- close() shuts down the underlying httpx client
All HTTP calls are intercepted via unittest.mock so no real API key is needed.
"""
from __future__ import annotations
import json
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from domain.models import LLMResponse, RuleSearchResult
from domain.ports import LLMPort
from adapters.outbound.openrouter import OpenRouterLLM
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_rules(*rule_ids: str) -> list[RuleSearchResult]:
    """Build throwaway RuleSearchResult fixtures, one per rule ID."""
    fixtures: list[RuleSearchResult] = []
    for rid in rule_ids:
        fixtures.append(
            RuleSearchResult(
                rule_id=rid,
                title=f"Title for {rid}",
                content=f"Content for rule {rid}.",
                section="General",
                similarity=0.9,
            )
        )
    return fixtures
def _api_payload(content: str) -> dict:
"""Wrap a content string in the OpenRouter / OpenAI response envelope."""
return {"choices": [{"message": {"content": content}}]}
def _mock_http_response(
status_code: int = 200, body: dict | str | None = None
) -> MagicMock:
"""Build a mock httpx.Response with the given status and JSON body."""
resp = MagicMock()
resp.status_code = status_code
if isinstance(body, dict):
resp.json.return_value = body
resp.text = json.dumps(body)
else:
resp.json.side_effect = ValueError("not JSON")
resp.text = body or ""
return resp
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def adapter() -> OpenRouterLLM:
    """Yield an OpenRouterLLM whose internal httpx.AsyncClient is a mock.

    httpx.AsyncClient is patched during construction so __init__ never opens
    a real client; the mock is then pinned on the instance so each test can
    script responses on ``adapter._http``.
    """
    fake_http = AsyncMock()
    patch_target = "adapters.outbound.openrouter.httpx.AsyncClient"
    with patch(patch_target, return_value=fake_http):
        llm = OpenRouterLLM(api_key="test-key", model="test-model")
    llm._http = fake_http
    return llm
# ---------------------------------------------------------------------------
# Interface compliance
# ---------------------------------------------------------------------------
def test_openrouter_llm_implements_port():
    """The adapter class itself must derive from the LLMPort ABC.

    Checking subclassing (rather than isinstance on an instance) surfaces
    missing abstract-method overrides at class-definition time.
    """
    assert issubclass(OpenRouterLLM, LLMPort)
# ---------------------------------------------------------------------------
# Successful JSON response
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_successful_json_response(adapter: OpenRouterLLM):
    """A clean JSON payload maps field-for-field onto LLMResponse."""
    payload = json.dumps(
        {
            "answer": "The runner advances one base.",
            "cited_rules": ["5.2.1(b)", "5.2.2"],
            "confidence": 0.9,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(payload))
    )
    rules = _make_rules("5.2.1(b)", "5.2.2")
    result = await adapter.generate_response("Can the runner advance?", rules)
    assert isinstance(result, LLMResponse)
    assert result.answer == "The runner advances one base."
    assert "5.2.1(b)" in result.cited_rules
    assert "5.2.2" in result.cited_rules
    assert result.confidence == pytest.approx(0.9)
    assert result.needs_human is False
# ---------------------------------------------------------------------------
# Markdown-fenced JSON response
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_markdown_fenced_json_response(adapter: OpenRouterLLM):
    """JSON wrapped in ```json fences must parse identically to bare JSON."""
    inner = json.dumps(
        {
            "answer": "No, the batter is out.",
            "cited_rules": ["3.1"],
            "confidence": 0.85,
            "needs_human": False,
        }
    )
    fenced = _api_payload(f"```json\n{inner}\n```")
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, fenced))
    result = await adapter.generate_response("Is the batter out?", _make_rules("3.1"))
    assert isinstance(result, LLMResponse)
    assert result.answer == "No, the batter is out."
    assert result.cited_rules == ["3.1"]
    assert result.confidence == pytest.approx(0.85)
    assert result.needs_human is False
# ---------------------------------------------------------------------------
# Plain-text fallback (JSON parse failure)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_plain_text_fallback_on_parse_failure(adapter: OpenRouterLLM):
    """Unparseable plain text degrades to a conservative LLMResponse.

    The raw content becomes the answer, no rules are cited, confidence
    drops to 0.0 (signalling an unreliable parse), and needs_human flips
    to True so the reply gets human review.
    """
    raw = "I'm not sure which rule covers this situation."
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(raw))
    )
    result = await adapter.generate_response("Which rule applies?", [])
    assert isinstance(result, LLMResponse)
    assert result.answer == raw
    assert result.cited_rules == []
    assert result.confidence == pytest.approx(0.0)
    assert result.needs_human is True
# ---------------------------------------------------------------------------
# HTTP error codes
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_http_error_raises_runtime_error(adapter: OpenRouterLLM):
    """A 429 from the API surfaces as a RuntimeError naming the status code.

    Giving the service layer one predictable exception type lets it decide
    whether to retry or surface an error message.
    """
    throttled = _mock_http_response(429, "Rate limit exceeded")
    adapter._http.post = AsyncMock(return_value=throttled)
    with pytest.raises(RuntimeError, match="429"):
        await adapter.generate_response("Any question", [])
@pytest.mark.asyncio
async def test_http_500_raises_runtime_error(adapter: OpenRouterLLM):
    """A 500 from OpenRouter likewise raises RuntimeError."""
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(500, "Internal server error")
    )
    with pytest.raises(RuntimeError, match="500"):
        await adapter.generate_response("Any question", [])
# ---------------------------------------------------------------------------
# cited_rules regex fallback
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_cited_rules_regex_fallback(adapter: OpenRouterLLM):
    """An empty cited_rules list is repopulated from rule IDs in the answer.

    The adapter's regex fallback keeps rule attribution intact when the
    model mentions rules in prose but leaves the JSON field empty.
    """
    content = json.dumps(
        {
            "answer": (
                "According to Rule 5.2.1(b) the runner must advance."
                " See also Rule 7.4."
            ),
            "cited_rules": [],
            "confidence": 0.75,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(content))
    )
    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)", "7.4")
    )
    assert isinstance(result, LLMResponse)
    # Both IDs mentioned in prose should have been recovered by the regex.
    assert "5.2.1(b)" in result.cited_rules
    assert "7.4" in result.cited_rules
@pytest.mark.asyncio
async def test_cited_rules_regex_not_triggered_when_rules_present(
    adapter: OpenRouterLLM,
):
    """LLM-supplied cited_rules are kept verbatim; the regex fallback stays
    off so IDs are never double-added or mangled.
    """
    content = json.dumps(
        {
            "answer": "Rule 5.2.1(b) says the runner advances.",
            "cited_rules": ["5.2.1(b)"],
            "confidence": 0.8,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(content))
    )
    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)")
    )
    assert result.cited_rules == ["5.2.1(b)"]
# ---------------------------------------------------------------------------
# Conversation history forwarded correctly
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_conversation_history_included_in_request(adapter: OpenRouterLLM):
    """conversation_history must be forwarded in the messages list.

    Expected ordering: system prompt first, prior turns next, and the new
    user question last.  The captured POST body is inspected to verify.
    """
    history = [
        {"role": "user", "content": "Who bats first?"},
        {"role": "assistant", "content": "The home team bats last."},
    ]
    llm_json = {
        "answer": "Yes, that is correct.",
        "cited_rules": [],
        "confidence": 0.8,
        "needs_human": False,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))
    await adapter.generate_response(
        "Follow-up question?", [], conversation_history=history
    )
    call = adapter._http.post.call_args
    # The original extraction mixed `or` with a ternary, which parsed as
    # `(kwargs.get("json") or args[1]) if args else kwargs["json"]` and could
    # raise IndexError when only the URL was positional and "json" was
    # missing.  Check the keyword first, then fall back to a positional
    # payload, and fail with a clear message if neither is present.
    sent_json = call.kwargs.get("json")
    if sent_json is None and len(call.args) > 1:
        sent_json = call.args[1]
    assert sent_json is not None, "no JSON payload captured from POST"
    messages = sent_json["messages"]
    roles = [m["role"] for m in messages]
    # system prompt first, history next, new user message last
    assert roles[0] == "system"
    assert {"role": "user", "content": "Who bats first?"} in messages
    assert {"role": "assistant", "content": "The home team bats last."} in messages
    # final message should be the new user turn
    assert messages[-1]["role"] == "user"
    assert "Follow-up question?" in messages[-1]["content"]
@pytest.mark.asyncio
async def test_no_conversation_history_omitted_from_request(adapter: OpenRouterLLM):
"""When conversation_history is None or empty the messages list must only
contain the system prompt and the new user message — no history entries.
"""
llm_json = {
"answer": "Yes.",
"cited_rules": [],
"confidence": 0.9,
"needs_human": False,
}
api_body = _api_payload(json.dumps(llm_json))
adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))
await adapter.generate_response("Simple question?", [], conversation_history=None)
call_kwargs = adapter._http.post.call_args
sent_json = call_kwargs.kwargs.get("json") or call_kwargs.kwargs["json"]
messages = sent_json["messages"]
assert len(messages) == 2
assert messages[0]["role"] == "system"
assert messages[1]["role"] == "user"
# ---------------------------------------------------------------------------
# No rules context
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_no_rules_uses_not_found_message(adapter: OpenRouterLLM):
    """An empty rules list must produce an explicit "no relevant rules" note
    in the user message, never an empty or misleading context block.
    """
    llm_json = {
        "answer": "I don't have a rule for this.",
        "cited_rules": [],
        "confidence": 0.1,
        "needs_human": True,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))
    await adapter.generate_response("Unknown rule question?", [])
    call = adapter._http.post.call_args
    # `kwargs.get("json") or kwargs["json"]` was redundant: the fallback
    # indexing would raise KeyError exactly when .get() returned None.
    sent_json = call.kwargs["json"]
    user_message = next(
        m["content"] for m in sent_json["messages"] if m["role"] == "user"
    )
    assert "No relevant rules" in user_message
# ---------------------------------------------------------------------------
# close()
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_close_shuts_down_http_client(adapter: OpenRouterLLM):
    """close() must await the client's aclose() so connection pools are
    released cleanly without leaving open sockets.
    """
    aclose_spy = AsyncMock()
    adapter._http.aclose = aclose_spy
    await adapter.close()
    aclose_spy.assert_awaited_once()