"""Tests for the OpenRouterLLM outbound adapter. Tests cover: - Successful JSON response parsing from the LLM - JSON embedded in markdown code fences (```json ... ```) - Plain-text fallback when JSON parsing fails completely - HTTP error status codes raising RuntimeError - Regex fallback for cited_rules when the LLM omits them but mentions rules in text - Conversation history is forwarded correctly to the API - The adapter returns domain.models.LLMResponse, not any legacy type - close() shuts down the underlying httpx client All HTTP calls are intercepted via unittest.mock so no real API key is needed. """ from __future__ import annotations import json from unittest.mock import AsyncMock, MagicMock, patch import pytest from domain.models import LLMResponse, RuleSearchResult from domain.ports import LLMPort from adapters.outbound.openrouter import OpenRouterLLM # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_rules(*rule_ids: str) -> list[RuleSearchResult]: """Create minimal RuleSearchResult fixtures.""" return [ RuleSearchResult( rule_id=rid, title=f"Title for {rid}", content=f"Content for rule {rid}.", section="General", similarity=0.9, ) for rid in rule_ids ] def _api_payload(content: str) -> dict: """Wrap a content string in the OpenRouter / OpenAI response envelope.""" return {"choices": [{"message": {"content": content}}]} def _mock_http_response( status_code: int = 200, body: dict | str | None = None ) -> MagicMock: """Build a mock httpx.Response with the given status and JSON body.""" resp = MagicMock() resp.status_code = status_code if isinstance(body, dict): resp.json.return_value = body resp.text = json.dumps(body) else: resp.json.side_effect = ValueError("not JSON") resp.text = body or "" return resp # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture() def adapter() -> OpenRouterLLM: """Return an OpenRouterLLM with a mocked internal httpx.AsyncClient. We patch httpx.AsyncClient so the adapter's __init__ wires up a mock that we can control per-test through the returned instance. """ mock_client = AsyncMock() with patch( "adapters.outbound.openrouter.httpx.AsyncClient", return_value=mock_client ): inst = OpenRouterLLM(api_key="test-key", model="test-model") inst._http = mock_client return inst # --------------------------------------------------------------------------- # Interface compliance # --------------------------------------------------------------------------- def test_openrouter_llm_implements_port(): """OpenRouterLLM must be a concrete implementation of LLMPort. This catches missing abstract method overrides at class-definition time, not just at instantiation time. """ assert issubclass(OpenRouterLLM, LLMPort) # --------------------------------------------------------------------------- # Successful JSON response # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_successful_json_response(adapter: OpenRouterLLM): """A well-formed JSON body from the LLM should be parsed into LLMResponse. Verifies that answer, cited_rules, confidence, and needs_human are all mapped correctly from the parsed JSON. """ llm_json = { "answer": "The runner advances one base.", "cited_rules": ["5.2.1(b)", "5.2.2"], "confidence": 0.9, "needs_human": False, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) result = await adapter.generate_response( "Can the runner advance?", _make_rules("5.2.1(b)", "5.2.2") ) assert isinstance(result, LLMResponse) assert result.answer == "The runner advances one base." assert "5.2.1(b)" in result.cited_rules assert "5.2.2" in result.cited_rules assert result.confidence == pytest.approx(0.9) assert result.needs_human is False # --------------------------------------------------------------------------- # Markdown-fenced JSON response # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_markdown_fenced_json_response(adapter: OpenRouterLLM): """LLMs often wrap JSON in ```json ... ``` fences. The adapter must strip the fences before parsing so responses formatted this way are handled identically to bare JSON. """ llm_json = { "answer": "No, the batter is out.", "cited_rules": ["3.1"], "confidence": 0.85, "needs_human": False, } fenced_content = f"```json\n{json.dumps(llm_json)}\n```" api_body = _api_payload(fenced_content) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) result = await adapter.generate_response("Is the batter out?", _make_rules("3.1")) assert isinstance(result, LLMResponse) assert result.answer == "No, the batter is out." assert result.cited_rules == ["3.1"] assert result.confidence == pytest.approx(0.85) assert result.needs_human is False # --------------------------------------------------------------------------- # Plain-text fallback (JSON parse failure) # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_plain_text_fallback_on_parse_failure(adapter: OpenRouterLLM): """When the LLM returns plain text that cannot be parsed as JSON, the adapter falls back gracefully: - answer = raw content string - cited_rules = [] - confidence = 0.0 (not 0.5, signalling unreliable parse) - needs_human = True (not False, signalling human review needed) """ plain_text = "I'm not sure which rule covers this situation." api_body = _api_payload(plain_text) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) result = await adapter.generate_response("Which rule applies?", []) assert isinstance(result, LLMResponse) assert result.answer == plain_text assert result.cited_rules == [] assert result.confidence == pytest.approx(0.0) assert result.needs_human is True # --------------------------------------------------------------------------- # HTTP error codes # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_http_error_raises_runtime_error(adapter: OpenRouterLLM): """Non-200 HTTP status codes from the API must raise RuntimeError. This ensures upstream callers (the service layer) can catch a predictable exception type and decide whether to retry or surface an error message. """ error_body_text = "Rate limit exceeded" resp = _mock_http_response(429, error_body_text) adapter._http.post = AsyncMock(return_value=resp) with pytest.raises(RuntimeError, match="429"): await adapter.generate_response("Any question", []) @pytest.mark.asyncio async def test_http_500_raises_runtime_error(adapter: OpenRouterLLM): """500 Internal Server Error from OpenRouter should also raise RuntimeError.""" resp = _mock_http_response(500, "Internal server error") adapter._http.post = AsyncMock(return_value=resp) with pytest.raises(RuntimeError, match="500"): await adapter.generate_response("Any question", []) # --------------------------------------------------------------------------- # cited_rules regex fallback # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_cited_rules_regex_fallback(adapter: OpenRouterLLM): """When the LLM returns valid JSON but omits cited_rules (empty list), the adapter should extract rule IDs mentioned in the answer text via regex and populate cited_rules from those matches. This preserves rule attribution even when the model forgets the field. """ llm_json = { "answer": "According to Rule 5.2.1(b) the runner must advance. See also Rule 7.4.", "cited_rules": [], "confidence": 0.75, "needs_human": False, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) result = await adapter.generate_response( "Advance question?", _make_rules("5.2.1(b)", "7.4") ) assert isinstance(result, LLMResponse) # Regex should have extracted both rule IDs from the answer text assert "5.2.1(b)" in result.cited_rules assert "7.4" in result.cited_rules @pytest.mark.asyncio async def test_cited_rules_regex_not_triggered_when_rules_present( adapter: OpenRouterLLM, ): """When cited_rules is already populated by the LLM, the regex fallback must NOT override it — to avoid double-adding or mangling IDs. """ llm_json = { "answer": "Rule 5.2.1(b) says the runner advances.", "cited_rules": ["5.2.1(b)"], "confidence": 0.8, "needs_human": False, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) result = await adapter.generate_response( "Advance question?", _make_rules("5.2.1(b)") ) assert result.cited_rules == ["5.2.1(b)"] # --------------------------------------------------------------------------- # Conversation history forwarded correctly # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_conversation_history_included_in_request(adapter: OpenRouterLLM): """When conversation_history is provided it must appear in the messages list sent to the API, interleaved between the system prompt and the new user turn. We inspect the captured POST body to assert ordering and content. """ history = [ {"role": "user", "content": "Who bats first?"}, {"role": "assistant", "content": "The home team bats last."}, ] llm_json = { "answer": "Yes, that is correct.", "cited_rules": [], "confidence": 0.8, "needs_human": False, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) await adapter.generate_response( "Follow-up question?", [], conversation_history=history ) call_kwargs = adapter._http.post.call_args sent_json = ( call_kwargs.kwargs.get("json") or call_kwargs.args[1] if call_kwargs.args else call_kwargs.kwargs["json"] ) messages = sent_json["messages"] roles = [m["role"] for m in messages] # system prompt first, history next, new user message last assert roles[0] == "system" assert {"role": "user", "content": "Who bats first?"} in messages assert {"role": "assistant", "content": "The home team bats last."} in messages # final message should be the new user turn assert messages[-1]["role"] == "user" assert "Follow-up question?" in messages[-1]["content"] @pytest.mark.asyncio async def test_no_conversation_history_omitted_from_request(adapter: OpenRouterLLM): """When conversation_history is None or empty the messages list must only contain the system prompt and the new user message — no history entries. """ llm_json = { "answer": "Yes.", "cited_rules": [], "confidence": 0.9, "needs_human": False, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) await adapter.generate_response("Simple question?", [], conversation_history=None) call_kwargs = adapter._http.post.call_args sent_json = call_kwargs.kwargs.get("json") or call_kwargs.kwargs["json"] messages = sent_json["messages"] assert len(messages) == 2 assert messages[0]["role"] == "system" assert messages[1]["role"] == "user" # --------------------------------------------------------------------------- # No rules context # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_no_rules_uses_not_found_message(adapter: OpenRouterLLM): """When rules is an empty list the user message sent to the API should contain a clear indication that no relevant rules were found, rather than an empty or misleading context block. """ llm_json = { "answer": "I don't have a rule for this.", "cited_rules": [], "confidence": 0.1, "needs_human": True, } api_body = _api_payload(json.dumps(llm_json)) adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body)) await adapter.generate_response("Unknown rule question?", []) call_kwargs = adapter._http.post.call_args sent_json = call_kwargs.kwargs.get("json") or call_kwargs.kwargs["json"] user_message = next( m["content"] for m in sent_json["messages"] if m["role"] == "user" ) assert "No relevant rules" in user_message # --------------------------------------------------------------------------- # close() # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_close_shuts_down_http_client(adapter: OpenRouterLLM): """close() must await the underlying httpx.AsyncClient.aclose() so that connection pools are released cleanly without leaving open sockets. """ adapter._http.aclose = AsyncMock() await adapter.close() adapter._http.aclose.assert_awaited_once()