Domain layer (zero framework imports): - domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult, Conversation, ChatMessage, LLMResponse, ChatResult) - domain/ports.py: ABC interfaces (RuleRepository, LLMPort, ConversationStore, IssueTracker) - domain/services.py: ChatService orchestrates Q&A flow using only ports Outbound adapters (implement domain ports): - adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx client, robust JSON parsing, regex citation fallback - adapters/outbound/sqlite_convos.py: SQLiteConversationStore with async_sessionmaker, timezone-aware datetimes, cleanup support - adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown injection protection (fenced code blocks) - adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped similarity scores Inbound adapter: - adapters/inbound/api.py: thin FastAPI router with input validation (max_length constraints), proper HTTP status codes (503 for missing LLM) Configuration & wiring: - config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton) - config/container.py: create_app() factory with lifespan-managed DI - main.py: minimal entry point Test infrastructure (90 tests, all passing): - tests/fakes/: in-memory implementations of all 4 ports - tests/domain/: 26 tests for models and ChatService - tests/adapters/: 64 tests for all adapters using fakes/mocks - No real API calls, no model downloads, no disk I/O in fast tests Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build targets for new package layout. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
393 lines
14 KiB
Python
"""Tests for the OpenRouterLLM outbound adapter.
|
|
|
|
Tests cover:
|
|
- Successful JSON response parsing from the LLM
|
|
- JSON embedded in markdown code fences (```json ... ```)
|
|
- Plain-text fallback when JSON parsing fails completely
|
|
- HTTP error status codes raising RuntimeError
|
|
- Regex fallback for cited_rules when the LLM omits them but mentions rules in text
|
|
- Conversation history is forwarded correctly to the API
|
|
- The adapter returns domain.models.LLMResponse, not any legacy type
|
|
- close() shuts down the underlying httpx client
|
|
|
|
All HTTP calls are intercepted via unittest.mock so no real API key is needed.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from domain.models import LLMResponse, RuleSearchResult
|
|
from domain.ports import LLMPort
|
|
from adapters.outbound.openrouter import OpenRouterLLM
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_rules(*rule_ids: str) -> list[RuleSearchResult]:
    """Build one minimal RuleSearchResult fixture per given rule id."""
    fixtures: list[RuleSearchResult] = []
    for rid in rule_ids:
        fixtures.append(
            RuleSearchResult(
                rule_id=rid,
                title=f"Title for {rid}",
                content=f"Content for rule {rid}.",
                section="General",
                similarity=0.9,
            )
        )
    return fixtures
|
|
|
|
|
|
def _api_payload(content: str) -> dict:
|
|
"""Wrap a content string in the OpenRouter / OpenAI response envelope."""
|
|
return {"choices": [{"message": {"content": content}}]}
|
|
|
|
|
|
def _mock_http_response(
|
|
status_code: int = 200, body: dict | str | None = None
|
|
) -> MagicMock:
|
|
"""Build a mock httpx.Response with the given status and JSON body."""
|
|
resp = MagicMock()
|
|
resp.status_code = status_code
|
|
if isinstance(body, dict):
|
|
resp.json.return_value = body
|
|
resp.text = json.dumps(body)
|
|
else:
|
|
resp.json.side_effect = ValueError("not JSON")
|
|
resp.text = body or ""
|
|
return resp
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture()
def adapter() -> OpenRouterLLM:
    """Return an OpenRouterLLM wired to a mocked httpx.AsyncClient.

    httpx.AsyncClient is patched for the duration of __init__ so the adapter
    never opens a real connection pool; tests drive behaviour through the
    mock exposed as ``adapter._http``.
    """
    fake_http = AsyncMock()
    patch_target = "adapters.outbound.openrouter.httpx.AsyncClient"
    with patch(patch_target, return_value=fake_http):
        llm = OpenRouterLLM(api_key="test-key", model="test-model")
        llm._http = fake_http
    return llm
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Interface compliance
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_openrouter_llm_implements_port():
    """OpenRouterLLM must be a concrete implementation of LLMPort.

    issubclass reflects missing abstract-method overrides at
    class-definition time, not just at instantiation time.
    """
    is_port_impl = issubclass(OpenRouterLLM, LLMPort)
    assert is_port_impl
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Successful JSON response
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_successful_json_response(adapter: OpenRouterLLM):
    """A well-formed JSON body from the LLM is parsed into LLMResponse.

    answer, cited_rules, confidence, and needs_human must all be mapped
    straight from the parsed JSON fields.
    """
    model_output = json.dumps(
        {
            "answer": "The runner advances one base.",
            "cited_rules": ["5.2.1(b)", "5.2.2"],
            "confidence": 0.9,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Can the runner advance?", _make_rules("5.2.1(b)", "5.2.2")
    )

    assert isinstance(result, LLMResponse)
    assert result.answer == "The runner advances one base."
    assert "5.2.1(b)" in result.cited_rules
    assert "5.2.2" in result.cited_rules
    assert result.confidence == pytest.approx(0.9)
    assert result.needs_human is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Markdown-fenced JSON response
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_markdown_fenced_json_response(adapter: OpenRouterLLM):
    """LLMs often wrap JSON in ```json ... ``` fences.

    The adapter must strip the fences before parsing so responses formatted
    this way are handled identically to bare JSON.
    """
    model_output = json.dumps(
        {
            "answer": "No, the batter is out.",
            "cited_rules": ["3.1"],
            "confidence": 0.85,
            "needs_human": False,
        }
    )
    fenced = f"```json\n{model_output}\n```"
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(fenced))
    )

    result = await adapter.generate_response("Is the batter out?", _make_rules("3.1"))

    assert isinstance(result, LLMResponse)
    assert result.answer == "No, the batter is out."
    assert result.cited_rules == ["3.1"]
    assert result.confidence == pytest.approx(0.85)
    assert result.needs_human is False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Plain-text fallback (JSON parse failure)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_plain_text_fallback_on_parse_failure(adapter: OpenRouterLLM):
    """Plain text that cannot be parsed as JSON triggers a graceful fallback:

    - answer = raw content string
    - cited_rules = []
    - confidence = 0.0 (not 0.5, signalling unreliable parse)
    - needs_human = True (not False, signalling human review needed)
    """
    raw_answer = "I'm not sure which rule covers this situation."
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(raw_answer))
    )

    result = await adapter.generate_response("Which rule applies?", [])

    assert isinstance(result, LLMResponse)
    assert result.answer == raw_answer
    assert result.cited_rules == []
    assert result.confidence == pytest.approx(0.0)
    assert result.needs_human is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP error codes
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_http_error_raises_runtime_error(adapter: OpenRouterLLM):
    """Non-200 HTTP status codes from the API must raise RuntimeError.

    This ensures upstream callers (the service layer) can catch a predictable
    exception type and decide whether to retry or surface an error message.
    """
    rate_limited = _mock_http_response(429, "Rate limit exceeded")
    adapter._http.post = AsyncMock(return_value=rate_limited)

    with pytest.raises(RuntimeError, match="429"):
        await adapter.generate_response("Any question", [])
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_http_500_raises_runtime_error(adapter: OpenRouterLLM):
    """A 500 Internal Server Error from OpenRouter also raises RuntimeError."""
    server_error = _mock_http_response(500, "Internal server error")
    adapter._http.post = AsyncMock(return_value=server_error)

    with pytest.raises(RuntimeError, match="500"):
        await adapter.generate_response("Any question", [])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# cited_rules regex fallback
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cited_rules_regex_fallback(adapter: OpenRouterLLM):
    """An empty cited_rules list in otherwise-valid JSON triggers the regex
    fallback: rule IDs mentioned in the answer text are extracted and used to
    populate cited_rules.

    This preserves rule attribution even when the model forgets the field.
    """
    model_output = json.dumps(
        {
            "answer": "According to Rule 5.2.1(b) the runner must advance. See also Rule 7.4.",
            "cited_rules": [],
            "confidence": 0.75,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)", "7.4")
    )

    assert isinstance(result, LLMResponse)
    # Regex should have extracted both rule IDs from the answer text.
    assert "5.2.1(b)" in result.cited_rules
    assert "7.4" in result.cited_rules
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_cited_rules_regex_not_triggered_when_rules_present(
    adapter: OpenRouterLLM,
):
    """A cited_rules list already populated by the LLM must be kept as-is —
    the regex fallback must not override it, to avoid double-adding or
    mangling IDs.
    """
    model_output = json.dumps(
        {
            "answer": "Rule 5.2.1(b) says the runner advances.",
            "cited_rules": ["5.2.1(b)"],
            "confidence": 0.8,
            "needs_human": False,
        }
    )
    adapter._http.post = AsyncMock(
        return_value=_mock_http_response(200, _api_payload(model_output))
    )

    result = await adapter.generate_response(
        "Advance question?", _make_rules("5.2.1(b)")
    )

    assert result.cited_rules == ["5.2.1(b)"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Conversation history forwarded correctly
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_conversation_history_included_in_request(adapter: OpenRouterLLM):
    """When conversation_history is provided it must appear in the messages list
    sent to the API, interleaved between the system prompt and the new user turn.

    We inspect the captured POST body to assert ordering and content.
    """
    history = [
        {"role": "user", "content": "Who bats first?"},
        {"role": "assistant", "content": "The home team bats last."},
    ]

    llm_json = {
        "answer": "Yes, that is correct.",
        "cited_rules": [],
        "confidence": 0.8,
        "needs_human": False,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response(
        "Follow-up question?", [], conversation_history=history
    )

    # httpx passes the request body via the keyword argument ``json`` (it is
    # keyword-only on AsyncClient.post), so read it from kwargs directly.
    # The previous conditional-expression extraction had a precedence bug —
    # ``a or b if c else d`` parses as ``(a or b) if c else d`` — and also
    # indexed ``call_args.args[1]``, a positional slot the body never occupies.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    messages = sent_json["messages"]

    roles = [m["role"] for m in messages]
    # system prompt first, history next, new user message last
    assert roles[0] == "system"
    assert {"role": "user", "content": "Who bats first?"} in messages
    assert {"role": "assistant", "content": "The home team bats last."} in messages
    # final message should be the new user turn
    assert messages[-1]["role"] == "user"
    assert "Follow-up question?" in messages[-1]["content"]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_no_conversation_history_omitted_from_request(adapter: OpenRouterLLM):
    """When conversation_history is None or empty the messages list must only
    contain the system prompt and the new user message — no history entries.
    """
    llm_json = {
        "answer": "Yes.",
        "cited_rules": [],
        "confidence": 0.9,
        "needs_human": False,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response("Simple question?", [], conversation_history=None)

    # The body is always passed as the keyword argument ``json``; the old
    # ``kwargs.get("json") or kwargs["json"]`` was a redundant double lookup.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    messages = sent_json["messages"]

    assert len(messages) == 2
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# No rules context
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_no_rules_uses_not_found_message(adapter: OpenRouterLLM):
    """When rules is an empty list the user message sent to the API should
    contain a clear indication that no relevant rules were found, rather than
    an empty or misleading context block.
    """
    llm_json = {
        "answer": "I don't have a rule for this.",
        "cited_rules": [],
        "confidence": 0.1,
        "needs_human": True,
    }
    api_body = _api_payload(json.dumps(llm_json))
    adapter._http.post = AsyncMock(return_value=_mock_http_response(200, api_body))

    await adapter.generate_response("Unknown rule question?", [])

    # The body is always passed as the keyword argument ``json``; the old
    # ``kwargs.get("json") or kwargs["json"]`` was a redundant double lookup.
    sent_json = adapter._http.post.call_args.kwargs["json"]
    user_message = next(
        m["content"] for m in sent_json["messages"] if m["role"] == "user"
    )
    assert "No relevant rules" in user_message
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# close()
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_close_shuts_down_http_client(adapter: OpenRouterLLM):
    """close() must await the underlying httpx.AsyncClient.aclose() so that
    connection pools are released cleanly without leaving open sockets.
    """
    aclose_spy = AsyncMock()
    adapter._http.aclose = aclose_spy

    await adapter.close()

    aclose_spy.assert_awaited_once()
|