strat-chatbot/tests/domain/test_services.py

"""Tests for ChatService — the core use case, tested entirely with fakes."""

import pytest

from domain.models import RuleDocument
from domain.services import ChatService
from tests.fakes import (
    FakeRuleRepository,
    FakeLLM,
    FakeConversationStore,
    FakeIssueTracker,
)


@pytest.fixture
def rules_repo():
    repo = FakeRuleRepository()
    repo.add_documents(
        [
            RuleDocument(
                rule_id="5.2.1(b)",
                title="Stolen Base Attempts",
                section="Baserunning",
                content="When a runner attempts to steal a base, roll 2 dice.",
                source_file="rules.md",
            ),
            RuleDocument(
                rule_id="3.1",
                title="Pitching Overview",
                section="Pitching",
                content="The pitcher rolls for each at-bat using the pitching card.",
                source_file="rules.md",
            ),
        ]
    )
    return repo


@pytest.fixture
def llm():
    return FakeLLM()


@pytest.fixture
def conversations():
    return FakeConversationStore()


@pytest.fixture
def issues():
    return FakeIssueTracker()


@pytest.fixture
def service(rules_repo, llm, conversations, issues):
    return ChatService(
        rules=rules_repo,
        llm=llm,
        conversations=conversations,
        issues=issues,
    )


class TestChatServiceAnswerQuestion:
    """ChatService.answer_question orchestrates the full Q&A flow."""

    async def test_returns_answer_with_cited_rules(self, service):
        """When rules match the question, the LLM is called and rules are cited."""
        result = await service.answer_question(
            message="How do I steal a base?",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert "5.2.1(b)" in result.cited_rules
        assert result.confidence == 0.9
        assert result.needs_human is False
        assert result.conversation_id  # should be a non-empty string
        assert result.message_id  # should be a non-empty string

    async def test_creates_conversation_and_messages(self, service, conversations):
        """The service should persist both user and assistant messages."""
        result = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        history = await conversations.get_conversation_history(result.conversation_id)
        assert len(history) == 2
        assert history[0]["role"] == "user"
        assert history[1]["role"] == "assistant"

    async def test_continues_existing_conversation(self, service, conversations):
        """Passing a conversation_id should reuse the existing conversation."""
        result1 = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        result2 = await service.answer_question(
            message="What about pickoffs?",
            user_id="user-1",
            channel_id="chan-1",
            conversation_id=result1.conversation_id,
            parent_message_id=result1.message_id,
        )
        assert result2.conversation_id == result1.conversation_id
        history = await conversations.get_conversation_history(result1.conversation_id)
        assert len(history) == 4  # 2 user + 2 assistant

    async def test_passes_conversation_history_to_llm(self, service, llm):
        """The LLM should receive conversation history for context."""
        result1 = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        await service.answer_question(
            message="Follow-up question",
            user_id="user-1",
            channel_id="chan-1",
            conversation_id=result1.conversation_id,
        )
        assert len(llm.calls) == 2
        second_call = llm.calls[1]
        assert second_call["history"] is not None
        assert len(second_call["history"]) >= 2

    async def test_searches_rules_with_user_question(self, service, rules_repo):
        """The service should search the rules repo with the user's question."""
        await service.answer_question(
            message="steal a base",
            user_id="u",
            channel_id="c",
        )
        # FakeLLM records what rules it received
        # If "steal" and "base" matched, the steal rule should be in there

    async def test_sets_parent_message_id(self, service):
        """The result should link the assistant message back to the user message."""
        result = await service.answer_question(
            message="question",
            user_id="u",
            channel_id="c",
        )
        assert result.parent_message_id is not None


class TestChatServiceIssueCreation:
    """When confidence is low or no rules match, a Gitea issue should be created."""

    async def test_creates_issue_on_low_confidence(
        self, rules_repo, conversations, issues
    ):
        """When the LLM returns low confidence, an issue is created."""
        low_confidence_llm = FakeLLM(default_confidence=0.2)
        service = ChatService(
            rules=rules_repo,
            llm=low_confidence_llm,
            conversations=conversations,
            issues=issues,
        )
        await service.answer_question(
            message="steal question",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 1
        assert issues.issues[0]["question"] == "steal question"

    async def test_creates_issue_when_needs_human(
        self, rules_repo, conversations, issues
    ):
        """When LLM says needs_human, an issue is created regardless of confidence."""
        llm = FakeLLM(no_rules_confidence=0.1)
        service = ChatService(
            rules=rules_repo,
            llm=llm,
            conversations=conversations,
            issues=issues,
        )
        # Use a question that won't match any rules
        await service.answer_question(
            message="something completely unrelated xyz",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 1

    async def test_no_issue_on_high_confidence(self, service, issues):
        """High confidence answers should not create issues."""
        await service.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 0

    async def test_no_issue_tracker_configured(self, rules_repo, llm, conversations):
        """If no issue tracker is provided, low confidence should not crash."""
        service = ChatService(
            rules=rules_repo,
            llm=llm,
            conversations=conversations,
            issues=None,
        )
        # Should not raise even with low confidence LLM
        result = await service.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert result.response


class TestChatServiceErrorHandling:
    """Service should handle adapter failures gracefully."""

    async def test_llm_error_propagates(self, rules_repo, conversations, issues):
        """If the LLM raises, the service should let it propagate."""
        error_llm = FakeLLM(force_error=RuntimeError("LLM is down"))
        service = ChatService(
            rules=rules_repo,
            llm=error_llm,
            conversations=conversations,
            issues=issues,
        )
        with pytest.raises(RuntimeError, match="LLM is down"):
            await service.answer_question(
                message="steal a base",
                user_id="user-1",
                channel_id="chan-1",
            )

    async def test_issue_creation_failure_does_not_crash(
        self, rules_repo, conversations
    ):
        """If the issue tracker fails, the answer should still be returned."""

        class FailingIssueTracker(FakeIssueTracker):
            async def create_unanswered_issue(self, **kwargs) -> str:
                raise RuntimeError("Gitea is down")

        low_llm = FakeLLM(default_confidence=0.2)
        service = ChatService(
            rules=rules_repo,
            llm=low_llm,
            conversations=conversations,
            issues=FailingIssueTracker(),
        )
        # Should return the answer even though issue creation failed
        result = await service.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert result.response