strat-chatbot/tests/domain/test_services.py
Cal Corum c3218f70c4 refactor: hexagonal architecture with ports & adapters, DI, and test-first development
Domain layer (zero framework imports):
- domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult,
  Conversation, ChatMessage, LLMResponse, ChatResult)
- domain/ports.py: ABC interfaces (RuleRepository, LLMPort,
  ConversationStore, IssueTracker)
- domain/services.py: ChatService orchestrates Q&A flow using only ports

Outbound adapters (implement domain ports):
- adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx
  client, robust JSON parsing, regex citation fallback
- adapters/outbound/sqlite_convos.py: SQLiteConversationStore with
  async_sessionmaker, timezone-aware datetimes, cleanup support
- adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown
  injection protection (fenced code blocks)
- adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped
  similarity scores

Inbound adapter:
- adapters/inbound/api.py: thin FastAPI router with input validation
  (max_length constraints), proper HTTP status codes (503 for missing LLM)

Configuration & wiring:
- config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton)
- config/container.py: create_app() factory with lifespan-managed DI
- main.py: minimal entry point

Test infrastructure (90 tests, all passing):
- tests/fakes/: in-memory implementations of all 4 ports
- tests/domain/: 26 tests for models and ChatService
- tests/adapters/: 64 tests for all adapters using fakes/mocks
- No real API calls, no model downloads, no disk I/O in fast tests

Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build
targets for new package layout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 15:51:16 -05:00

257 lines
8.6 KiB
Python

"""Tests for ChatService — the core use case, tested entirely with fakes."""
import pytest
from domain.models import RuleDocument
from domain.services import ChatService
from tests.fakes import (
FakeRuleRepository,
FakeLLM,
FakeConversationStore,
FakeIssueTracker,
)
@pytest.fixture
def rules_repo():
    """In-memory rule repository seeded with two known rule documents.

    One baserunning rule (matched by "steal"/"base" queries) and one
    pitching rule, so tests can distinguish relevant from irrelevant hits.
    """
    steal_rule = RuleDocument(
        rule_id="5.2.1(b)",
        title="Stolen Base Attempts",
        section="Baserunning",
        content="When a runner attempts to steal a base, roll 2 dice.",
        source_file="rules.md",
    )
    pitching_rule = RuleDocument(
        rule_id="3.1",
        title="Pitching Overview",
        section="Pitching",
        content="The pitcher rolls for each at-bat using the pitching card.",
        source_file="rules.md",
    )
    repo = FakeRuleRepository()
    repo.add_documents([steal_rule, pitching_rule])
    return repo
@pytest.fixture
def llm():
    """Fake LLM with its default (high-confidence) canned behavior."""
    fake = FakeLLM()
    return fake
@pytest.fixture
def conversations():
    """Fresh in-memory conversation store per test."""
    store = FakeConversationStore()
    return store
@pytest.fixture
def issues():
    """Fresh in-memory issue tracker per test."""
    tracker = FakeIssueTracker()
    return tracker
@pytest.fixture
def service(rules_repo, llm, conversations, issues):
    """ChatService wired entirely with in-memory fakes — no real adapters."""
    ports = {
        "rules": rules_repo,
        "llm": llm,
        "conversations": conversations,
        "issues": issues,
    }
    return ChatService(**ports)
class TestChatServiceAnswerQuestion:
    """ChatService.answer_question orchestrates the full Q&A flow."""

    async def test_returns_answer_with_cited_rules(self, service):
        """When rules match the question, the LLM is called and rules are cited."""
        result = await service.answer_question(
            message="How do I steal a base?",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert "5.2.1(b)" in result.cited_rules
        assert result.confidence == 0.9
        assert result.needs_human is False
        assert result.conversation_id  # should be a non-empty string
        assert result.message_id  # should be a non-empty string

    async def test_creates_conversation_and_messages(self, service, conversations):
        """The service should persist both user and assistant messages."""
        result = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        history = await conversations.get_conversation_history(result.conversation_id)
        assert len(history) == 2
        assert history[0]["role"] == "user"
        assert history[1]["role"] == "assistant"

    async def test_continues_existing_conversation(self, service, conversations):
        """Passing a conversation_id should reuse the existing conversation."""
        result1 = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        result2 = await service.answer_question(
            message="What about pickoffs?",
            user_id="user-1",
            channel_id="chan-1",
            conversation_id=result1.conversation_id,
            parent_message_id=result1.message_id,
        )
        assert result2.conversation_id == result1.conversation_id
        history = await conversations.get_conversation_history(result1.conversation_id)
        assert len(history) == 4  # 2 user + 2 assistant

    async def test_passes_conversation_history_to_llm(self, service, llm):
        """The LLM should receive conversation history for context."""
        result1 = await service.answer_question(
            message="How do I steal?",
            user_id="user-1",
            channel_id="chan-1",
        )
        await service.answer_question(
            message="Follow-up question",
            user_id="user-1",
            channel_id="chan-1",
            conversation_id=result1.conversation_id,
        )
        assert len(llm.calls) == 2
        second_call = llm.calls[1]
        assert second_call["history"] is not None
        assert len(second_call["history"]) >= 2

    async def test_searches_rules_with_user_question(self, service, rules_repo):
        """The service should search the rules repo with the user's question."""
        # FIX: this test previously made the call but asserted nothing, so it
        # could never fail. The query "steal a base" matches the seeded
        # baserunning rule, so if the service searched the repo with the
        # user's question, that rule must come back in the citations.
        result = await service.answer_question(
            message="steal a base",
            user_id="u",
            channel_id="c",
        )
        assert "5.2.1(b)" in result.cited_rules

    async def test_sets_parent_message_id(self, service):
        """The result should link the assistant message back to the user message."""
        result = await service.answer_question(
            message="question",
            user_id="u",
            channel_id="c",
        )
        assert result.parent_message_id is not None
class TestChatServiceIssueCreation:
    """When confidence is low or no rules match, a Gitea issue should be created."""

    async def test_creates_issue_on_low_confidence(
        self, rules_repo, conversations, issues
    ):
        """When the LLM returns low confidence, an issue is created."""
        low_confidence_llm = FakeLLM(default_confidence=0.2)
        service = ChatService(
            rules=rules_repo,
            llm=low_confidence_llm,
            conversations=conversations,
            issues=issues,
        )
        await service.answer_question(
            message="steal question",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 1
        assert issues.issues[0]["question"] == "steal question"

    async def test_creates_issue_when_needs_human(
        self, rules_repo, conversations, issues
    ):
        """When LLM says needs_human, an issue is created regardless of confidence."""
        # NOTE(review): this relies on FakeLLM signaling needs_human on its
        # no-rules path (no_rules_confidence) — confirm against tests/fakes.
        llm = FakeLLM(no_rules_confidence=0.1)
        service = ChatService(
            rules=rules_repo,
            llm=llm,
            conversations=conversations,
            issues=issues,
        )
        # Use a question that won't match any rules
        await service.answer_question(
            message="something completely unrelated xyz",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 1

    async def test_no_issue_on_high_confidence(self, service, issues):
        """High confidence answers should not create issues."""
        await service.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert len(issues.issues) == 0

    async def test_no_issue_tracker_configured(self, rules_repo, conversations):
        """If no issue tracker is provided, low confidence should not crash."""
        # FIX: the comment promised a low-confidence LLM but the test used the
        # default high-confidence fixture, so the issue-creation branch was
        # never reached with issues=None. Use a genuinely low-confidence LLM
        # so the "no tracker configured" path is actually exercised.
        low_confidence_llm = FakeLLM(default_confidence=0.2)
        service = ChatService(
            rules=rules_repo,
            llm=low_confidence_llm,
            conversations=conversations,
            issues=None,
        )
        # Should not raise even with low confidence LLM
        result = await service.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        assert result.response
class TestChatServiceErrorHandling:
    """Service should handle adapter failures gracefully."""

    async def test_llm_error_propagates(self, rules_repo, conversations, issues):
        """If the LLM raises, the service should let it propagate."""
        broken_llm = FakeLLM(force_error=RuntimeError("LLM is down"))
        svc = ChatService(
            rules=rules_repo,
            llm=broken_llm,
            conversations=conversations,
            issues=issues,
        )
        with pytest.raises(RuntimeError, match="LLM is down"):
            await svc.answer_question(
                message="steal a base",
                user_id="user-1",
                channel_id="chan-1",
            )

    async def test_issue_creation_failure_does_not_crash(
        self, rules_repo, conversations
    ):
        """If the issue tracker fails, the answer should still be returned."""

        class FailingIssueTracker(FakeIssueTracker):
            # Simulates an unreachable Gitea instance.
            async def create_unanswered_issue(self, **kwargs) -> str:
                raise RuntimeError("Gitea is down")

        # Low confidence forces the service down the issue-creation path.
        svc = ChatService(
            rules=rules_repo,
            llm=FakeLLM(default_confidence=0.2),
            conversations=conversations,
            issues=FailingIssueTracker(),
        )
        answer = await svc.answer_question(
            message="steal a base",
            user_id="user-1",
            channel_id="chan-1",
        )
        # The answer survives the tracker failure.
        assert answer.response