Domain layer (zero framework imports): - domain/models.py: pure dataclasses (RuleDocument, RuleSearchResult, Conversation, ChatMessage, LLMResponse, ChatResult) - domain/ports.py: ABC interfaces (RuleRepository, LLMPort, ConversationStore, IssueTracker) - domain/services.py: ChatService orchestrates Q&A flow using only ports Outbound adapters (implement domain ports): - adapters/outbound/openrouter.py: OpenRouterLLM with persistent httpx client, robust JSON parsing, regex citation fallback - adapters/outbound/sqlite_convos.py: SQLiteConversationStore with async_sessionmaker, timezone-aware datetimes, cleanup support - adapters/outbound/gitea_issues.py: GiteaIssueTracker with markdown injection protection (fenced code blocks) - adapters/outbound/chroma_rules.py: ChromaRuleRepository with clamped similarity scores Inbound adapter: - adapters/inbound/api.py: thin FastAPI router with input validation (max_length constraints), proper HTTP status codes (503 for missing LLM) Configuration & wiring: - config/settings.py: Pydantic v2 SettingsConfigDict (no module-level singleton) - config/container.py: create_app() factory with lifespan-managed DI - main.py: minimal entry point Test infrastructure (90 tests, all passing): - tests/fakes/: in-memory implementations of all 4 ports - tests/domain/: 26 tests for models and ChatService - tests/adapters/: 64 tests for all adapters using fakes/mocks - No real API calls, no model downloads, no disk I/O in fast tests Also fixes: aiosqlite version constraint (>=0.19.0), adds hatch build targets for new package layout. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
257 lines
8.6 KiB
Python
257 lines
8.6 KiB
Python
"""Tests for ChatService — the core use case, tested entirely with fakes."""
|
|
|
|
import pytest
|
|
|
|
from domain.models import RuleDocument
|
|
from domain.services import ChatService
|
|
from tests.fakes import (
|
|
FakeRuleRepository,
|
|
FakeLLM,
|
|
FakeConversationStore,
|
|
FakeIssueTracker,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def rules_repo():
|
|
repo = FakeRuleRepository()
|
|
repo.add_documents(
|
|
[
|
|
RuleDocument(
|
|
rule_id="5.2.1(b)",
|
|
title="Stolen Base Attempts",
|
|
section="Baserunning",
|
|
content="When a runner attempts to steal a base, roll 2 dice.",
|
|
source_file="rules.md",
|
|
),
|
|
RuleDocument(
|
|
rule_id="3.1",
|
|
title="Pitching Overview",
|
|
section="Pitching",
|
|
content="The pitcher rolls for each at-bat using the pitching card.",
|
|
source_file="rules.md",
|
|
),
|
|
]
|
|
)
|
|
return repo
|
|
|
|
|
|
@pytest.fixture
|
|
def llm():
|
|
return FakeLLM()
|
|
|
|
|
|
@pytest.fixture
|
|
def conversations():
|
|
return FakeConversationStore()
|
|
|
|
|
|
@pytest.fixture
|
|
def issues():
|
|
return FakeIssueTracker()
|
|
|
|
|
|
@pytest.fixture
|
|
def service(rules_repo, llm, conversations, issues):
|
|
return ChatService(
|
|
rules=rules_repo,
|
|
llm=llm,
|
|
conversations=conversations,
|
|
issues=issues,
|
|
)
|
|
|
|
|
|
class TestChatServiceAnswerQuestion:
|
|
"""ChatService.answer_question orchestrates the full Q&A flow."""
|
|
|
|
async def test_returns_answer_with_cited_rules(self, service):
|
|
"""When rules match the question, the LLM is called and rules are cited."""
|
|
result = await service.answer_question(
|
|
message="How do I steal a base?",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert "5.2.1(b)" in result.cited_rules
|
|
assert result.confidence == 0.9
|
|
assert result.needs_human is False
|
|
assert result.conversation_id # should be a non-empty string
|
|
assert result.message_id # should be a non-empty string
|
|
|
|
async def test_creates_conversation_and_messages(self, service, conversations):
|
|
"""The service should persist both user and assistant messages."""
|
|
result = await service.answer_question(
|
|
message="How do I steal?",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
history = await conversations.get_conversation_history(result.conversation_id)
|
|
assert len(history) == 2
|
|
assert history[0]["role"] == "user"
|
|
assert history[1]["role"] == "assistant"
|
|
|
|
async def test_continues_existing_conversation(self, service, conversations):
|
|
"""Passing a conversation_id should reuse the existing conversation."""
|
|
result1 = await service.answer_question(
|
|
message="How do I steal?",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
result2 = await service.answer_question(
|
|
message="What about pickoffs?",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
conversation_id=result1.conversation_id,
|
|
parent_message_id=result1.message_id,
|
|
)
|
|
assert result2.conversation_id == result1.conversation_id
|
|
history = await conversations.get_conversation_history(result1.conversation_id)
|
|
assert len(history) == 4 # 2 user + 2 assistant
|
|
|
|
async def test_passes_conversation_history_to_llm(self, service, llm):
|
|
"""The LLM should receive conversation history for context."""
|
|
result1 = await service.answer_question(
|
|
message="How do I steal?",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
await service.answer_question(
|
|
message="Follow-up question",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
conversation_id=result1.conversation_id,
|
|
)
|
|
assert len(llm.calls) == 2
|
|
second_call = llm.calls[1]
|
|
assert second_call["history"] is not None
|
|
assert len(second_call["history"]) >= 2
|
|
|
|
async def test_searches_rules_with_user_question(self, service, rules_repo):
|
|
"""The service should search the rules repo with the user's question."""
|
|
await service.answer_question(
|
|
message="steal a base",
|
|
user_id="u",
|
|
channel_id="c",
|
|
)
|
|
# FakeLLM records what rules it received
|
|
# If "steal" and "base" matched, the steal rule should be in there
|
|
|
|
async def test_sets_parent_message_id(self, service):
|
|
"""The result should link the assistant message back to the user message."""
|
|
result = await service.answer_question(
|
|
message="question",
|
|
user_id="u",
|
|
channel_id="c",
|
|
)
|
|
assert result.parent_message_id is not None
|
|
|
|
|
|
class TestChatServiceIssueCreation:
|
|
"""When confidence is low or no rules match, a Gitea issue should be created."""
|
|
|
|
async def test_creates_issue_on_low_confidence(
|
|
self, rules_repo, conversations, issues
|
|
):
|
|
"""When the LLM returns low confidence, an issue is created."""
|
|
low_confidence_llm = FakeLLM(default_confidence=0.2)
|
|
service = ChatService(
|
|
rules=rules_repo,
|
|
llm=low_confidence_llm,
|
|
conversations=conversations,
|
|
issues=issues,
|
|
)
|
|
await service.answer_question(
|
|
message="steal question",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert len(issues.issues) == 1
|
|
assert issues.issues[0]["question"] == "steal question"
|
|
|
|
async def test_creates_issue_when_needs_human(
|
|
self, rules_repo, conversations, issues
|
|
):
|
|
"""When LLM says needs_human, an issue is created regardless of confidence."""
|
|
llm = FakeLLM(no_rules_confidence=0.1)
|
|
service = ChatService(
|
|
rules=rules_repo,
|
|
llm=llm,
|
|
conversations=conversations,
|
|
issues=issues,
|
|
)
|
|
# Use a question that won't match any rules
|
|
await service.answer_question(
|
|
message="something completely unrelated xyz",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert len(issues.issues) == 1
|
|
|
|
async def test_no_issue_on_high_confidence(self, service, issues):
|
|
"""High confidence answers should not create issues."""
|
|
await service.answer_question(
|
|
message="steal a base",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert len(issues.issues) == 0
|
|
|
|
async def test_no_issue_tracker_configured(self, rules_repo, llm, conversations):
|
|
"""If no issue tracker is provided, low confidence should not crash."""
|
|
service = ChatService(
|
|
rules=rules_repo,
|
|
llm=llm,
|
|
conversations=conversations,
|
|
issues=None,
|
|
)
|
|
# Should not raise even with low confidence LLM
|
|
result = await service.answer_question(
|
|
message="steal a base",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert result.response
|
|
|
|
|
|
class TestChatServiceErrorHandling:
|
|
"""Service should handle adapter failures gracefully."""
|
|
|
|
async def test_llm_error_propagates(self, rules_repo, conversations, issues):
|
|
"""If the LLM raises, the service should let it propagate."""
|
|
error_llm = FakeLLM(force_error=RuntimeError("LLM is down"))
|
|
service = ChatService(
|
|
rules=rules_repo,
|
|
llm=error_llm,
|
|
conversations=conversations,
|
|
issues=issues,
|
|
)
|
|
with pytest.raises(RuntimeError, match="LLM is down"):
|
|
await service.answer_question(
|
|
message="steal a base",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
|
|
async def test_issue_creation_failure_does_not_crash(
|
|
self, rules_repo, conversations
|
|
):
|
|
"""If the issue tracker fails, the answer should still be returned."""
|
|
|
|
class FailingIssueTracker(FakeIssueTracker):
|
|
async def create_unanswered_issue(self, **kwargs) -> str:
|
|
raise RuntimeError("Gitea is down")
|
|
|
|
low_llm = FakeLLM(default_confidence=0.2)
|
|
service = ChatService(
|
|
rules=rules_repo,
|
|
llm=low_llm,
|
|
conversations=conversations,
|
|
issues=FailingIssueTracker(),
|
|
)
|
|
# Should return the answer even though issue creation failed
|
|
result = await service.answer_question(
|
|
message="steal a base",
|
|
user_id="user-1",
|
|
channel_id="chan-1",
|
|
)
|
|
assert result.response
|