"""Tests for the FastAPI inbound adapter (adapters/inbound/api.py).

Strategy
--------
We build a minimal FastAPI app in each fixture by wiring fakes into app.state,
then drive it with httpx.AsyncClient using ASGITransport so no real HTTP server
is needed.  This means:

- No real ChromaDB, SQLite, OpenRouter, or Gitea calls.
- Tests are fast, deterministic, and isolated.
- The test app mirrors exactly what the production container does — the only
  difference is which objects sit in app.state.

What is tested
--------------
- POST /chat returns 200 and a well-formed ChatResponse for a normal message.
- POST /chat stores the conversation and returns a stable conversation_id on a
  second call with the same conversation_id (conversation continuation).
- GET /health returns {"status": "healthy", ...} with rule counts.
- GET /stats returns a knowledge_base sub-dict and a config sub-dict.
- POST /chat with missing required fields returns HTTP 422 (Unprocessable Entity).
- POST /chat with a message that exceeds 4000 characters returns HTTP 422.
- POST /chat with a user_id that exceeds 64 characters returns HTTP 422.
- POST /chat when ChatService.answer_question raises returns HTTP 500.
- POST /chat and GET /stats return 401 without a valid X-API-Secret header when
  a secret is configured; GET /health stays public.
- RateLimiter allows requests within the window and blocks once the limit is hit.
- RateLimiter resets after the window expires so the caller can send again.
- POST /chat returns 429 when the per-user rate limit is exceeded.
"""

from __future__ import annotations

from collections.abc import AsyncIterator
from unittest.mock import patch

import pytest
import httpx
from fastapi import FastAPI
from httpx import ASGITransport

from domain.models import RuleDocument
from domain.services import ChatService
from adapters.inbound.api import router, RateLimiter
from tests.fakes import (
    FakeRuleRepository,
    FakeLLM,
    FakeConversationStore,
    FakeIssueTracker,
)


# ---------------------------------------------------------------------------
# Test app factory
# ---------------------------------------------------------------------------


def make_test_app(
    *,
    rules: FakeRuleRepository | None = None,
    llm: FakeLLM | None = None,
    conversations: FakeConversationStore | None = None,
    issues: FakeIssueTracker | None = None,
    top_k_rules: int = 5,
    api_secret: str = "",
) -> FastAPI:
    """Build a minimal FastAPI app with fakes wired into app.state.

    The factory mirrors what config/container.py does in production, but uses
    in-memory fakes so no external services are needed.  Each test that calls
    this gets a fresh, isolated set of fakes unless shared fixtures are passed.

    Args:
        rules: Rule repository fake; a new FakeRuleRepository when None.
        llm: LLM fake; a new FakeLLM when None.
        conversations: Conversation store fake; a new FakeConversationStore
            when None.
        issues: Issue tracker fake; a new FakeIssueTracker when None.
        top_k_rules: Forwarded to ChatService and echoed in config_snapshot.
        api_secret: Value stored in app.state.api_secret ("" by default).

    Returns:
        A FastAPI app with the router included and app.state populated.
    """
    # Compare against None explicitly: ``x or Fake()`` would silently discard a
    # caller-supplied fake that happens to be falsy (e.g. an empty container).
    _rules = FakeRuleRepository() if rules is None else rules
    _llm = FakeLLM() if llm is None else llm
    _conversations = (
        FakeConversationStore() if conversations is None else conversations
    )
    _issues = FakeIssueTracker() if issues is None else issues

    service = ChatService(
        rules=_rules,
        llm=_llm,
        conversations=_conversations,
        issues=_issues,
        top_k_rules=top_k_rules,
    )

    app = FastAPI()
    app.include_router(router)
    app.state.chat_service = service
    app.state.rule_repository = _rules
    app.state.api_secret = api_secret
    app.state.config_snapshot = {
        "openrouter_model": "fake-model",
        "top_k_rules": top_k_rules,
        "embedding_model": "fake-embeddings",
    }
    return app


def _make_client(app: FastAPI) -> httpx.AsyncClient:
    """Return an AsyncClient that drives *app* in-process via ASGITransport."""
    return httpx.AsyncClient(transport=ASGITransport(app=app), base_url="http://test")


# ---------------------------------------------------------------------------
# Shared fixture: an async client backed by the test app
# ---------------------------------------------------------------------------


@pytest.fixture()
async def client() -> AsyncIterator[httpx.AsyncClient]:
    """Yield an AsyncClient wired to a fresh test app.

    Each test function gets its own completely isolated set of fakes so that
    state from one test cannot leak into another.
    """
    app = make_test_app()
    async with _make_client(app) as ac:
        yield ac


# ---------------------------------------------------------------------------
# POST /chat — successful response
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_returns_200_with_valid_payload(client: httpx.AsyncClient):
    """A well-formed POST /chat request must return HTTP 200 and a response body
    that maps one-to-one with the ChatResponse Pydantic model.

    We verify every field so a structural change to ChatResult or ChatResponse
    is caught immediately rather than silently producing a wrong value.
    """
    payload = {
        "message": "How many strikes to strike out?",
        "user_id": "user-001",
        "channel_id": "channel-001",
    }
    resp = await client.post("/chat", json=payload)

    assert resp.status_code == 200
    body = resp.json()
    assert isinstance(body["response"], str)
    assert len(body["response"]) > 0
    assert isinstance(body["conversation_id"], str)
    assert isinstance(body["message_id"], str)
    assert isinstance(body["cited_rules"], list)
    assert isinstance(body["confidence"], float)
    assert isinstance(body["needs_human"], bool)


@pytest.mark.asyncio
async def test_chat_uses_rules_when_available():
    """When the FakeRuleRepository has documents matching the query, the FakeLLM
    receives them and returns a high-confidence answer with cited_rules populated.

    This exercises the full ChatService flow through the inbound adapter.
    """
    rules_repo = FakeRuleRepository()
    rules_repo.add_documents(
        [
            RuleDocument(
                rule_id="1.1",
                title="Batting Order",
                section="Batting",
                content="A batter gets three strikes before striking out.",
                source_file="rules.pdf",
            )
        ]
    )
    app = make_test_app(rules=rules_repo)

    async with _make_client(app) as ac:
        resp = await ac.post(
            "/chat",
            json={
                "message": "How many strikes before a batter strikes out?",
                "user_id": "user-abc",
                "channel_id": "ch-xyz",
            },
        )

    assert resp.status_code == 200
    body = resp.json()
    # FakeLLM returns cited_rules when rules are found
    assert len(body["cited_rules"]) > 0
    assert body["confidence"] > 0.5


# ---------------------------------------------------------------------------
# POST /chat — conversation continuation
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_continues_existing_conversation():
    """Supplying conversation_id in the request should resume the same
    conversation rather than creating a new one.

    We make two requests: the first creates a conversation and returns its ID;
    the second passes that ID back and must return the same conversation_id.
    This ensures the FakeConversationStore (and real SQLite adapter) behave
    consistently from the router's perspective.
    """
    conversations = FakeConversationStore()
    app = make_test_app(conversations=conversations)

    async with _make_client(app) as ac:
        # First turn — no conversation_id
        resp1 = await ac.post(
            "/chat",
            json={
                "message": "First question",
                "user_id": "user-42",
                "channel_id": "ch-1",
            },
        )
        assert resp1.status_code == 200
        conv_id = resp1.json()["conversation_id"]

        # Second turn — same conversation
        resp2 = await ac.post(
            "/chat",
            json={
                "message": "Follow-up question",
                "user_id": "user-42",
                "channel_id": "ch-1",
                "conversation_id": conv_id,
            },
        )
        assert resp2.status_code == 200
        assert resp2.json()["conversation_id"] == conv_id


# ---------------------------------------------------------------------------
# GET /health
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_health_returns_healthy_status(client: httpx.AsyncClient):
    """GET /health must return {"status": "healthy", ...} with integer rule count
    and a sections dict.

    The FakeRuleRepository starts empty so rules_count should be 0.
    """
    resp = await client.get("/health")

    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "healthy"
    assert isinstance(body["rules_count"], int)
    assert isinstance(body["sections"], dict)


@pytest.mark.asyncio
async def test_health_reflects_loaded_rules():
    """After adding documents to FakeRuleRepository, GET /health must show the
    updated rule count.

    This confirms the router reads a live reference to the repository, not a
    snapshot taken at startup.
    """
    rules_repo = FakeRuleRepository()
    rules_repo.add_documents(
        [
            RuleDocument(
                rule_id="2.1",
                title="Pitching",
                section="Pitching",
                content="The pitcher throws the ball.",
                source_file="rules.pdf",
            )
        ]
    )
    app = make_test_app(rules=rules_repo)

    async with _make_client(app) as ac:
        resp = await ac.get("/health")

    assert resp.status_code == 200
    assert resp.json()["rules_count"] == 1


# ---------------------------------------------------------------------------
# GET /stats
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_stats_returns_knowledge_base_and_config(client: httpx.AsyncClient):
    """GET /stats must include a knowledge_base sub-dict (from
    RuleRepository.get_stats) and a config sub-dict (from
    app.state.config_snapshot set by the container).

    This ensures the stats endpoint exposes enough information for an operator
    to confirm what model and retrieval settings are active.
    """
    resp = await client.get("/stats")

    assert resp.status_code == 200
    body = resp.json()
    assert "knowledge_base" in body
    assert "config" in body
    assert "total_rules" in body["knowledge_base"]


# ---------------------------------------------------------------------------
# POST /chat — validation errors (HTTP 422)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_missing_message_returns_422(client: httpx.AsyncClient):
    """Omitting the required 'message' field must trigger Pydantic validation and
    return HTTP 422 Unprocessable Entity with a detail array describing the error.

    We do NOT want a 500 — a missing field is a client error, not a server error.
    """
    resp = await client.post("/chat", json={"user_id": "u1", "channel_id": "ch1"})
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_missing_user_id_returns_422(client: httpx.AsyncClient):
    """Omitting 'user_id' must return HTTP 422."""
    resp = await client.post("/chat", json={"message": "Hello", "channel_id": "ch1"})
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_missing_channel_id_returns_422(client: httpx.AsyncClient):
    """Omitting 'channel_id' must return HTTP 422."""
    resp = await client.post("/chat", json={"message": "Hello", "user_id": "u1"})
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_message_too_long_returns_422(client: httpx.AsyncClient):
    """A message that exceeds 4000 characters must fail field-level validation
    and return HTTP 422 rather than passing to the service layer.

    The max_length constraint on ChatRequest.message enforces this.
    """
    long_message = "x" * 4001
    resp = await client.post(
        "/chat",
        json={"message": long_message, "user_id": "u1", "channel_id": "ch1"},
    )
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_user_id_too_long_returns_422(client: httpx.AsyncClient):
    """A user_id that exceeds 64 characters must return HTTP 422.

    Discord snowflakes are at most 20 digits; 64 chars is a generous cap that
    still prevents runaway strings from reaching the database layer.
    """
    long_user_id = "u" * 65
    resp = await client.post(
        "/chat",
        json={"message": "Hello", "user_id": long_user_id, "channel_id": "ch1"},
    )
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_channel_id_too_long_returns_422(client: httpx.AsyncClient):
    """A channel_id that exceeds 64 characters must return HTTP 422."""
    long_channel_id = "c" * 65
    resp = await client.post(
        "/chat",
        json={"message": "Hello", "user_id": "u1", "channel_id": long_channel_id},
    )
    assert resp.status_code == 422


@pytest.mark.asyncio
async def test_chat_empty_message_returns_422(client: httpx.AsyncClient):
    """An empty string for 'message' must fail min_length=1 and return HTTP 422.

    We never want an empty string propagated to the LLM — it would produce a
    confusing response and waste tokens.
    """
    resp = await client.post(
        "/chat", json={"message": "", "user_id": "u1", "channel_id": "ch1"}
    )
    assert resp.status_code == 422


# ---------------------------------------------------------------------------
# POST /chat — service-layer exception bubbles up as 500
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_service_exception_returns_500():
    """When ChatService.answer_question raises an unexpected exception the router
    must catch it and return HTTP 500, not let the exception propagate and crash
    the server process.

    We use FakeLLM(force_error=...) to inject the failure deterministically.
    """
    broken_llm = FakeLLM(force_error=RuntimeError("LLM exploded"))
    app = make_test_app(llm=broken_llm)

    async with _make_client(app) as ac:
        resp = await ac.post(
            "/chat",
            json={"message": "Hello", "user_id": "u1", "channel_id": "ch1"},
        )

    assert resp.status_code == 500
    assert "LLM exploded" in resp.json()["detail"]


# ---------------------------------------------------------------------------
# POST /chat — parent_message_id thread reply
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_with_parent_message_id_returns_200(client: httpx.AsyncClient):
    """Supplying the optional parent_message_id must not cause an error.

    The field passes through to ChatService and ends up in the conversation
    store.  We just assert a 200 here — the service-layer tests cover the
    parent_id wiring in more detail.
    """
    resp = await client.post(
        "/chat",
        json={
            "message": "Thread reply",
            "user_id": "u1",
            "channel_id": "ch1",
            "parent_message_id": "some-parent-uuid",
        },
    )

    assert resp.status_code == 200
    body = resp.json()
    # The response's parent_message_id is the user turn message id,
    # not the one we passed in — that's the service's threading model.
    assert body["parent_message_id"] is not None


# ---------------------------------------------------------------------------
# API secret authentication
# ---------------------------------------------------------------------------

_CHAT_PAYLOAD = {"message": "Test question", "user_id": "u1", "channel_id": "ch1"}


@pytest.mark.asyncio
async def test_chat_no_secret_configured_allows_any_request():
    """When api_secret is empty (the default for local dev), POST /chat must
    succeed without any X-API-Secret header.

    This preserves the existing open-access behaviour so developers can run the
    service locally without configuring a secret.
    """
    app = make_test_app(api_secret="")

    async with _make_client(app) as ac:
        resp = await ac.post("/chat", json=_CHAT_PAYLOAD)

    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_chat_missing_secret_header_returns_401():
    """When api_secret is configured, POST /chat without X-API-Secret must return
    HTTP 401, preventing unauthenticated access to the LLM endpoint.
    """
    app = make_test_app(api_secret="supersecret")

    async with _make_client(app) as ac:
        resp = await ac.post("/chat", json=_CHAT_PAYLOAD)

    assert resp.status_code == 401


@pytest.mark.asyncio
async def test_chat_wrong_secret_header_returns_401():
    """A request with an incorrect X-API-Secret value must return HTTP 401.

    This guards against callers who know a header is required but are guessing
    or have an outdated secret.
    """
    app = make_test_app(api_secret="supersecret")

    async with _make_client(app) as ac:
        resp = await ac.post(
            "/chat", json=_CHAT_PAYLOAD, headers={"X-API-Secret": "wrongvalue"}
        )

    assert resp.status_code == 401


@pytest.mark.asyncio
async def test_chat_correct_secret_header_returns_200():
    """A request with the correct X-API-Secret header must succeed and return
    HTTP 200 when api_secret is configured.
    """
    app = make_test_app(api_secret="supersecret")

    async with _make_client(app) as ac:
        resp = await ac.post(
            "/chat", json=_CHAT_PAYLOAD, headers={"X-API-Secret": "supersecret"}
        )

    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_health_always_public():
    """GET /health must return 200 regardless of whether api_secret is set.

    Health checks are used by monitoring systems that do not hold application
    secrets; requiring auth there would break uptime probes.
    """
    app = make_test_app(api_secret="supersecret")

    async with _make_client(app) as ac:
        resp = await ac.get("/health")

    assert resp.status_code == 200


@pytest.mark.asyncio
async def test_stats_missing_secret_header_returns_401():
    """GET /stats without X-API-Secret must return HTTP 401 when a secret is
    configured.

    The stats endpoint exposes configuration details (model names, retrieval
    settings) that should be restricted to authenticated callers.
    """
    app = make_test_app(api_secret="supersecret")

    async with _make_client(app) as ac:
        resp = await ac.get("/stats")

    assert resp.status_code == 401


# ---------------------------------------------------------------------------
# RateLimiter unit tests
# ---------------------------------------------------------------------------


def test_rate_limiter_allows_requests_within_limit():
    """Requests below max_requests within the window must all return True.

    We create a limiter with max_requests=3 and verify that three consecutive
    calls for the same user are all permitted.
    """
    limiter = RateLimiter(max_requests=3, window_seconds=60.0)

    assert limiter.check("user-a") is True
    assert limiter.check("user-a") is True
    assert limiter.check("user-a") is True


def test_rate_limiter_blocks_when_limit_exceeded():
    """The (max_requests + 1)-th call within the window must return False.

    This confirms the sliding-window boundary is enforced correctly: once a
    user has consumed all allowed slots, further requests are rejected until
    the window advances.
    """
    limiter = RateLimiter(max_requests=3, window_seconds=60.0)

    for _ in range(3):
        limiter.check("user-b")

    assert limiter.check("user-b") is False


def test_rate_limiter_resets_after_window_expires():
    """After the window has fully elapsed, a previously rate-limited user must
    be allowed to send again.

    We use unittest.mock.patch to freeze time.monotonic so the test runs
    instantly: first we consume the quota at t=0, then advance the clock past
    the window boundary and confirm the limiter grants the next request.
    """
    limiter = RateLimiter(max_requests=2, window_seconds=10.0)

    with patch("adapters.inbound.api.time") as mock_time:
        # All requests happen at t=0.
        mock_time.monotonic.return_value = 0.0
        limiter.check("user-c")
        limiter.check("user-c")
        assert limiter.check("user-c") is False  # quota exhausted at t=0

        # Advance time past the full window so all timestamps are stale.
        mock_time.monotonic.return_value = 11.0
        assert limiter.check("user-c") is True  # window reset; request allowed


def test_rate_limiter_isolates_different_users():
    """Rate limiting must be per-user: consuming user-x's quota must not affect
    user-y's available requests.

    This covers the dict-keying logic — a bug that shares state across users
    would cause false 429s for innocent callers.
    """
    limiter = RateLimiter(max_requests=1, window_seconds=60.0)

    limiter.check("user-x")  # exhausts user-x's single slot
    assert limiter.check("user-x") is False  # user-x is blocked
    assert limiter.check("user-y") is True  # user-y has their own fresh bucket


# ---------------------------------------------------------------------------
# POST /chat — rate limit integration (HTTP 429)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_returns_429_when_rate_limit_exceeded():
    """POST /chat must return HTTP 429 once the per-user rate limit is hit.

    We swap the module-level _rate_limiter for a tight one (1 request per 60 s)
    so we can exercise the integration between the FastAPI dependency and the
    limiter without waiting for real time to pass.  The first call returns 200;
    the second call for the same user must return 429.
    """
    import adapters.inbound.api as api_module

    # Use a tight limiter (1 request per 60 s) injected into the module so
    # both the app and the dependency share the same instance.
    tight_limiter = RateLimiter(max_requests=1, window_seconds=60.0)
    payload = {"message": "Hello", "user_id": "rl-user", "channel_id": "ch1"}

    # patch.object restores the original limiter on exit even if app
    # construction or an assertion fails, so other tests are never polluted.
    with patch.object(api_module, "_rate_limiter", tight_limiter):
        app = make_test_app()
        async with _make_client(app) as ac:
            resp1 = await ac.post("/chat", json=payload)
            assert resp1.status_code == 200  # first request is within limit

            resp2 = await ac.post("/chat", json=payload)
            assert resp2.status_code == 429  # second request is blocked
            assert "Rate limit" in resp2.json()["detail"]