From a34aec06f11e90927d4a911d030380d89d3a9b90 Mon Sep 17 00:00:00 2001
From: Cal Corum <calcorum@users.noreply.github.com>
Date: Fri, 19 Dec 2025 00:18:12 -0600
Subject: [PATCH] Initial commit: Voice server with Piper TTS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A local HTTP service that accepts text via POST and speaks it through
system speakers using Piper TTS neural voice synthesis.

Features:
- POST /notify - Queue text for TTS playback
- GET /health - Health check with TTS/audio/queue status
- GET /voices - List installed voice models
- Async queue processing (no overlapping audio)
- Non-blocking audio via sounddevice
- 73 tests covering API contract

Tech stack:
- FastAPI + Uvicorn
- Piper TTS (neural voices, offline)
- sounddevice (PortAudio)
- Pydantic for validation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .env.example         |   22 +
 .gitignore           |   58 ++
 PRD.md               | 2147 ++++++++++++++++++++++++++++++++++++++++++
 PROJECT_ROADMAP.json | 1012 ++++++++++++++++++++
 README.md            |   41 +
 app/__init__.py      |    0
 app/audio_player.py  |  192 ++++
 app/config.py        |   98 ++
 app/main.py          |  140 +++
 app/models.py        |  162 ++++
 app/queue_manager.py |  236 +++++
 app/routes.py        |  198 ++++
 app/tts_engine.py    |  287 ++++++
 pyproject.toml       |   69 ++
 tests/__init__.py    |    0
 tests/test_api.py    |  324 +++++++
 tests/test_config.py |  300 ++++++
 tests/test_models.py |  388 ++++++++
 18 files changed, 5674 insertions(+)
 create mode 100644 .env.example
 create mode 100644 .gitignore
 create mode 100644 PRD.md
 create mode 100644 PROJECT_ROADMAP.json
 create mode 100644 README.md
 create mode 100644 app/__init__.py
 create mode 100644 app/audio_player.py
 create mode 100644 app/config.py
 create mode 100644 app/main.py
 create mode 100644 app/models.py
 create mode 100644 app/queue_manager.py
 create mode 100644 app/routes.py
 create mode 100644 app/tts_engine.py
 create mode 100644 pyproject.toml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_api.py
 create mode 100644 tests/test_config.py
 create mode 100644 tests/test_models.py

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..49c70cc
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,22 @@
+# Voice Server Configuration
+# Copy this file to .env and modify as needed
+
+# Server Settings
+HOST=0.0.0.0
+PORT=8888
+
+# TTS Settings
+MODEL_DIR=./models
+DEFAULT_VOICE=en_US-lessac-medium
+DEFAULT_RATE=170
+
+# Queue Settings
+QUEUE_MAX_SIZE=50
+REQUEST_TIMEOUT_SECONDS=60
+
+# Logging
+LOG_LEVEL=INFO
+LOG_FILE=voice-server.log
+
+# Debug (disable TTS for testing)
+# VOICE_ENABLED=true
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e175b07
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,58 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# Environment
+.env
+.env.local
+
+# Logs
+*.log
+logs/
+
+# Voice models (large files)
+models/*.onnx
+models/*.onnx.json
+
+# Testing
+.coverage
+htmlcov/
+.pytest_cache/
+.tox/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# uv
+uv.lock
diff --git a/PRD.md b/PRD.md
new file mode 100644
index 0000000..5c4c0f6
--- /dev/null
+++ b/PRD.md
@@ -0,0 +1,2147 @@
+# Product Requirements Document: Local Voice Server
+
+**Version:** 1.0
+**Date:** 2025-12-18
+**Author:** Atlas (Principal Software Architect)
+**Project:** Local HTTP Voice Server for Text-to-Speech
+
+---
+
+## Table of Contents
+
+1. [Executive Summary](#executive-summary)
+2. [Goals and Non-Goals](#goals-and-non-goals)
+3. [Technical Requirements](#technical-requirements)
+4. [System Architecture](#system-architecture)
+5. [API Specification](#api-specification)
+6. [TTS Engine Analysis](#tts-engine-analysis)
+7. [Web Framework Selection](#web-framework-selection)
+8. [Audio Playback Strategy](#audio-playback-strategy)
+9. [Error Handling Strategy](#error-handling-strategy)
+10. [Implementation Checklist](#implementation-checklist)
+11. [Testing Strategy](#testing-strategy)
+12. [Future Considerations](#future-considerations)
+
+---
+
+## Executive Summary
+
+### Project Overview
+
+This project delivers a local HTTP service that accepts POST requests containing text strings and converts them to speech through the computer's speakers. The service will run locally on Linux (Nobara/Fedora 42), providing fast, offline text-to-speech capabilities without requiring external API calls or internet connectivity.
+
+### Success Metrics
+
+- **Response Time:** TTS conversion and playback initiation within 200ms for short texts (< 100 characters)
+- **Reliability:** 99.9% successful request handling under normal operating conditions
+- **Concurrency:** Support for at least 5 concurrent TTS requests with proper queuing
+- **Audio Quality:** Clear, intelligible speech output comparable to Google TTS quality
+- **Startup Time:** Server ready to accept requests within 2 seconds of launch
+
+### Technical Stack
+
+| Component | Technology | Justification |
+|-----------|-----------|---------------|
+| Web Framework | FastAPI | Async support, high performance (15k-20k req/s), automatic API documentation |
+| TTS Engine | Piper TTS | Neural voice quality, offline, optimized for local inference, ONNX-based |
+| Audio Playback | sounddevice | Cross-platform, Pythonic API, excellent NumPy integration, non-blocking playback |
+| Package Manager | uv | Fast Python package management (user preference) |
+| ASGI Server | Uvicorn | High-performance ASGI server, native FastAPI integration |
+| Async Runtime | asyncio | Built-in Python async support for concurrent request handling |
+
+### Timeline Estimate
+
+- **Phase 1 - Core Implementation:** 2-3 days (basic HTTP server + TTS integration)
+- **Phase 2 - Error Handling & Testing:** 1-2 days (comprehensive error handling, unit tests)
+- **Phase 3 - Concurrency & Queue Management:** 1-2 days (async queue, concurrent playback)
+- **Total Estimated Time:** 4-7 days for production-ready v1.0
+
+### Resource Requirements
+
+- **Development:** 1 full-stack Python developer with async programming experience
+- **Testing:** Access to Linux environment (Nobara/Fedora 42) with audio hardware
+- **Infrastructure:** Local development machine with 2+ CPU cores, 4GB+ RAM
+
+---
+
+## Goals and Non-Goals
+
+### Goals
+
+**Primary Goals:**
+1. Create a local HTTP service that accepts text via POST requests
+2. Convert text to speech using high-quality offline TTS
+3. Play audio through system speakers with minimal latency
+4. Support concurrent requests with proper queue management
+5. Provide comprehensive error handling and logging
+6. Maintain zero external dependencies (fully offline capable)
+
+**Secondary Goals:**
+1. Automatic API documentation via FastAPI's built-in OpenAPI support
+2. Configurable TTS parameters (voice, speed, volume) via request parameters
+3. Health check endpoint for service monitoring
+4. Graceful handling of long-running text conversions
+5. Support for multiple voice models
+
+### Non-Goals
+
+**Explicitly Out of Scope:**
+1. Cloud-based or external API integration
+2. Speech-to-text (STT) capabilities
+3. Audio file storage or retrieval
+4. User authentication or authorization
+5. Rate limiting or quota management
+6. Multi-language UI or web interface
+7. Real-time streaming audio synthesis
+8. Mobile app integration
+9. Persistent audio history or logging
+10. Advanced audio effects (reverb, pitch shifting, etc.)
+
+---
+
+## Technical Requirements
+
+### Functional Requirements
+
+#### FR1: HTTP Server
+- **FR1.1:** Server SHALL listen on configurable host and port (default: `0.0.0.0:8888`)
+- **FR1.2:** Server SHALL accept POST requests to `/notify` endpoint
+- **FR1.3:** Server SHALL accept JSON payload with `message` field containing text
+- **FR1.4:** Server SHALL return HTTP 200 with success confirmation
+- **FR1.5:** Server SHALL support CORS for local development
+
+#### FR2: Text-to-Speech Conversion
+- **FR2.1:** System SHALL convert text strings to audio using Piper TTS
+- **FR2.2:** System SHALL support configurable voice models via request parameters
+- **FR2.3:** System SHALL support adjustable speech rate (50-400 words per minute)
+- **FR2.4:** System SHALL handle text inputs from 1 to 10,000 characters
+- **FR2.5:** System SHALL use default voice if not specified in request
+
+#### FR3: Audio Playback
+- **FR3.1:** System SHALL play generated audio through default system audio output
+- **FR3.2:** System SHALL support non-blocking audio playback
+- **FR3.3:** System SHALL queue concurrent requests in FIFO order
+- **FR3.4:** System SHALL allow configurable maximum queue size (default: 50)
+- **FR3.5:** System SHALL provide feedback when queue is full
+
+#### FR4: Configuration
+- **FR4.1:** System SHALL support configuration via environment variables
+- **FR4.2:** System SHALL support configuration via command-line arguments
+- **FR4.3:** System SHALL provide sensible defaults for all configuration values
+- **FR4.4:** System SHALL validate configuration at startup
+
+#### FR5: Error Handling
+- **FR5.1:** System SHALL return appropriate HTTP error codes for failures
+- **FR5.2:** System SHALL log all errors with timestamps and context
+- **FR5.3:** System SHALL continue operating after non-fatal errors
+- **FR5.4:** System SHALL gracefully handle TTS engine failures
+- **FR5.5:** System SHALL provide detailed error messages in responses
+
+### Non-Functional Requirements
+
+#### NFR1: Performance
+- **NFR1.1:** API response time SHALL be < 50ms (excluding TTS processing)
+- **NFR1.2:** TTS conversion SHALL complete in < 2 seconds for 500 character texts
+- **NFR1.3:** System SHALL handle 20+ requests per second without degradation
+- **NFR1.4:** Memory usage SHALL remain < 500MB under normal load
+- **NFR1.5:** CPU usage SHALL average < 30% during active TTS processing
+
+#### NFR2: Reliability
+- **NFR2.1:** System SHALL maintain 99.9% uptime during operation
+- **NFR2.2:** System SHALL recover from audio device disconnections
+- **NFR2.3:** System SHALL handle Out-of-Memory conditions gracefully
+- **NFR2.4:** System SHALL log all critical errors for debugging
+
+#### NFR3: Maintainability
+- **NFR3.1:** Code SHALL maintain > 80% test coverage
+- **NFR3.2:** All functions SHALL include docstrings with type hints
+- **NFR3.3:** Code SHALL follow PEP 8 style guidelines
+- **NFR3.4:** Dependencies SHALL be pinned to specific versions
+- **NFR3.5:** README SHALL provide clear setup and usage instructions
+
+#### NFR4: Security
+- **NFR4.1:** System SHALL sanitize all text inputs to prevent injection attacks
+- **NFR4.2:** System SHALL limit request payload size to 1MB
+- **NFR4.3:** System SHALL not expose internal stack traces in API responses
+- **NFR4.4:** System SHALL log all incoming requests for audit purposes
+
+#### NFR5: Compatibility
+- **NFR5.1:** System SHALL run on Linux (Nobara/Fedora 42)
+- **NFR5.2:** System SHALL support Python 3.9+
+- **NFR5.3:** System SHALL work with standard ALSA/PulseAudio setups
+- **NFR5.4:** System SHALL be deployable as a systemd service
+
+---
+
+## System Architecture
+
+### High-Level Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        Client Applications                       │
+│              (AI Agents, Scripts, Other Services)               │
+└────────────────────────────┬────────────────────────────────────┘
+                             │ HTTP POST /notify
+                             │ JSON: {"message": "text"}
+                             ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                     FastAPI Web Server                          │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐        │
+│  │   /notify    │  │   /health    │  │    /docs     │        │
+│  │  endpoint    │  │  endpoint    │  │  (Swagger)   │        │
+│  └──────┬───────┘  └──────────────┘  └──────────────┘        │
+│         │                                                       │
+│         │ Validates & Enqueues                                 │
+│         ▼                                                       │
+│  ┌──────────────────────────────────────────────────┐         │
+│  │          Async Request Queue                     │         │
+│  │  (asyncio.Queue with max size limit)            │         │
+│  └──────────────────┬───────────────────────────────┘         │
+└────────────────────┬┼───────────────────────────────────────────┘
+                     ││
+                     ││ Background Task Processing
+                     ▼▼
+┌─────────────────────────────────────────────────────────────────┐
+│                    TTS Processing Layer                         │
+│  ┌────────────────────────────────────────────────────┐        │
+│  │              Piper TTS Engine                      │        │
+│  │  ┌──────────────┐  ┌──────────────┐               │        │
+│  │  │ Voice Models │  │ ONNX Runtime │               │        │
+│  │  │  (.onnx +    │  │  Inference   │               │        │
+│  │  │   .json)     │  │    Engine    │               │        │
+│  │  └──────────────┘  └──────────────┘               │        │
+│  └─────────────────────────┬──────────────────────────┘        │
+│                            │ Generate WAV                       │
+│                            ▼                                    │
+│  ┌────────────────────────────────────────────────────┐        │
+│  │          In-Memory Audio Buffer                    │        │
+│  │        (NumPy array / bytes)                       │        │
+│  └─────────────────────────┬──────────────────────────┘        │
+└────────────────────────────┼───────────────────────────────────┘
+                             │
+                             ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                  Audio Playback Layer                           │
+│  ┌────────────────────────────────────────────────────┐        │
+│  │              PyAudio Stream Manager                │        │
+│  │  - Callback-based playback                         │        │
+│  │  - Non-blocking operation                          │        │
+│  │  - Stream lifecycle management                     │        │
+│  └─────────────────────────┬──────────────────────────┘        │
+│                            │                                    │
+│                            ▼                                    │
+│  ┌────────────────────────────────────────────────────┐        │
+│  │         System Audio Output (ALSA/PulseAudio)     │        │
+│  └────────────────────────────────────────────────────┘        │
+└─────────────────────────────────────────────────────────────────┘
+                             │
+                             ▼
+                    🔊 Computer Speakers
+```
+
+### Component Descriptions
+
+#### 1. FastAPI Web Server
+- **Responsibilities:**
+  - Accept and validate HTTP POST requests
+  - Provide automatic OpenAPI documentation
+  - Handle CORS configuration
+  - Route requests to appropriate handlers
+  - Return HTTP responses with appropriate status codes
+
+- **Dependencies:**
+  - FastAPI framework
+  - Uvicorn ASGI server
+  - Pydantic for request/response validation
+
+#### 2. Async Request Queue
+- **Responsibilities:**
+  - Queue incoming TTS requests in FIFO order
+  - Prevent queue overflow with configurable max size
+  - Enable asynchronous processing without blocking HTTP responses
+  - Provide queue status information
+
+- **Implementation:**
+  - `asyncio.Queue` for async-safe queuing
+  - Background task workers to process queue
+  - Queue metrics (size, processed count, errors)
+
+#### 3. TTS Processing Layer
+- **Responsibilities:**
+  - Load and manage Piper TTS voice models
+  - Convert text to audio waveforms
+  - Handle voice model selection
+  - Configure TTS parameters (rate, pitch, volume)
+  - Generate in-memory audio buffers
+
+- **Implementation:**
+  - Piper TTS Python bindings
+  - ONNX Runtime for model inference
+  - Voice model caching for performance
+  - Error handling for model loading failures
+
+#### 4. Audio Playback Layer
+- **Responsibilities:**
+  - Initialize audio output streams
+  - Play audio buffers through system speakers
+  - Support non-blocking playback
+  - Handle audio device errors
+  - Manage stream lifecycle
+
+- **Implementation:**
+  - sounddevice for cross-platform audio I/O
+  - Non-blocking `sd.play()` with background playback
+  - Simple NumPy array integration
+  - Graceful handling of audio device disconnections
+
+### Data Flow
+
+**Request Processing Flow:**
+
+1. **HTTP Request Reception:**
+   - Client sends POST to `/notify` with JSON payload
+   - FastAPI validates request schema using Pydantic models
+   - Request is immediately acknowledged with HTTP 202 (Accepted)
+
+2. **Request Enqueueing:**
+   - Validated request is added to async queue
+   - If queue is full, return HTTP 503 (Service Unavailable)
+   - Queue position is logged for monitoring
+
+3. **Background Processing:**
+   - Background worker retrieves request from queue
+   - Text is passed to Piper TTS for conversion
+   - Piper generates WAV audio in memory
+
+4. **Audio Playback:**
+   - Audio buffer is passed to PyAudio
+   - PyAudio streams audio to system output
+   - Playback occurs in callback thread (non-blocking)
+   - Completion is logged
+
+5. **Error Handling:**
+   - Errors at any stage are caught and logged
+   - Failed requests are removed from queue
+   - Error metrics are updated
+
+### Technology Stack Justification
+
+#### FastAPI vs Flask
+
+**Decision: FastAPI**
+
+**Rationale:**
+- **Performance:** FastAPI handles 15,000-20,000 req/s vs Flask's 2,000-3,000 req/s ([Strapi Comparison](https://strapi.io/blog/fastapi-vs-flask-python-framework-comparison))
+- **Async Native:** Built on ASGI with native async/await support, critical for non-blocking TTS processing
+- **Type Safety:** Pydantic integration provides automatic request validation and serialization
+- **Documentation:** Automatic OpenAPI (Swagger) documentation generation
+- **Modern Architecture:** Designed for microservices and high-concurrency applications
+- **Growing Adoption:** 78k GitHub stars, 38% developer adoption in 2025 (40% YoY increase)
+
+**Trade-offs:**
+- Steeper learning curve compared to Flask
+- Smaller ecosystem of extensions (though growing rapidly)
+- Requires ASGI server (Uvicorn) vs Flask's built-in development server
+
+#### Piper TTS Engine Selection
+
+**Decision: Piper TTS**
+
+**Rationale:**
+- **Voice Quality:** Neural TTS with "Google TTS level quality" ([AntiX Forum](https://www.antixforum.com/forums/topic/tts-text-to-speech-in-linux-piper/))
+- **Offline Operation:** Fully local, no internet required
+- **Performance:** Optimized for local inference using ONNX Runtime
+- **Resource Efficiency:** Runs on Raspberry Pi 4, suitable for desktop Linux
+- **Easy Installation:** Available via pip (`pip install piper-tts`)
+- **Active Development:** Maintained project with 2025 updates
+- **Multiple Voices:** Extensive voice model library with quality/speed trade-offs
+
+**Comparison with Alternatives:**
+
+| Engine | Voice Quality | Speed | Resource Usage | Offline | Ease of Use |
+|--------|---------------|-------|----------------|---------|-------------|
+| **Piper TTS** | ⭐⭐⭐⭐⭐ Neural | ⭐⭐⭐⭐ Fast | ⭐⭐⭐⭐ Medium | ✅ Yes | ⭐⭐⭐⭐ Easy |
+| pyttsx3 | ⭐⭐ Robotic | ⭐⭐⭐⭐⭐ Very Fast | ⭐⭐⭐⭐⭐ Very Low | ✅ Yes | ⭐⭐⭐⭐⭐ Very Easy |
+| eSpeak | ⭐⭐ Robotic | ⭐⭐⭐⭐⭐ Very Fast | ⭐⭐⭐⭐⭐ Very Low | ✅ Yes | ⭐⭐⭐⭐ Easy |
+| gTTS | ⭐⭐⭐⭐⭐ Neural | ⭐⭐ Slow | ⭐⭐⭐⭐ Low | ❌ No | ⭐⭐⭐⭐⭐ Very Easy |
+| Coqui TTS | ⭐⭐⭐⭐⭐ Neural | ⭐⭐⭐ Medium | ⭐⭐ High | ✅ Yes | ⭐⭐ Complex |
+
+**Trade-offs:**
+- Larger model files (~20-100MB per voice) vs simple engines
+- Higher resource usage than pyttsx3/eSpeak
+- Requires ONNX Runtime dependency
+
+#### sounddevice for Audio Playback
+
+**Decision: sounddevice**
+
+**Rationale:**
+- **Pythonic API:** Clean, intuitive interface that feels native to Python
+- **NumPy Integration:** Direct support for NumPy arrays (perfect for Piper TTS output)
+- **Non-Blocking:** Simple `sd.play()` returns immediately, audio plays in background
+- **Cross-Platform:** Works on Linux, Windows, macOS via PortAudio backend
+- **Active Maintenance:** Well-maintained with regular updates
+- **Simple Async:** Easy integration with asyncio via `sd.wait()` or callbacks
+
+**Comparison with Alternatives:**
+
+| Library | Non-Blocking | Dependencies | Maintenance | Linux Support |
+|---------|-------------|--------------|-------------|---------------|
+| **sounddevice** | ✅ Native | PortAudio | ⭐⭐⭐⭐ Active | ✅ Excellent |
+| PyAudio | ✅ Callbacks | PortAudio | ⭐⭐⭐ Active | ✅ Excellent |
+| simpleaudio | ✅ Async | None | ❌ Archived | ⭐⭐⭐ Good |
+| pygame | ⭐ Limited | SDL | ⭐⭐⭐⭐ Active | ⭐⭐⭐⭐ Excellent |
+
+**Why sounddevice over PyAudio:**
+- Simpler API - `sd.play(audio, samplerate)` vs PyAudio's stream setup
+- Better NumPy support - no conversion needed from Piper's output
+- More Pythonic - feels like a modern Python library
+- Easier async integration - works naturally with asyncio
+
+---
+
+## API Specification
+
+### Endpoint: POST /notify
+
+**Description:** Accept text string and queue for TTS playback
+
+**Request Schema:**
+
+```json
+{
+  "message": "string (required)",
+  "voice": "string (optional)",
+  "rate": "integer (optional, default: 170)",
+  "voice_enabled": "boolean (optional, default: true)"
+}
+```
+
+**Request Parameters:**
+
+| Parameter | Type | Required | Default | Constraints | Description |
+|-----------|------|----------|---------|-------------|-------------|
+| `message` | string | Yes | - | 1-10000 chars | Text to convert to speech |
+| `voice` | string | No | `en_US-lessac-medium` | Valid voice model name | Piper voice model to use |
+| `rate` | integer | No | `170` | 50-400 | Speech rate in words per minute |
+| `voice_enabled` | boolean | No | `true` | - | Enable/disable TTS (for debugging) |
+
+**Example Request:**
+
+```bash
+curl -X POST http://localhost:8888/notify \
+  -H "Content-Type: application/json" \
+  -d '{
+    "message": "Hello, this is a test of the voice server",
+    "rate": 200,
+    "voice_enabled": true
+  }'
+```
+
+**Response Schema (Success - 202 Accepted):**
+
+```json
+{
+  "status": "queued",
+  "message_length": 42,
+  "queue_position": 3,
+  "estimated_duration": 2.5,
+  "voice_model": "en_US-lessac-medium"
+}
+```
+
+**Response Schema (Error - 400 Bad Request):**
+
+```json
+{
+  "error": "validation_error",
+  "detail": "message field is required",
+  "timestamp": "2025-12-18T10:30:45.123Z"
+}
+```
+
+**Response Schema (Error - 503 Service Unavailable):**
+
+```json
+{
+  "error": "queue_full",
+  "detail": "TTS queue is full, please retry later",
+  "queue_size": 50,
+  "timestamp": "2025-12-18T10:30:45.123Z"
+}
+```
+
+**HTTP Status Codes:**
+
+| Code | Meaning | Scenario |
+|------|---------|----------|
+| 202 | Accepted | Request successfully queued for processing |
+| 400 | Bad Request | Invalid request parameters or malformed JSON |
+| 413 | Payload Too Large | Message exceeds 10,000 characters |
+| 422 | Unprocessable Entity | Valid JSON but invalid parameter values |
+| 500 | Internal Server Error | TTS engine failure or unexpected error |
+| 503 | Service Unavailable | Queue is full or service is shutting down |
+
+---
+
+### Endpoint: GET /health
+
+**Description:** Health check endpoint for monitoring
+
+**Request:** No parameters
+
+**Response Schema (Healthy - 200 OK):**
+
+```json
+{
+  "status": "healthy",
+  "uptime_seconds": 3600,
+  "queue_size": 2,
+  "queue_capacity": 50,
+  "tts_engine": "piper",
+  "audio_output": "available",
+  "voice_models_loaded": ["en_US-lessac-medium"],
+  "total_requests": 1523,
+  "failed_requests": 12,
+  "timestamp": "2025-12-18T10:30:45.123Z"
+}
+```
+
+**Response Schema (Unhealthy - 503 Service Unavailable):**
+
+```json
+{
+  "status": "unhealthy",
+  "errors": [
+    "Audio output device unavailable",
+    "TTS engine failed to initialize"
+  ],
+  "timestamp": "2025-12-18T10:30:45.123Z"
+}
+```
+
+---
+
+### Endpoint: GET /docs
+
+**Description:** Automatic Swagger UI documentation (provided by FastAPI)
+
+**Access:** `http://localhost:8888/docs`
+
+**Features:**
+- Interactive API testing
+- Schema visualization
+- Request/response examples
+- Authentication testing (if implemented)
+
+---
+
+### Endpoint: GET /voices
+
+**Description:** List available TTS voice models
+
+**Request:** No parameters
+
+**Response Schema (200 OK):**
+
+```json
+{
+  "voices": [
+    {
+      "name": "en_US-lessac-medium",
+      "language": "en_US",
+      "quality": "medium",
+      "size_mb": 63.5,
+      "installed": true
+    },
+    {
+      "name": "en_US-libritts-high",
+      "language": "en_US",
+      "quality": "high",
+      "size_mb": 108.2,
+      "installed": false
+    }
+  ],
+  "default_voice": "en_US-lessac-medium"
+}
+```
+
+---
+
+## TTS Engine Analysis
+
+### Detailed Comparison Matrix
+
+| Engine | Voice Quality | Latency | CPU Usage | Memory | Offline | Linux Support | Python API | Maintenance |
+|--------|---------------|---------|-----------|--------|---------|---------------|------------|-------------|
+| **Piper TTS** | ⭐⭐⭐⭐⭐ | ~500ms | Medium | ~200MB | ✅ | ✅ Excellent | ✅ Native | 🟢 Active |
+| **pyttsx3** | ⭐⭐ | ~100ms | Low | ~50MB | ✅ | ✅ Good | ✅ Native | 🟢 Active |
+| **eSpeak-ng** | ⭐⭐ | ~50ms | Very Low | ~20MB | ✅ | ✅ Excellent | ⚠️ Wrapper | 🟢 Active |
+| **gTTS** | ⭐⭐⭐⭐⭐ | ~2000ms | Low | ~30MB | ❌ | ✅ Good | ✅ Native | 🟢 Active |
+| **Coqui TTS** | ⭐⭐⭐⭐⭐ | ~1500ms | High | ~500MB | ✅ | ✅ Good | ✅ Native | 🟡 Slow |
+| **Festival** | ⭐⭐⭐ | ~300ms | Low | ~100MB | ✅ | ✅ Excellent | ⚠️ Wrapper | 🟡 Slow |
+| **Mimic3** | ⭐⭐⭐⭐ | ~800ms | Medium | ~300MB | ✅ | ✅ Good | ❌ HTTP only | 🟢 Active |
+
+### Detailed Engine Profiles
+
+#### 1. Piper TTS (RECOMMENDED)
+
+**Pros:**
+- Neural TTS with natural-sounding voices
+- Optimized for local inference (ONNX Runtime)
+- Multiple quality levels (low/medium/high)
+- Extensive language and voice support
+- Active development and community
+- Easy pip installation
+- GPU acceleration support (CUDA)
+
+**Cons:**
+- Larger model files (20-100MB per voice)
+- Higher resource usage than simple engines
+- Initial model download required
+- Slightly higher latency than robotic engines
+
+**Installation:**
+```bash
+uv pip install piper-tts
+```
+
+**Usage Example:**
+```python
+from piper import PiperVoice
+import wave
+
+voice = PiperVoice.load("en_US-lessac-medium.onnx")
+with wave.open("output.wav", "wb") as wav_file:
+    voice.synthesize("Hello world", wav_file)
+```
+
+**Voice Quality Sample:**
+- **Low Quality:** Faster, smaller models (~20MB), decent quality
+- **Medium Quality:** Balanced performance (~60MB), recommended default
+- **High Quality:** Best quality (~100MB), slower inference
+
+**References:**
+- [GitHub Repository](https://github.com/rhasspy/piper)
+- [PyPI Package](https://pypi.org/project/piper-tts/)
+- [Voice Model Library](https://github.com/rhasspy/piper/blob/master/VOICES.md)
+
+---
+
+#### 2. pyttsx3
+
+**Pros:**
+- Extremely lightweight and fast
+- Cross-platform (Windows SAPI5, macOS NSSpeech, Linux eSpeak)
+- Zero external dependencies
+- Simple API
+- No model downloads required
+
+**Cons:**
+- Robotic voice quality
+- Limited voice customization
+- Depends on system TTS engines
+
+**Installation:**
+```bash
+uv pip install pyttsx3
+```
+
+**Usage Example:**
+```python
+import pyttsx3
+
+engine = pyttsx3.init()
+engine.say("Hello world")
+engine.runAndWait()
+```
+
+**References:**
+- [PyPI Package](https://pypi.org/project/pyttsx3/)
+- [GitHub Repository](https://github.com/nateshmbhat/pyttsx3)
+
+---
+
+#### 3. eSpeak-ng
+
+**Pros:**
+- Ultra-fast synthesis
+- 100+ language support
+- Minimal resource usage
+- Highly customizable
+- System-level installation
+
+**Cons:**
+- Robotic, mechanical voice quality
+- Python wrapper required (not native)
+- Less natural prosody
+
+**Installation:**
+```bash
+# System package
+sudo dnf install espeak-ng
+
+# Python wrapper
+uv pip install py3-tts  # Uses eSpeak backend
+```
+
+**Usage Example:**
+```bash
+echo "Hello world" | espeak-ng
+```
+
+**References:**
+- [eSpeak-ng Homepage](https://github.com/espeak-ng/espeak-ng)
+- [Circuit Digest Comparison](https://circuitdigest.com/microcontroller-projects/best-text-to-speech-tts-converter-for-raspberry-pi-espeak-festival-google-tts-pico-and-pyttsx3)
+
+---
+
+#### 4. Coqui TTS
+
+**Pros:**
+- State-of-the-art neural voices
+- Custom voice training support
+- Multiple model architectures
+- High-quality output
+
+**Cons:**
+- Very high resource requirements
+- Slower inference
+- Complex setup
+- Larger memory footprint
+- Development has slowed
+
+**Installation:**
+```bash
+uv pip install TTS
+```
+
+**Usage Example:**
+```python
+from TTS.api import TTS
+
+tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
+tts.tts_to_file(text="Hello world", file_path="output.wav")
+```
+
+**References:**
+- [Coqui TTS GitHub](https://github.com/coqui-ai/TTS)
+
+---
+
+### Recommendation: Piper TTS
+
+**Final Decision:** Piper TTS is the optimal choice for this project.
+
+**Justification:**
+1. **Quality:** Neural voices with Google TTS-level quality
+2. **Offline:** Fully local, no internet required (critical requirement)
+3. **Performance:** Optimized for local inference, suitable for desktop Linux
+4. **Python Native:** First-class Python API, easy integration
+5. **Maintenance:** Actively maintained with 2025 updates
+6. **Flexibility:** Multiple quality levels allow performance tuning
+7. **Ease of Use:** Simple pip installation, straightforward API
+
+**Configuration Strategy:**
+- **Default Voice:** `en_US-lessac-medium` (balanced quality/performance)
+- **GPU Acceleration:** Optional CUDA support for faster inference
+- **Model Caching:** Pre-load voice models at startup to reduce latency
+- **Quality Toggle:** Allow clients to request different quality levels
+
+---
+
+## Web Framework Selection
+
+### FastAPI: Detailed Analysis
+
+**Why FastAPI is Ideal for This Project:**
+
+#### 1. Async-First Architecture
+FastAPI is built on Starlette (ASGI framework) with native async/await support. This is critical for our use case:
+
+```python
+@app.post("/notify")
+async def notify(request: NotifyRequest):
+    # Non-blocking enqueueing
+    await tts_queue.put(request)
+    return {"status": "queued"}
+
+# Background worker runs concurrently
+async def process_queue():
+    while True:
+        request = await tts_queue.get()
+        await generate_and_play_tts(request)
+```
+
+**Benefit:** HTTP responses return immediately while TTS processing happens in background.
+
+#### 2. Performance Benchmarks
+
+According to TechEmpower benchmarks ([Better Stack](https://betterstack.com/community/guides/scaling-python/flask-vs-fastapi/)):
+- **FastAPI:** 15,000-20,000 requests/second
+- **Flask:** 2,000-3,000 requests/second
+
+**Benefit:** 5-10x higher throughput for handling concurrent TTS requests.
+
+#### 3. Automatic API Documentation
+
+FastAPI generates interactive OpenAPI (Swagger) documentation automatically:
+
+```python
+@app.post("/notify", response_model=NotifyResponse)
+async def notify(request: NotifyRequest):
+    """
+    Convert text to speech and play through speakers.
+
+    - **message**: Text to convert (1-10000 characters)
+    - **rate**: Speech rate in WPM (50-400)
+    - **voice**: Voice model name (optional)
+    """
+    ...
+```
+
+**Benefit:** Instant API documentation at `/docs` without manual maintenance.
+
+#### 4. Type Safety with Pydantic
+
+Automatic request validation and serialization:
+
+```python
+from pydantic import BaseModel, Field, validator
+
+class NotifyRequest(BaseModel):
+    message: str = Field(..., min_length=1, max_length=10000)
+    rate: int = Field(170, ge=50, le=400)
+    voice_enabled: bool = True
+
+    @validator('message')
+    def sanitize_message(cls, v):
+        # Automatic validation before handler runs
+        return v.strip()
+```
+
+**Benefit:** Eliminates manual validation code, reduces bugs.
+
+#### 5. Dependency Injection
+
+Clean separation of concerns:
+
+```python
+async def get_tts_engine():
+    return global_tts_engine
+
+@app.post("/notify")
+async def notify(
+    request: NotifyRequest,
+    tts_engine: PiperVoice = Depends(get_tts_engine)
+):
+    # tts_engine automatically injected
+    ...
+```
+
+**Benefit:** Testable, maintainable code with clear dependencies.
+
+#### 6. Background Tasks
+
+Built-in support for fire-and-forget tasks:
+
+```python
+from fastapi import BackgroundTasks
+
+@app.post("/notify")
+async def notify(request: NotifyRequest, background_tasks: BackgroundTasks):
+    background_tasks.add_task(generate_tts, request.message)
+    return {"status": "queued"}
+```
+
+**Benefit:** Simplified async task management.
+
+### Flask Comparison (Why Not Flask)
+
+**Flask Limitations for This Project:**
+
+1. **WSGI-Based:** Synchronous by default, requires Gunicorn/gevent for async
+2. **Lower Performance:** 2,000-3,000 req/s vs FastAPI's 15,000-20,000 req/s
+3. **Manual Documentation:** Requires Flask-RESTPlus or manual OpenAPI setup
+4. **Manual Validation:** No built-in request validation, requires Flask-Pydantic extension
+5. **Blocking I/O:** Natural behavior blocks request threads during TTS processing
+
+**When Flask Would Be Better:**
+- Simple synchronous applications
+- Heavy reliance on Flask extensions (Flask-Login, Flask-Admin)
+- Team already experienced with Flask
+- Need for Jinja2 templating (not needed here)
+
+**Verdict:** FastAPI is the clear winner for this async-heavy, high-performance use case.
+
+---
+
+## Audio Playback Strategy
+
+### sounddevice Implementation Details
+
+#### Non-Blocking Playback
+
+sounddevice provides simple, non-blocking audio playback out of the box:
+
+```python
+import sounddevice as sd
+import numpy as np
+
+class AudioPlayer:
+    """Simple audio player using sounddevice."""
+
+    def __init__(self, sample_rate: int = 22050):
+        self.sample_rate = sample_rate
+        self._current_stream = None
+
+    def play(self, audio_data: np.ndarray, sample_rate: int = None):
+        """
+        Non-blocking audio playback.
+
+        Args:
+            audio_data: NumPy array of audio samples (float32 or int16)
+            sample_rate: Sample rate in Hz (defaults to instance default)
+        """
+        rate = sample_rate or self.sample_rate
+
+        # Stop any currently playing audio
+        self.stop()
+
+        # Play audio - returns immediately, audio plays in background
+        sd.play(audio_data, rate)
+
+    def is_playing(self) -> bool:
+        """Check if audio is currently playing."""
+        return sd.get_stream() is not None and sd.get_stream().active
+
+    def stop(self):
+        """Stop current playback."""
+        sd.stop()
+
+    def wait(self):
+        """Block until current playback completes."""
+        sd.wait()
+
+    async def wait_async(self):
+        """Async wait for playback completion."""
+        import asyncio
+        while self.is_playing():
+            await asyncio.sleep(0.05)
+```
+
+**Benefits of sounddevice:**
+- `sd.play()` returns immediately - audio plays in background thread
+- Direct NumPy array support - no conversion needed from Piper TTS
+- Simple API - one line to play audio
+- Built-in `sd.wait()` for synchronous waiting when needed
+
+---
+
+#### Handling Concurrent Requests
+
+**Strategy:** Queue-based sequential playback with async queue management.
+
+**Rationale:**
+- Playing multiple TTS outputs simultaneously would create audio chaos
+- Sequential playback ensures clarity
+- Queue allows buffering during high request volume
+
+**Implementation:**
+
+```python
+import asyncio
+import sounddevice as sd
+import numpy as np
+from typing import Dict, Any
+
+class TTSQueue:
+    def __init__(self, max_size: int = 50):
+        self.queue = asyncio.Queue(maxsize=max_size)
+        self.player = AudioPlayer()
+        self.stats = {"processed": 0, "errors": 0}
+
+    async def enqueue(self, request: Dict[str, Any]):
+        """Add TTS request to queue."""
+        try:
+            await asyncio.wait_for(
+                self.queue.put(request),
+                timeout=1.0
+            )
+            return self.queue.qsize()
+        except asyncio.TimeoutError:
+            raise QueueFullError("TTS queue is full")
+
+    async def process_queue(self):
+        """Background worker to process TTS queue."""
+        while True:
+            request = await self.queue.get()
+
+            try:
+                # Generate TTS audio
+                audio_data = await self.generate_tts(request)
+
+                # Play audio (non-blocking start)
+                self.player.play(audio_data, sample_rate=22050)
+
+                # Wait for playback to complete (async-friendly)
+                await self.player.wait_async()
+
+                self.stats["processed"] += 1
+
+            except Exception as e:
+                logger.error(f"TTS processing error: {e}")
+                self.stats["errors"] += 1
+
+            finally:
+                self.queue.task_done()
+
+    async def generate_tts(self, request: Dict[str, Any]) -> np.ndarray:
+        """Generate TTS audio using Piper."""
+        # Run CPU-intensive TTS in thread pool
+        loop = asyncio.get_event_loop()
+        audio_data = await loop.run_in_executor(
+            None,
+            self._sync_generate_tts,
+            request["message"],
+            request.get("voice", "en_US-lessac-medium")
+        )
+        return audio_data
+
+    def _sync_generate_tts(self, text: str, voice: str) -> np.ndarray:
+        """Synchronous TTS generation (runs in thread pool)."""
+        # Piper TTS generation code
+        ...
+        return audio_array
+```
+
+**Startup:**
+
+```python
+from contextlib import asynccontextmanager
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup: initialize queue and start processor
+    global tts_queue
+    tts_queue = TTSQueue(max_size=50)
+    asyncio.create_task(tts_queue.process_queue())
+    yield
+    # Shutdown: stop audio playback
+    sd.stop()
+
+app = FastAPI(lifespan=lifespan)
+```
+
+---
+
+#### Audio Device Error Handling
+
+**Common Issues:**
+1. Audio device disconnected (headphones unplugged)
+2. PulseAudio/ALSA daemon crashed
+3. No audio devices available
+4. Device in use by another process
+
+**Handling Strategy:**
+
+```python
+import sounddevice as sd
+import numpy as np
+import time
+import logging
+
+logger = logging.getLogger(__name__)
+
+class RobustAudioPlayer:
+    """Audio player with automatic retry and device recovery."""
+
+    def __init__(self, retry_attempts: int = 3, sample_rate: int = 22050):
+        self.retry_attempts = retry_attempts
+        self.sample_rate = sample_rate
+        self.verify_audio_devices()
+
+    def verify_audio_devices(self):
+        """Verify audio devices are available."""
+        try:
+            devices = sd.query_devices()
+            output_devices = [d for d in devices if d['max_output_channels'] > 0]
+            if not output_devices:
+                raise AudioDeviceError("No audio output devices found")
+            logger.info(f"Audio initialized: {len(output_devices)} output devices found")
+            logger.debug(f"Default output: {sd.query_devices(kind='output')['name']}")
+        except Exception as e:
+            logger.error(f"Audio initialization failed: {e}")
+            raise
+
+    def play(self, audio_data: np.ndarray, sample_rate: int = None):
+        """Play audio with automatic retry on device errors."""
+        rate = sample_rate or self.sample_rate
+
+        for attempt in range(self.retry_attempts):
+            try:
+                sd.play(audio_data, rate)
+                return
+            except sd.PortAudioError as e:
+                logger.warning(f"Audio playback failed (attempt {attempt+1}): {e}")
+
+                if attempt < self.retry_attempts - 1:
+                    # Wait and retry - device may become available
+                    sd.stop()
+                    time.sleep(0.5)
+                    self.verify_audio_devices()
+                else:
+                    raise AudioPlaybackError(f"Failed after {self.retry_attempts} attempts: {e}")
+
+    def is_playing(self) -> bool:
+        """Check if audio is currently playing."""
+        stream = sd.get_stream()
+        return stream is not None and stream.active
+
+    def stop(self):
+        """Stop current playback."""
+        sd.stop()
+
+    async def wait_async(self):
+        """Async wait for playback completion."""
+        import asyncio
+        while self.is_playing():
+            await asyncio.sleep(0.05)
+```
+
+**Device Query for Diagnostics:**
+
+```python
+def get_audio_diagnostics() -> dict:
+    """Get audio system diagnostics for health check."""
+    try:
+        devices = sd.query_devices()
+        default_output = sd.query_devices(kind='output')
+        return {
+            "status": "available",
+            "device_count": len(devices),
+            "default_output": default_output['name'],
+            "sample_rate": default_output['default_samplerate']
+        }
+    except Exception as e:
+        return {
+            "status": "unavailable",
+            "error": str(e)
+        }
+```
+
+---
+
+## Error Handling Strategy
+
+### Error Categories and Handling
+
+#### 1. Request Validation Errors
+
+**Scenarios:**
+- Missing required fields
+- Invalid parameter types
+- Out-of-range values
+- Malformed JSON
+
+**Handling:**
+
+```python
+from fastapi import HTTPException, status
+from pydantic import BaseModel, Field, ValidationError
+
+class NotifyRequest(BaseModel):
+    message: str = Field(..., min_length=1, max_length=10000)
+    rate: int = Field(170, ge=50, le=400)
+    voice: str = Field("en_US-lessac-medium", regex=r"^[\w-]+$")
+
+@app.exception_handler(ValidationError)
+async def validation_exception_handler(request, exc):
+    return JSONResponse(
+        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+        content={
+            "error": "validation_error",
+            "detail": str(exc),
+            "timestamp": datetime.utcnow().isoformat()
+        }
+    )
+```
+
+**HTTP Status:** 422 Unprocessable Entity
+
+---
+
+#### 2. Queue Full Errors
+
+**Scenario:** Too many concurrent requests, queue is at capacity
+
+**Handling:**
+
+```python
+class QueueFullError(Exception):
+    pass
+
+@app.post("/notify")
+async def notify(request: NotifyRequest):
+    try:
+        position = await tts_queue.enqueue(request)
+        return {
+            "status": "queued",
+            "queue_position": position
+        }
+    except QueueFullError:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail={
+                "error": "queue_full",
+                "message": "TTS queue is full, please retry later",
+                "queue_size": tts_queue.max_size,
+                "retry_after": 5  # seconds
+            }
+        )
+```
+
+**HTTP Status:** 503 Service Unavailable
+**Client Action:** Implement exponential backoff retry
+
+---
+
+#### 3. TTS Engine Errors
+
+**Scenarios:**
+- Voice model not found
+- ONNX Runtime errors
+- Memory allocation failures
+- Corrupted model files
+
+**Handling:**
+
+```python
+class TTSEngineError(Exception):
+    pass
+
+async def generate_tts(text: str, voice: str) -> np.ndarray:
+    try:
+        # Attempt TTS generation
+        audio = piper_voice.synthesize(text)
+        return audio
+    except FileNotFoundError:
+        raise TTSEngineError(f"Voice model '{voice}' not found")
+    except MemoryError:
+        raise TTSEngineError("Insufficient memory for TTS generation")
+    except Exception as e:
+        logger.error(f"TTS generation failed: {e}", exc_info=True)
+        raise TTSEngineError(f"TTS generation failed: {str(e)}")
+
+@app.exception_handler(TTSEngineError)
+async def tts_engine_exception_handler(request, exc):
+    return JSONResponse(
+        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        content={
+            "error": "tts_engine_error",
+            "detail": str(exc),
+            "timestamp": datetime.utcnow().isoformat()
+        }
+    )
+```
+
+**HTTP Status:** 500 Internal Server Error
+
+---
+
+#### 4. Audio Playback Errors
+
+**Scenarios:**
+- No audio devices available
+- Audio device disconnected
+- ALSA/PulseAudio errors
+- Permission denied
+
+**Handling:**
+
+```python
+class AudioPlaybackError(Exception):
+    pass
+
+async def play_audio(audio_data: np.ndarray):
+    try:
+        player.play_with_retry(audio_data, sample_rate=22050)
+    except AudioDeviceError as e:
+        logger.error(f"Audio device error: {e}")
+        raise AudioPlaybackError("No audio output devices available")
+    except OSError as e:
+        logger.error(f"Audio system error: {e}")
+        raise AudioPlaybackError(f"Audio playback failed: {str(e)}")
+
+# In queue processor
+try:
+    await play_audio(audio_data)
+except AudioPlaybackError as e:
+    logger.error(f"Playback error: {e}")
+    # Continue processing queue, don't crash server
+    stats["errors"] += 1
+```
+
+**Action:** Log error, continue processing queue (don't crash server)
+
+---
+
+#### 5. System Resource Errors
+
+**Scenarios:**
+- Out of memory
+- CPU overload
+- Disk space exhausted
+
+**Handling:**
+
+```python
+import psutil
+
+async def check_system_resources():
+    """Monitor system resources."""
+    memory = psutil.virtual_memory()
+    if memory.percent > 90:
+        logger.warning(f"High memory usage: {memory.percent}%")
+
+    cpu = psutil.cpu_percent(interval=1)
+    if cpu > 90:
+        logger.warning(f"High CPU usage: {cpu}%")
+
+@app.middleware("http")
+async def resource_monitoring_middleware(request, call_next):
+    """Monitor resources on each request."""
+    await check_system_resources()
+    response = await call_next(request)
+    return response
+```
+
+**Action:** Log warnings, implement queue size limits to prevent resource exhaustion
+
+---
+
+### Logging Strategy
+
+**Log Levels:**
+
+```python
+import logging
+from logging.handlers import RotatingFileHandler
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        RotatingFileHandler(
+            'voice-server.log',
+            maxBytes=10*1024*1024,  # 10MB
+            backupCount=5
+        ),
+        logging.StreamHandler()
+    ]
+)
+
+logger = logging.getLogger(__name__)
+
+# Log levels usage:
+logger.debug("TTS parameters: rate=%d, voice=%s", rate, voice)  # DEBUG
+logger.info("Request queued: position=%d", queue_position)       # INFO
+logger.warning("Queue nearly full: %d/%d", current, max_size)    # WARNING
+logger.error("TTS generation failed: %s", error, exc_info=True)  # ERROR
+logger.critical("Audio system unavailable, shutting down")       # CRITICAL
+```
+
+**Structured Logging:**
+
+```python
+import json
+from datetime import datetime
+
+def log_request(request_id: str, message: str, status: str):
+    """Structured JSON logging."""
+    log_entry = {
+        "timestamp": datetime.utcnow().isoformat(),
+        "request_id": request_id,
+        "message_length": len(message),
+        "status": status,
+        "event_type": "tts_request"
+    }
+    logger.info(json.dumps(log_entry))
+```
+
+---
+
+### Health Check Implementation
+
+**Comprehensive Health Checks:**
+
+```python
+@app.get("/health")
+async def health_check():
+    """Detailed health status."""
+    health_status = {
+        "status": "healthy",
+        "timestamp": datetime.utcnow().isoformat(),
+        "checks": {}
+    }
+
+    # Check TTS engine
+    try:
+        tts_engine.test_synthesis("test")
+        health_status["checks"]["tts_engine"] = "healthy"
+    except Exception as e:
+        health_status["checks"]["tts_engine"] = f"unhealthy: {str(e)}"
+        health_status["status"] = "unhealthy"
+
+    # Check audio output
+    try:
+        audio_player.test_output()
+        health_status["checks"]["audio_output"] = "healthy"
+    except Exception as e:
+        health_status["checks"]["audio_output"] = f"unhealthy: {str(e)}"
+        health_status["status"] = "unhealthy"
+
+    # Check queue status
+    queue_size = tts_queue.qsize()
+    health_status["checks"]["queue"] = {
+        "size": queue_size,
+        "capacity": tts_queue.max_size,
+        "utilization": f"{(queue_size/tts_queue.max_size)*100:.1f}%"
+    }
+
+    # Check system resources
+    health_status["checks"]["system"] = {
+        "memory_percent": psutil.virtual_memory().percent,
+        "cpu_percent": psutil.cpu_percent(interval=0.1)
+    }
+
+    status_code = 200 if health_status["status"] == "healthy" else 503
+    return JSONResponse(status_code=status_code, content=health_status)
+```
+
+---
+
+## Implementation Checklist
+
+### Phase 1: Core Infrastructure (Days 1-2)
+
+#### 1.1 Project Setup
+- [ ] Initialize project directory `/mnt/NV2/Development/voice-server`
+- [ ] Create Python virtual environment using `uv`
+- [ ] Install core dependencies:
+  - [ ] `uv pip install fastapi`
+  - [ ] `uv pip install uvicorn[standard]`
+  - [ ] `uv pip install piper-tts`
+  - [ ] `uv pip install sounddevice`
+  - [ ] `uv pip install numpy`
+  - [ ] `uv pip install pydantic`
+  - [ ] `uv pip install python-dotenv`
+- [ ] Create `requirements.txt` with pinned versions
+- [ ] Create `.env.example` for configuration template
+- [ ] Initialize git repository
+- [ ] Create `.gitignore` (Python, IDEs, .env, voice models)
+
+#### 1.2 FastAPI Application Structure
+- [ ] Create `app/main.py` with FastAPI app initialization
+- [ ] Implement `/notify` endpoint skeleton
+- [ ] Implement `/health` endpoint skeleton
+- [ ] Implement `/voices` endpoint skeleton
+- [ ] Configure CORS middleware
+- [ ] Configure JSON logging middleware
+- [ ] Create Pydantic models for request/response schemas
+- [ ] Test basic server startup: `uvicorn app.main:app --reload`
+
+#### 1.3 Configuration Management
+- [ ] Create `app/config.py` for configuration loading
+- [ ] Implement environment variable loading
+- [ ] Define configuration schema (host, port, queue size, etc.)
+- [ ] Implement configuration validation at startup
+- [ ] Create CLI argument parsing for overrides
+- [ ] Document all configuration options in README
+
+### Phase 2: TTS Integration (Days 2-3)
+
+#### 2.1 Piper TTS Setup
+- [ ] Create `app/tts_engine.py` module
+- [ ] Implement `PiperTTSEngine` class
+- [ ] Download default voice model (`en_US-lessac-medium`)
+- [ ] Implement voice model loading with caching
+- [ ] Implement text-to-audio synthesis method
+- [ ] Add support for configurable speech rate
+- [ ] Test TTS generation with sample text
+- [ ] Measure TTS latency for various text lengths
+
+#### 2.2 Voice Model Management
+- [ ] Create `models/` directory for voice model storage
+- [ ] Implement voice model discovery (scan `models/` directory)
+- [ ] Implement lazy loading of voice models (load on first use)
+- [ ] Create model metadata cache (name, language, quality, size)
+- [ ] Implement `/voices` endpoint to list available models
+- [ ] Add error handling for missing/corrupted models
+- [ ] Document voice model installation process
+
+#### 2.3 TTS Parameter Support
+- [ ] Implement speech rate adjustment (50-400 WPM)
+- [ ] Test rate adjustment across range
+- [ ] Add voice selection via request parameter
+- [ ] Implement voice validation (reject unknown voices)
+- [ ] Add `voice_enabled` flag for debugging/testing
+- [ ] Create comprehensive TTS unit tests
+
+### Phase 3: Audio Playback (Day 3)
+
+#### 3.1 sounddevice Integration
+- [ ] Create `app/audio_player.py` module
+- [ ] Implement `AudioPlayer` class with non-blocking `sd.play()`
+- [ ] Verify sounddevice detects audio devices at startup
+- [ ] Implement non-blocking playback method
+- [ ] Implement async `wait_async()` method for queue processing
+- [ ] Test audio playback with sample NumPy array
+- [ ] Verify non-blocking behavior with concurrent requests
+
+#### 3.2 Audio Error Handling
+- [ ] Implement audio device detection
+- [ ] Add retry logic for device failures
+- [ ] Handle device disconnection gracefully
+- [ ] Test with headphones unplugged during playback
+- [ ] Implement fallback to different audio devices
+- [ ] Add detailed audio error logging
+- [ ] Create audio system health check
+
+#### 3.3 Playback Testing
+- [ ] Test simultaneous playback (should queue)
+- [ ] Test rapid successive requests
+- [ ] Measure audio latency (request → sound output)
+- [ ] Test with various audio formats
+- [ ] Verify memory cleanup after playback
+- [ ] Test long-running playback (10+ minutes)
+
+### Phase 4: Queue Management (Day 4)
+
+#### 4.1 Async Queue Implementation
+- [ ] Create `app/queue_manager.py` module
+- [ ] Implement `TTSQueue` class with `asyncio.Queue`
+- [ ] Set configurable max queue size (default: 50)
+- [ ] Implement queue full detection
+- [ ] Create background queue processor task
+- [ ] Implement graceful queue shutdown
+- [ ] Add queue metrics (size, processed, errors)
+
+#### 4.2 Request Processing Pipeline
+- [ ] Implement request enqueueing in `/notify` endpoint
+- [ ] Create background worker to process queue
+- [ ] Integrate TTS generation in worker
+- [ ] Integrate audio playback in worker
+- [ ] Implement sequential playback (one at a time)
+- [ ] Add request timeout handling (max 60s per request)
+- [ ] Test queue with 100+ concurrent requests
+
+#### 4.3 Queue Monitoring
+- [ ] Add queue size to `/health` endpoint
+- [ ] Implement queue utilization metrics
+- [ ] Add logging for queue events (enqueue, process, error)
+- [ ] Create queue performance benchmarks
+- [ ] Test queue overflow scenarios
+- [ ] Document queue behavior and limits
+
+### Phase 5: Error Handling (Day 5)
+
+#### 5.1 Exception Handlers
+- [ ] Implement custom exception classes
+- [ ] Create `QueueFullError` exception handler
+- [ ] Create `TTSEngineError` exception handler
+- [ ] Create `AudioPlaybackError` exception handler
+- [ ] Create `ValidationError` exception handler
+- [ ] Implement generic exception handler (catch-all)
+- [ ] Test all error scenarios
+
+#### 5.2 Logging Infrastructure
+- [ ] Configure structured JSON logging
+- [ ] Implement rotating file handler (10MB, 5 backups)
+- [ ] Add request ID tracking across logs
+- [ ] Implement log levels appropriately (DEBUG, INFO, WARNING, ERROR)
+- [ ] Create log aggregation for queue processor
+- [ ] Test log rotation
+- [ ] Document log file locations and format
+
+#### 5.3 Health Monitoring
+- [ ] Implement comprehensive `/health` endpoint
+- [ ] Add TTS engine health check
+- [ ] Add audio system health check
+- [ ] Add queue status to health check
+- [ ] Add system resource metrics (CPU, memory)
+- [ ] Test health endpoint under load
+- [ ] Create health check monitoring script
+
+### Phase 6: Testing (Days 5-6)
+
+#### 6.1 Unit Tests
+- [ ] Create `tests/` directory structure
+- [ ] Install pytest: `uv pip install pytest pytest-asyncio`
+- [ ] Write tests for Pydantic models
+- [ ] Write tests for TTS engine
+- [ ] Write tests for audio player
+- [ ] Write tests for queue manager
+- [ ] Write tests for configuration loading
+- [ ] Achieve 80%+ code coverage
+
+#### 6.2 Integration Tests
+- [ ] Write tests for `/notify` endpoint
+- [ ] Write tests for `/health` endpoint
+- [ ] Write tests for `/voices` endpoint
+- [ ] Test end-to-end request flow
+- [ ] Test concurrent request handling
+- [ ] Test queue overflow scenarios
+- [ ] Test error scenarios (TTS failure, audio failure)
+
+#### 6.3 Performance Tests
+- [ ] Create load testing script with `locust` or `wrk`
+- [ ] Test 100 concurrent requests
+- [ ] Measure request latency (p50, p95, p99)
+- [ ] Measure TTS generation time
+- [ ] Measure audio playback latency
+- [ ] Measure memory usage under load
+- [ ] Document performance characteristics
+
+#### 6.4 System Tests
+- [ ] Test on target Linux environment (Nobara/Fedora 42)
+- [ ] Test with different audio devices
+- [ ] Test with PulseAudio and ALSA
+- [ ] Test headphone disconnect/reconnect
+- [ ] Test system resource exhaustion scenarios
+- [ ] Test server restart recovery
+- [ ] Test long-running stability (24+ hours)
+
+### Phase 7: Documentation & Deployment (Days 6-7)
+
+#### 7.1 Documentation
+- [ ] Create comprehensive README.md:
+  - [ ] Project overview
+  - [ ] Installation instructions
+  - [ ] Configuration options
+  - [ ] Usage examples
+  - [ ] API documentation
+  - [ ] Troubleshooting guide
+- [ ] Create CONTRIBUTING.md (if open source)
+- [ ] Create CHANGELOG.md
+- [ ] Document voice model installation
+- [ ] Create architecture diagrams
+- [ ] Add inline code documentation
+- [ ] Create example client scripts (curl, Python)
+
+#### 7.2 Deployment Preparation
+- [ ] Create systemd service file (`voice-server.service`)
+- [ ] Test systemd service installation
+- [ ] Test automatic restart on failure
+- [ ] Create deployment script (`deploy.sh`)
+- [ ] Document deployment process
+- [ ] Create backup/restore procedures
+- [ ] Test upgrade procedure
+
+#### 7.3 Production Hardening
+- [ ] Enable production logging (disable debug logs)
+- [ ] Configure log rotation
+- [ ] Set up monitoring (optional: Prometheus, Grafana)
+- [ ] Implement graceful shutdown (SIGTERM handling)
+- [ ] Test crash recovery
+- [ ] Implement rate limiting (optional)
+- [ ] Security audit (input sanitization, resource limits)
+- [ ] Performance tuning (queue size, worker count)
+
+---
+
+## Testing Strategy
+
+### Unit Testing
+
+**Framework:** pytest with pytest-asyncio
+
+**Test Coverage Requirements:**
+- Minimum 80% code coverage
+- 100% coverage for critical paths (TTS, audio playback)
+- All error handlers must have tests
+
+**Test Structure:**
+
+```
+tests/
+├── __init__.py
+├── conftest.py              # Shared fixtures
+├── unit/
+│   ├── test_config.py       # Configuration loading tests
+│   ├── test_models.py       # Pydantic model tests
+│   ├── test_tts_engine.py   # TTS engine tests
+│   ├── test_audio_player.py # Audio player tests
+│   └── test_queue.py        # Queue manager tests
+├── integration/
+│   ├── test_api.py          # API endpoint tests
+│   ├── test_end_to_end.py   # Full request flow tests
+│   └── test_errors.py       # Error scenario tests
+└── performance/
+    └── test_load.py         # Load testing
+```
+
+**Sample Unit Test:**
+
+```python
+# tests/unit/test_tts_engine.py
+import pytest
+from app.tts_engine import PiperTTSEngine
+
+@pytest.fixture
+def tts_engine():
+    """Create TTS engine instance."""
+    return PiperTTSEngine(model_dir="models/")
+
+def test_tts_engine_initialization(tts_engine):
+    """Test TTS engine initializes successfully."""
+    assert tts_engine is not None
+    assert tts_engine.default_voice == "en_US-lessac-medium"
+
+def test_text_to_audio_conversion(tts_engine):
+    """Test converting text to audio."""
+    audio = tts_engine.synthesize("Hello world")
+    assert audio is not None
+    assert len(audio) > 0
+    assert audio.dtype == np.float32
+
+def test_invalid_voice_raises_error(tts_engine):
+    """Test that invalid voice raises appropriate error."""
+    with pytest.raises(ValueError, match="Voice model .* not found"):
+        tts_engine.synthesize("Hello", voice="invalid_voice")
+
+@pytest.mark.asyncio
+async def test_async_synthesis(tts_engine):
+    """Test async TTS synthesis."""
+    audio = await tts_engine.synthesize_async("Hello world")
+    assert audio is not None
+```
+
+**Sample Integration Test:**
+
+```python
+# tests/integration/test_api.py
+import pytest
+from fastapi.testclient import TestClient
+from app.main import app
+
+@pytest.fixture
+def client():
+    """Create test client."""
+    return TestClient(app)
+
+def test_notify_endpoint_success(client):
+    """Test successful /notify request."""
+    response = client.post(
+        "/notify",
+        json={"message": "Test message", "rate": 180}
+    )
+    assert response.status_code == 202
+    data = response.json()
+    assert data["status"] == "queued"
+    assert data["message_length"] == 12
+
+def test_notify_endpoint_validation_error(client):
+    """Test /notify with invalid parameters."""
+    response = client.post(
+        "/notify",
+        json={"message": "", "rate": 1000}  # Empty message, invalid rate
+    )
+    assert response.status_code == 422
+
+def test_health_endpoint(client):
+    """Test /health endpoint."""
+    response = client.get("/health")
+    assert response.status_code == 200
+    data = response.json()
+    assert "status" in data
+    assert "queue_size" in data
+```
+
+---
+
+### Load Testing
+
+**Tool:** wrk or locust
+
+**Sample wrk Test:**
+
+```bash
+# Install wrk
+sudo dnf install wrk
+
+# Run load test: 100 concurrent connections, 30 seconds
+wrk -t4 -c100 -d30s -s post.lua http://localhost:8888/notify
+
+# post.lua script:
+# wrk.method = "POST"
+# wrk.headers["Content-Type"] = "application/json"
+# wrk.body = '{"message": "Load test message"}'
+```
+
+**Sample locust Test:**
+
+```python
+# locustfile.py
+from locust import HttpUser, task, between
+
+class VoiceServerUser(HttpUser):
+    wait_time = between(1, 3)
+
+    @task
+    def notify(self):
+        self.client.post("/notify", json={
+            "message": "This is a load test message",
+            "rate": 180
+        })
+
+    @task(5)
+    def health_check(self):
+        self.client.get("/health")
+
+# Run: locust -f locustfile.py --host=http://localhost:8888
+```
+
+**Performance Benchmarks:**
+
+| Metric | Target | Acceptable | Unacceptable |
+|--------|--------|------------|--------------|
+| API Response Time (p95) | < 50ms | < 100ms | > 200ms |
+| TTS Generation (500 chars) | < 2s | < 5s | > 10s |
+| Requests/Second | > 50 | > 20 | < 10 |
+| Memory Usage (idle) | < 200MB | < 500MB | > 1GB |
+| Memory Usage (load) | < 500MB | < 1GB | > 2GB |
+| Queue Processing Rate | > 10/s | > 5/s | < 2/s |
+
+---
+
+### Manual Testing Checklist
+
+**Functional Testing:**
+- [ ] Send POST request with valid message → Hear audio playback
+- [ ] Send request with long text (5000 chars) → Successful playback
+- [ ] Send request with special characters → Successful sanitization
+- [ ] Send request with invalid voice → Receive 422 error
+- [ ] Send request with rate=50 → Slow speech playback
+- [ ] Send request with rate=400 → Fast speech playback
+- [ ] Send 10 concurrent requests → All play sequentially
+- [ ] Fill queue to capacity → Receive 503 error
+- [ ] Check /health endpoint → Receive status information
+- [ ] Check /voices endpoint → See available voice models
+- [ ] Check /docs endpoint → See Swagger documentation
+
+**Error Scenario Testing:**
+- [ ] Unplug headphones during playback → Graceful error handling
+- [ ] Kill PulseAudio daemon → Audio error logged, server continues
+- [ ] Send malformed JSON → Receive 400 error
+- [ ] Send empty message → Receive 422 error
+- [ ] Send 11,000 character message → Receive 413 error
+- [ ] Restart server during playback → Queue cleared, server restarts
+
+**System Testing:**
+- [ ] Run server for 24 hours → No memory leaks
+- [ ] Send 10,000 requests → All processed successfully
+- [ ] Monitor CPU usage during load → < 50% average
+- [ ] Monitor memory usage during load → < 1GB
+- [ ] Test on Fedora 42 → Successful operation
+- [ ] Test with ALSA (without PulseAudio) → Successful operation
+
+---
+
+## Future Considerations
+
+### Optional Features (Post-v1.0)
+
+#### 1. Advanced Voice Control
+- **Pitch adjustment:** Allow clients to specify pitch modification
+- **Volume control:** Per-request volume settings
+- **Emotion/tone control:** Happy, sad, angry voice modulation (if TTS engine supports)
+- **Voice cloning:** Custom voice model training (Coqui TTS integration)
+
+**Implementation Complexity:** Medium
+**User Value:** High for accessibility and personalization
+
+---
+
+#### 2. Audio Format Options
+- **Output format selection:** Support WAV, MP3, OGG output
+- **Sample rate options:** Allow 16kHz, 22kHz, 44.1kHz selection
+- **Compression levels:** Configurable audio quality vs file size
+
+**Implementation Complexity:** Low
+**User Value:** Medium (mostly for file storage use cases)
+
+---
+
+#### 3. Streaming Audio
+- **Real-time streaming:** Stream audio as it's generated (WebSocket or SSE)
+- **Chunked TTS:** Generate and stream long texts in chunks
+- **Lower latency:** Start playback before full text is synthesized
+
+**Implementation Complexity:** High
+**User Value:** High for very long texts
+
+---
+
+#### 4. SSML Support
+- **Prosody control:** Fine-grained control over speech characteristics
+- **Break insertion:** Explicit pauses and timing control
+- **Phoneme specification:** Correct pronunciation for unusual words
+- **Multi-voice support:** Different voices within single text
+
+**Example:**
+```xml
+<speak>
+  Hello, <break time="500ms"/> this is <emphasis>important</emphasis>.
+  <voice name="en_US-libritts">A different voice.</voice>
+</speak>
+```
+
+**Implementation Complexity:** Medium
+**User Value:** High for advanced use cases
+
+---
+
+#### 5. Caching Layer
+- **TTS result caching:** Cache frequently requested texts
+- **Cache invalidation:** LRU eviction policy
+- **Cache persistence:** Store cache across restarts
+- **Cache statistics:** Hit rate monitoring
+
+**Implementation Complexity:** Low
+**User Value:** High for repeated texts (notifications, alerts)
+
+**Sample Implementation:**
+
+```python
+from functools import lru_cache
+import hashlib
+
+class TTSCache:
+    def __init__(self, max_size: int = 1000):
+        self.cache = {}
+        self.max_size = max_size
+
+    def get_cache_key(self, text: str, voice: str, rate: int) -> str:
+        """Generate cache key from TTS parameters."""
+        content = f"{text}|{voice}|{rate}"
+        return hashlib.sha256(content.encode()).hexdigest()
+
+    def get(self, text: str, voice: str, rate: int):
+        """Retrieve cached audio."""
+        key = self.get_cache_key(text, voice, rate)
+        return self.cache.get(key)
+
+    def put(self, text: str, voice: str, rate: int, audio_data):
+        """Store audio in cache with LRU eviction."""
+        if len(self.cache) >= self.max_size:
+            # Evict oldest entry (simple FIFO, use OrderedDict for true LRU)
+            self.cache.pop(next(iter(self.cache)))
+
+        key = self.get_cache_key(text, voice, rate)
+        self.cache[key] = audio_data
+```
+
+---
+
+#### 6. Multi-Language Support
+- **Automatic language detection:** Detect input language
+- **Language-specific voice selection:** Match voice to detected language
+- **Mixed-language support:** Handle multilingual texts
+
+**Implementation Complexity:** Medium
+**User Value:** High for international users
+
+---
+
+#### 7. Audio Effects
+- **Reverb:** Add spatial audio effects
+- **Echo:** Add echo effects
+- **Speed adjustment:** Time-stretch without pitch change
+- **Normalization:** Automatic volume leveling
+
+**Implementation Complexity:** Medium (requires audio processing library like `pydub` or `librosa`)
+**User Value:** Medium (aesthetic enhancement)
+
+---
+
+#### 8. Queue Priority System
+- **Priority levels:** High, normal, low priority requests
+- **Priority queues:** Separate queues for different priorities
+- **Preemption:** Allow high-priority requests to interrupt low-priority
+
+**Implementation Complexity:** Medium
+**User Value:** Medium for multi-tenant scenarios
+
+---
+
+#### 9. Webhook Notifications
+- **Completion webhooks:** Notify external service when TTS completes
+- **Error webhooks:** Notify on TTS failures
+- **Webhook retry logic:** Handle webhook delivery failures
+
+**Example Request:**
+```json
+{
+  "message": "Hello world",
+  "webhook_url": "https://example.com/tts-complete"
+}
+```
+
+**Implementation Complexity:** Low
+**User Value:** High for integration scenarios
+
+---
+
+#### 10. Authentication & Authorization
+- **API key authentication:** Secure endpoint access
+- **Rate limiting:** Per-user request limits
+- **Usage quotas:** Daily/monthly request quotas
+- **Multi-tenant support:** Isolated queues per user
+
+**Implementation Complexity:** High
+**User Value:** High for shared/production deployments
+
+---
+
+#### 11. Web Interface
+- **Simple web UI:** Browser-based TTS interface
+- **Queue visualization:** Real-time queue status display
+- **Voice model management:** Upload/download voice models via UI
+- **Settings configuration:** Web-based configuration editor
+
+**Implementation Complexity:** Medium
+**User Value:** High for non-technical users
+
+---
+
+#### 12. Docker Deployment
+- **Dockerfile:** Container image for easy deployment
+- **Docker Compose:** Multi-container setup with monitoring
+- **Volume management:** Persistent voice model storage
+- **Health check integration:** Container health monitoring
+
+**Sample Dockerfile:**
+
+```dockerfile
+FROM python:3.11-slim
+
+# Install system dependencies (PortAudio for sounddevice)
+RUN apt-get update && apt-get install -y \
+    libportaudio2 \
+    portaudio19-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application
+COPY app/ ./app/
+
+# Download default voice model
+RUN python -c "from piper import PiperVoice; PiperVoice.download('en_US-lessac-medium')"
+
+EXPOSE 8888
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8888"]
+```
+
+**Implementation Complexity:** Low
+**User Value:** High for deployment consistency
+
+---
+
+#### 13. Metrics & Monitoring
+- **Prometheus metrics:** Request count, latency, queue size
+- **Grafana dashboards:** Visual monitoring
+- **Alerting:** Notify on errors, high queue size, etc.
+- **Performance profiling:** Identify bottlenecks
+
+**Sample Metrics:**
+
+```python
+from prometheus_client import Counter, Histogram, Gauge
+
+request_counter = Counter('tts_requests_total', 'Total TTS requests')
+latency_histogram = Histogram('tts_latency_seconds', 'TTS latency')
+queue_size_gauge = Gauge('tts_queue_size', 'Current queue size')
+
+@app.post("/notify")
+async def notify(request: NotifyRequest):
+    request_counter.inc()
+    with latency_histogram.time():
+        # Process request
+        ...
+    queue_size_gauge.set(tts_queue.qsize())
+```
+
+**Implementation Complexity:** Medium
+**User Value:** High for production deployments
+
+---
+
+### Scalability Considerations
+
+**Horizontal Scaling:**
+- Use Redis for shared queue across multiple server instances
+- Implement distributed locking for audio device access
+- Load balance requests across multiple servers
+
+**Vertical Scaling:**
+- Increase queue size for higher throughput
+- Use GPU acceleration for TTS (CUDA support in Piper)
+- Optimize voice model loading (keep models in memory)
+
+**Architecture Evolution:**
+- Separate TTS generation and audio playback into microservices
+- Use message queue (RabbitMQ, Kafka) for request distribution
+- Implement worker pool for parallel TTS generation
+
+---
+
+## Appendix: References
+
+### Technical Documentation
+- [FastAPI Official Documentation](https://fastapi.tiangolo.com/)
+- [Piper TTS GitHub Repository](https://github.com/rhasspy/piper)
+- [PyAudio Documentation](https://people.csail.mit.edu/hubert/pyaudio/docs/)
+- [Uvicorn Documentation](https://www.uvicorn.org/)
+
+### Research & Comparisons
+- [FastAPI vs Flask Performance Comparison - Strapi](https://strapi.io/blog/fastapi-vs-flask-python-framework-comparison)
+- [Flask vs FastAPI - Better Stack](https://betterstack.com/community/guides/scaling-python/flask-vs-fastapi/)
+- [Python TTS Engines Comparison - Smallest AI](https://smallest.ai/blog/python-packages-realistic-text-to-speech)
+- [TTS Converters for Raspberry Pi - Circuit Digest](https://circuitdigest.com/microcontroller-projects/best-text-to-speech-tts-converter-for-raspberry-pi-espeak-festival-google-tts-pico-and-pyttsx3)
+- [Piper TTS Tutorial - RMauro Dev](https://rmauro.dev/how-to-run-piper-tts-on-your-raspberry-pi-offline-voice-zero-internet-needed/)
+- [Python Audio Playback - simpleaudio Docs](https://simpleaudio.readthedocs.io/)
+
+### Tools & Libraries
+- [uv - Fast Python Package Manager](https://github.com/astral-sh/uv)
+- [pytest - Testing Framework](https://docs.pytest.org/)
+- [locust - Load Testing](https://locust.io/)
+
+---
+
+## Document History
+
+| Version | Date | Author | Changes |
+|---------|------|--------|---------|
+| 1.0 | 2025-12-18 | Atlas | Initial PRD creation |
+
+---
+
+**Document Status:** ✅ Complete - Ready for Implementation
+
+**Next Steps:**
+1. Review PRD with stakeholders
+2. Approve technical stack decisions
+3. Begin Phase 1 implementation
+4. Set up project tracking (GitHub Issues, Jira, etc.)
+5. Assign development resources
+
+**Questions or Feedback:** Contact Atlas at [atlas@manticorum.com]
diff --git a/PROJECT_ROADMAP.json b/PROJECT_ROADMAP.json
new file mode 100644
index 0000000..3e397f3
--- /dev/null
+++ b/PROJECT_ROADMAP.json
@@ -0,0 +1,1012 @@
+{
+  "project": {
+    "name": "voice-server",
+    "description": "Local HTTP service for text-to-speech playback",
+    "version": "1.0.0",
+    "created": "2025-12-18",
+    "last_updated": "2025-12-18"
+  },
+  "methodology": {
+    "approach": "hybrid-tdd",
+    "description": "TDD for API contracts, validation, and queue logic. Implementation-first for hardware integrations.",
+    "tdd_components": [
+      "request_validation",
+      "queue_behavior",
+      "error_responses",
+      "health_check_logic"
+    ],
+    "implementation_first_components": [
+      "piper_tts_integration",
+      "sounddevice_playback",
+      "end_to_end_flow"
+    ]
+  },
+  "phases": [
+    {
+      "id": "phase_1",
+      "name": "Core Infrastructure",
+      "description": "Project setup, FastAPI skeleton, and configuration management",
+      "estimated_days": "1-2"
+    },
+    {
+      "id": "phase_2",
+      "name": "TTS Integration",
+      "description": "Piper TTS setup, voice model management, and parameter support",
+      "estimated_days": "1-2"
+    },
+    {
+      "id": "phase_3",
+      "name": "Audio Playback",
+      "description": "sounddevice integration and audio error handling",
+      "estimated_days": "1"
+    },
+    {
+      "id": "phase_4",
+      "name": "Queue Management",
+      "description": "Async queue implementation and request processing pipeline",
+      "estimated_days": "1"
+    },
+    {
+      "id": "phase_5",
+      "name": "Error Handling",
+      "description": "Exception handlers, logging infrastructure, and health monitoring",
+      "estimated_days": "1"
+    },
+    {
+      "id": "phase_6",
+      "name": "Testing",
+      "description": "Unit tests, integration tests, performance tests, and system tests",
+      "estimated_days": "1-2"
+    },
+    {
+      "id": "phase_7",
+      "name": "Documentation & Deployment",
+      "description": "README, systemd service, and production hardening",
+      "estimated_days": "1"
+    }
+  ],
+  "tasks": [
+    {
+      "id": "1.1.1",
+      "phase": "phase_1",
+      "name": "Initialize project directory",
+      "description": "Create project directory structure at /mnt/NV2/Development/voice-server with app/, tests/, models/ subdirectories",
+      "dependencies": [],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Directory already exists from PRD creation"
+    },
+    {
+      "id": "1.1.2",
+      "phase": "phase_1",
+      "name": "Create Python virtual environment",
+      "description": "Initialize virtual environment using uv (uv venv)",
+      "dependencies": ["1.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use uv as per user preference"
+    },
+    {
+      "id": "1.1.3",
+      "phase": "phase_1",
+      "name": "Install core dependencies",
+      "description": "Install fastapi, uvicorn[standard], piper-tts, sounddevice, numpy, pydantic, python-dotenv",
+      "dependencies": ["1.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Also install pytest, pytest-asyncio, httpx for testing"
+    },
+    {
+      "id": "1.1.4",
+      "phase": "phase_1",
+      "name": "Create pyproject.toml",
+      "description": "Create pyproject.toml with pinned dependency versions and project metadata",
+      "dependencies": ["1.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use uv's native pyproject.toml support instead of requirements.txt"
+    },
+    {
+      "id": "1.1.5",
+      "phase": "phase_1",
+      "name": "Create environment configuration",
+      "description": "Create .env.example with all configurable environment variables",
+      "dependencies": ["1.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include HOST, PORT, QUEUE_SIZE, LOG_LEVEL, MODEL_DIR, DEFAULT_VOICE"
+    },
+    {
+      "id": "1.1.6",
+      "phase": "phase_1",
+      "name": "Initialize git repository",
+      "description": "Initialize git repo with .gitignore for Python, IDEs, .env, voice models, __pycache__",
+      "dependencies": ["1.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Add models/*.onnx and models/*.json to .gitignore (large files)"
+    },
+    {
+      "id": "1.2.1",
+      "phase": "phase_1",
+      "name": "Write tests for Pydantic request/response models",
+      "description": "TDD: Write tests for NotifyRequest, NotifyResponse, HealthResponse, ErrorResponse models with validation rules",
+      "dependencies": ["1.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Test message length limits (1-10000), rate range (50-400), voice pattern validation"
+    },
+    {
+      "id": "1.2.2",
+      "phase": "phase_1",
+      "name": "Implement Pydantic models",
+      "description": "Create app/models.py with NotifyRequest, NotifyResponse, HealthResponse, ErrorResponse models",
+      "dependencies": ["1.2.1"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Implementation to make tests from 1.2.1 pass"
+    },
+    {
+      "id": "1.2.3",
+      "phase": "phase_1",
+      "name": "Create FastAPI application skeleton",
+      "description": "Create app/main.py with FastAPI app, lifespan handler, and CORS middleware",
+      "dependencies": ["1.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use lifespan context manager (not deprecated on_event)"
+    },
+    {
+      "id": "1.2.4",
+      "phase": "phase_1",
+      "name": "Write tests for /notify endpoint contract",
+      "description": "TDD: Write tests for POST /notify - valid requests return 202, invalid return 422, missing fields return 400",
+      "dependencies": ["1.2.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Use httpx.AsyncClient for async endpoint testing"
+    },
+    {
+      "id": "1.2.5",
+      "phase": "phase_1",
+      "name": "Implement /notify endpoint skeleton",
+      "description": "Create POST /notify endpoint that validates request and returns 202 (queue integration later)",
+      "dependencies": ["1.2.4"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Initially just validate and return success; queue integration in phase 4"
+    },
+    {
+      "id": "1.2.6",
+      "phase": "phase_1",
+      "name": "Write tests for /health endpoint",
+      "description": "TDD: Write tests for GET /health - returns status, uptime, queue info structure",
+      "dependencies": ["1.2.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Test both healthy and unhealthy response structures"
+    },
+    {
+      "id": "1.2.7",
+      "phase": "phase_1",
+      "name": "Implement /health endpoint skeleton",
+      "description": "Create GET /health endpoint returning basic health status",
+      "dependencies": ["1.2.6"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Full health checks (TTS, audio) added in phase 5"
+    },
+    {
+      "id": "1.2.8",
+      "phase": "phase_1",
+      "name": "Implement /voices endpoint skeleton",
+      "description": "Create GET /voices endpoint returning list of available voice models",
+      "dependencies": ["1.2.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Initially return empty list; populate after TTS integration"
+    },
+    {
+      "id": "1.2.9",
+      "phase": "phase_1",
+      "name": "Configure JSON logging middleware",
+      "description": "Add structured JSON logging for all requests with timestamp, request_id, path, status_code",
+      "dependencies": ["1.2.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use Python's logging with JSON formatter"
+    },
+    {
+      "id": "1.2.10",
+      "phase": "phase_1",
+      "name": "Verify server startup",
+      "description": "Test server starts successfully with uvicorn app.main:app --reload",
+      "dependencies": ["1.2.5", "1.2.7", "1.2.8"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Verify /docs (Swagger UI) is accessible"
+    },
+    {
+      "id": "1.3.1",
+      "phase": "phase_1",
+      "name": "Write tests for configuration loading",
+      "description": "TDD: Write tests for config loading from env vars with defaults, validation of values",
+      "dependencies": ["1.1.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Test default values, env var override, invalid value handling"
+    },
+    {
+      "id": "1.3.2",
+      "phase": "phase_1",
+      "name": "Implement configuration module",
+      "description": "Create app/config.py with Settings class using pydantic-settings for env var loading",
+      "dependencies": ["1.3.1"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Include host, port, queue_size, log_level, model_dir, default_voice settings"
+    },
+    {
+      "id": "1.3.3",
+      "phase": "phase_1",
+      "name": "Add CLI argument parsing",
+      "description": "Add CLI argument support for --host, --port, --log-level to override env vars",
+      "dependencies": ["1.3.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use argparse or typer; CLI args take precedence over env vars"
+    },
+    {
+      "id": "2.1.1",
+      "phase": "phase_2",
+      "name": "Create TTS engine module structure",
+      "description": "Create app/tts_engine.py with TTSEngine abstract base and PiperTTSEngine class skeleton",
+      "dependencies": ["1.2.10"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Abstract base allows for future TTS engine swapping"
+    },
+    {
+      "id": "2.1.2",
+      "phase": "phase_2",
+      "name": "Download default voice model",
+      "description": "Download en_US-lessac-medium.onnx and .json to models/ directory",
+      "dependencies": ["1.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Can use piper --download-dir or manual download from GitHub releases"
+    },
+    {
+      "id": "2.1.3",
+      "phase": "phase_2",
+      "name": "Implement Piper TTS voice loading",
+      "description": "Implement PiperTTSEngine.load_voice() to load .onnx model with caching",
+      "dependencies": ["2.1.1", "2.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Cache loaded models in memory to avoid reload on each request"
+    },
+    {
+      "id": "2.1.4",
+      "phase": "phase_2",
+      "name": "Implement text-to-audio synthesis",
+      "description": "Implement PiperTTSEngine.synthesize() returning NumPy array of audio samples",
+      "dependencies": ["2.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Return float32 NumPy array compatible with sounddevice"
+    },
+    {
+      "id": "2.1.5",
+      "phase": "phase_2",
+      "name": "Write integration tests for TTS synthesis",
+      "description": "Write tests verifying TTS generates valid audio array for sample text",
+      "dependencies": ["2.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Test output is non-empty NumPy array with expected sample rate"
+    },
+    {
+      "id": "2.1.6",
+      "phase": "phase_2",
+      "name": "Measure TTS latency benchmarks",
+      "description": "Benchmark TTS generation time for various text lengths (10, 100, 500, 1000, 5000 chars)",
+      "dependencies": ["2.1.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Document results for performance baseline"
+    },
+    {
+      "id": "2.2.1",
+      "phase": "phase_2",
+      "name": "Create models directory structure",
+      "description": "Create models/ directory for voice model storage with README explaining model installation",
+      "dependencies": ["1.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include instructions for downloading additional voices"
+    },
+    {
+      "id": "2.2.2",
+      "phase": "phase_2",
+      "name": "Implement voice model discovery",
+      "description": "Implement function to scan models/ directory and return available voice models",
+      "dependencies": ["2.2.1", "2.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Parse .json config files for model metadata (language, quality)"
+    },
+    {
+      "id": "2.2.3",
+      "phase": "phase_2",
+      "name": "Implement /voices endpoint fully",
+      "description": "Update /voices endpoint to return discovered models with metadata",
+      "dependencies": ["2.2.2", "1.2.8"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include name, language, quality, size_mb, installed status"
+    },
+    {
+      "id": "2.2.4",
+      "phase": "phase_2",
+      "name": "Add voice validation to /notify",
+      "description": "Validate requested voice exists before queuing; return 422 if not found",
+      "dependencies": ["2.2.2", "1.2.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Provide helpful error message listing available voices"
+    },
+    {
+      "id": "2.3.1",
+      "phase": "phase_2",
+      "name": "Implement speech rate adjustment",
+      "description": "Add rate parameter support to TTS synthesis (50-400 WPM range)",
+      "dependencies": ["2.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Check if Piper supports rate adjustment natively or needs post-processing"
+    },
+    {
+      "id": "2.3.2",
+      "phase": "phase_2",
+      "name": "Test rate adjustment across range",
+      "description": "Test TTS output at rate=50, 100, 170 (default), 300, 400 WPM",
+      "dependencies": ["2.3.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Verify audio sounds correct at extremes"
+    },
+    {
+      "id": "2.3.3",
+      "phase": "phase_2",
+      "name": "Implement voice_enabled flag",
+      "description": "Add voice_enabled parameter to skip TTS for debugging/testing",
+      "dependencies": ["2.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "When false, skip TTS and audio playback but still process request"
+    },
+    {
+      "id": "3.1.1",
+      "phase": "phase_3",
+      "name": "Create audio player module",
+      "description": "Create app/audio_player.py with AudioPlayer class skeleton",
+      "dependencies": ["1.2.10"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use sounddevice for non-blocking playback"
+    },
+    {
+      "id": "3.1.2",
+      "phase": "phase_3",
+      "name": "Implement audio device verification",
+      "description": "Implement verify_audio_devices() to check for available output devices at startup",
+      "dependencies": ["3.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Use sd.query_devices() to enumerate devices"
+    },
+    {
+      "id": "3.1.3",
+      "phase": "phase_3",
+      "name": "Implement non-blocking playback",
+      "description": "Implement AudioPlayer.play() using sd.play() for non-blocking audio output",
+      "dependencies": ["3.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "sd.play() returns immediately; audio plays in background thread"
+    },
+    {
+      "id": "3.1.4",
+      "phase": "phase_3",
+      "name": "Implement async wait method",
+      "description": "Implement AudioPlayer.wait_async() for async-friendly waiting on playback completion",
+      "dependencies": ["3.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Poll sd.get_stream().active with asyncio.sleep()"
+    },
+    {
+      "id": "3.1.5",
+      "phase": "phase_3",
+      "name": "Test audio playback with sample data",
+      "description": "Test AudioPlayer with synthesized sine wave to verify audio output works",
+      "dependencies": ["3.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "implementation_first",
+      "notes": "Use numpy to generate test tone; verify sound is heard"
+    },
+    {
+      "id": "3.1.6",
+      "phase": "phase_3",
+      "name": "Verify non-blocking behavior",
+      "description": "Test that play() returns immediately and server can handle requests during playback",
+      "dependencies": ["3.1.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Send request, verify 202 returned before audio finishes"
+    },
+    {
+      "id": "3.2.1",
+      "phase": "phase_3",
+      "name": "Implement retry logic for device failures",
+      "description": "Implement RobustAudioPlayer with automatic retry on sd.PortAudioError",
+      "dependencies": ["3.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Retry up to 3 times with 0.5s delay between attempts"
+    },
+    {
+      "id": "3.2.2",
+      "phase": "phase_3",
+      "name": "Handle device disconnection",
+      "description": "Gracefully handle audio device disconnection during playback",
+      "dependencies": ["3.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Log error, skip playback, continue processing queue"
+    },
+    {
+      "id": "3.2.3",
+      "phase": "phase_3",
+      "name": "Implement audio diagnostics",
+      "description": "Implement get_audio_diagnostics() for health check reporting",
+      "dependencies": ["3.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Return device count, default output name, sample rate"
+    },
+    {
+      "id": "3.2.4",
+      "phase": "phase_3",
+      "name": "Add audio error logging",
+      "description": "Add detailed logging for all audio errors with device context",
+      "dependencies": ["3.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include device name, error type, retry count in logs"
+    },
+    {
+      "id": "4.1.1",
+      "phase": "phase_4",
+      "name": "Write tests for queue behavior",
+      "description": "TDD: Write tests for queue enqueue, dequeue, overflow, ordering (FIFO)",
+      "dependencies": ["1.2.10"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Test max size enforcement, QueueFullError raising"
+    },
+    {
+      "id": "4.1.2",
+      "phase": "phase_4",
+      "name": "Create queue manager module",
+      "description": "Create app/queue_manager.py with TTSQueue class using asyncio.Queue",
+      "dependencies": ["4.1.1"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Implement to pass tests from 4.1.1"
+    },
+    {
+      "id": "4.1.3",
+      "phase": "phase_4",
+      "name": "Implement queue enqueue with timeout",
+      "description": "Implement TTSQueue.enqueue() with 1s timeout, raising QueueFullError on timeout",
+      "dependencies": ["4.1.2"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Return queue position on success"
+    },
+    {
+      "id": "4.1.4",
+      "phase": "phase_4",
+      "name": "Implement queue metrics",
+      "description": "Add stats tracking: processed count, error count, current size",
+      "dependencies": ["4.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Expose via TTSQueue.stats property"
+    },
+    {
+      "id": "4.1.5",
+      "phase": "phase_4",
+      "name": "Implement graceful queue shutdown",
+      "description": "Implement TTSQueue.shutdown() to wait for current item, reject new items",
+      "dependencies": ["4.1.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Called during application shutdown via lifespan"
+    },
+    {
+      "id": "4.2.1",
+      "phase": "phase_4",
+      "name": "Implement background queue processor",
+      "description": "Create async background task to process queue items sequentially",
+      "dependencies": ["4.1.2", "2.1.4", "3.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Generate TTS, play audio, wait for completion, then next item"
+    },
+    {
+      "id": "4.2.2",
+      "phase": "phase_4",
+      "name": "Integrate queue with /notify endpoint",
+      "description": "Update /notify to enqueue validated requests to TTSQueue",
+      "dependencies": ["4.2.1", "1.2.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Return queue_position in response"
+    },
+    {
+      "id": "4.2.3",
+      "phase": "phase_4",
+      "name": "Add request timeout handling",
+      "description": "Add 60s timeout for individual request processing in queue worker",
+      "dependencies": ["4.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Cancel TTS generation if exceeds timeout; log and continue"
+    },
+    {
+      "id": "4.2.4",
+      "phase": "phase_4",
+      "name": "Implement CPU-bound TTS in thread pool",
+      "description": "Run TTS synthesis in thread pool executor to avoid blocking event loop",
+      "dependencies": ["4.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use loop.run_in_executor() for TTS generation"
+    },
+    {
+      "id": "4.2.5",
+      "phase": "phase_4",
+      "name": "Test queue with concurrent requests",
+      "description": "Test sending 20+ concurrent requests and verify sequential playback",
+      "dependencies": ["4.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "All requests should be processed in order received"
+    },
+    {
+      "id": "4.3.1",
+      "phase": "phase_4",
+      "name": "Add queue status to /health",
+      "description": "Update /health to include queue size, capacity, utilization percentage",
+      "dependencies": ["4.1.4", "1.2.7"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include processed and error counts"
+    },
+    {
+      "id": "4.3.2",
+      "phase": "phase_4",
+      "name": "Add queue event logging",
+      "description": "Log queue events: enqueue, process start, process complete, errors",
+      "dependencies": ["4.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include request_id for correlation"
+    },
+    {
+      "id": "4.3.3",
+      "phase": "phase_4",
+      "name": "Test queue overflow scenarios",
+      "description": "Test behavior when queue reaches max size; verify 503 returned",
+      "dependencies": ["4.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Fill queue with slow requests, send additional request"
+    },
+    {
+      "id": "5.1.1",
+      "phase": "phase_5",
+      "name": "Write tests for error responses",
+      "description": "TDD: Write tests for all error response formats (400, 422, 500, 503)",
+      "dependencies": ["1.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": "tdd",
+      "notes": "Verify error response structure matches API spec"
+    },
+    {
+      "id": "5.1.2",
+      "phase": "phase_5",
+      "name": "Create custom exception classes",
+      "description": "Create app/exceptions.py with QueueFullError, TTSEngineError, AudioPlaybackError",
+      "dependencies": ["5.1.1"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Include error codes and details for each exception type"
+    },
+    {
+      "id": "5.1.3",
+      "phase": "phase_5",
+      "name": "Implement exception handlers",
+      "description": "Add FastAPI exception handlers for each custom exception type",
+      "dependencies": ["5.1.2"],
+      "completed": false,
+      "tested": true,
+      "test_approach": "tdd",
+      "notes": "Map exceptions to appropriate HTTP status codes"
+    },
+    {
+      "id": "5.1.4",
+      "phase": "phase_5",
+      "name": "Implement generic exception handler",
+      "description": "Add catch-all exception handler for unexpected errors (500)",
+      "dependencies": ["5.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Log full traceback but return sanitized error to client"
+    },
+    {
+      "id": "5.2.1",
+      "phase": "phase_5",
+      "name": "Configure structured JSON logging",
+      "description": "Set up logging with JSON formatter including timestamp, level, message, context",
+      "dependencies": ["1.2.9"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use python-json-logger or custom formatter"
+    },
+    {
+      "id": "5.2.2",
+      "phase": "phase_5",
+      "name": "Implement rotating file handler",
+      "description": "Configure RotatingFileHandler with 10MB max size, 5 backups",
+      "dependencies": ["5.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Write to voice-server.log in project directory"
+    },
+    {
+      "id": "5.2.3",
+      "phase": "phase_5",
+      "name": "Add request ID tracking",
+      "description": "Generate unique request_id for each request; include in all related logs",
+      "dependencies": ["5.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use UUID4; add to response headers for client correlation"
+    },
+    {
+      "id": "5.2.4",
+      "phase": "phase_5",
+      "name": "Test log rotation",
+      "description": "Verify log files rotate correctly when size limit reached",
+      "dependencies": ["5.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Generate enough log entries to trigger rotation"
+    },
+    {
+      "id": "5.3.1",
+      "phase": "phase_5",
+      "name": "Implement comprehensive /health endpoint",
+      "description": "Update /health with TTS engine, audio system, queue, and system resource checks",
+      "dependencies": ["4.3.1", "3.2.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Return 200 if all healthy, 503 if any component unhealthy"
+    },
+    {
+      "id": "5.3.2",
+      "phase": "phase_5",
+      "name": "Add TTS engine health check",
+      "description": "Test TTS engine can synthesize short test phrase",
+      "dependencies": ["2.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use minimal text to minimize overhead"
+    },
+    {
+      "id": "5.3.3",
+      "phase": "phase_5",
+      "name": "Add system resource monitoring",
+      "description": "Include CPU and memory usage in health check response",
+      "dependencies": ["5.3.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use psutil for system metrics"
+    },
+    {
+      "id": "5.3.4",
+      "phase": "phase_5",
+      "name": "Test health endpoint under load",
+      "description": "Verify /health responds correctly while queue is processing",
+      "dependencies": ["5.3.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Health check should not block or be blocked by TTS processing"
+    },
+    {
+      "id": "6.1.1",
+      "phase": "phase_6",
+      "name": "Set up pytest infrastructure",
+      "description": "Create tests/ directory with conftest.py, pytest.ini, test fixtures",
+      "dependencies": ["1.1.4"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include async fixtures for FastAPI testing"
+    },
+    {
+      "id": "6.1.2",
+      "phase": "phase_6",
+      "name": "Write remaining unit tests",
+      "description": "Complete any missing unit tests to achieve 80%+ coverage",
+      "dependencies": ["6.1.1", "5.1.3"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use pytest-cov for coverage reporting"
+    },
+    {
+      "id": "6.2.1",
+      "phase": "phase_6",
+      "name": "Write end-to-end integration tests",
+      "description": "Test complete request flow from HTTP request to audio playback",
+      "dependencies": ["4.2.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "May need to mock audio output for CI environments"
+    },
+    {
+      "id": "6.2.2",
+      "phase": "phase_6",
+      "name": "Write error scenario tests",
+      "description": "Test TTS failure, audio failure, queue overflow scenarios end-to-end",
+      "dependencies": ["6.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Use dependency injection to simulate failures"
+    },
+    {
+      "id": "6.3.1",
+      "phase": "phase_6",
+      "name": "Create load testing script",
+      "description": "Create load test using locust or wrk for performance benchmarking",
+      "dependencies": ["4.2.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Target: 50+ req/s, <50ms API response time"
+    },
+    {
+      "id": "6.3.2",
+      "phase": "phase_6",
+      "name": "Measure performance benchmarks",
+      "description": "Record p50, p95, p99 latency; TTS generation time; memory usage",
+      "dependencies": ["6.3.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Document baseline performance for future comparison"
+    },
+    {
+      "id": "6.4.1",
+      "phase": "phase_6",
+      "name": "Test on target environment",
+      "description": "Run full test suite on Nobara/Fedora 42 with real audio hardware",
+      "dependencies": ["6.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Test with both PulseAudio and ALSA if possible"
+    },
+    {
+      "id": "6.4.2",
+      "phase": "phase_6",
+      "name": "Test long-running stability",
+      "description": "Run server for 24+ hours with periodic requests; check for memory leaks",
+      "dependencies": ["6.4.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Monitor memory usage over time"
+    },
+    {
+      "id": "7.1.1",
+      "phase": "phase_7",
+      "name": "Create comprehensive README",
+      "description": "Write README.md with overview, installation, configuration, usage, API docs, troubleshooting",
+      "dependencies": ["6.4.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include example curl commands and Python client code"
+    },
+    {
+      "id": "7.1.2",
+      "phase": "phase_7",
+      "name": "Document voice model installation",
+      "description": "Create guide for downloading and installing additional Piper voice models",
+      "dependencies": ["2.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Link to Piper voice model repository"
+    },
+    {
+      "id": "7.1.3",
+      "phase": "phase_7",
+      "name": "Create example client scripts",
+      "description": "Create examples/ directory with curl, Python, and JavaScript client examples",
+      "dependencies": ["7.1.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include async Python example using httpx"
+    },
+    {
+      "id": "7.2.1",
+      "phase": "phase_7",
+      "name": "Create systemd service file",
+      "description": "Create voice-server.service for systemd deployment",
+      "dependencies": ["6.4.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include restart on failure, proper user/group, working directory"
+    },
+    {
+      "id": "7.2.2",
+      "phase": "phase_7",
+      "name": "Test systemd service",
+      "description": "Test service installation, start, stop, restart, and auto-restart on failure",
+      "dependencies": ["7.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Document installation steps in README"
+    },
+    {
+      "id": "7.2.3",
+      "phase": "phase_7",
+      "name": "Create deployment script",
+      "description": "Create deploy.sh script for automated deployment",
+      "dependencies": ["7.2.1"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Include venv setup, dependency install, service restart"
+    },
+    {
+      "id": "7.3.1",
+      "phase": "phase_7",
+      "name": "Configure production logging",
+      "description": "Set up production log levels (INFO default, DEBUG disabled)",
+      "dependencies": ["5.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Configurable via LOG_LEVEL environment variable"
+    },
+    {
+      "id": "7.3.2",
+      "phase": "phase_7",
+      "name": "Implement graceful shutdown",
+      "description": "Handle SIGTERM/SIGINT for graceful shutdown with queue draining",
+      "dependencies": ["4.1.5"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Wait for current playback to complete before exit"
+    },
+    {
+      "id": "7.3.3",
+      "phase": "phase_7",
+      "name": "Security audit",
+      "description": "Review input sanitization, resource limits, error message exposure",
+      "dependencies": ["6.2.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Verify no internal details leaked in error responses"
+    },
+    {
+      "id": "7.3.4",
+      "phase": "phase_7",
+      "name": "Performance tuning",
+      "description": "Tune queue size, worker count, timeouts based on benchmark results",
+      "dependencies": ["6.3.2"],
+      "completed": false,
+      "tested": false,
+      "test_approach": null,
+      "notes": "Document recommended settings for different use cases"
+    }
+  ],
+  "summary": {
+    "total_tasks": 78,
+    "tdd_tasks": 12,
+    "implementation_first_tasks": 8,
+    "phases": 7,
+    "estimated_days": "4-7"
+  }
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7375bca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,41 @@
+# Voice Server
+
+Local HTTP service for text-to-speech playback using Piper TTS.
+
+## Features
+
+- HTTP POST endpoint for text-to-speech requests
+- High-quality neural TTS using Piper
+- Non-blocking audio playback with sounddevice
+- Async request queue for concurrent handling
+- Automatic OpenAPI documentation
+
+## Quick Start
+
+```bash
+# Install dependencies
+uv pip install -e ".[dev]"
+
+# Run server
+uvicorn app.main:app --reload
+
+# Test endpoint
+curl -X POST http://localhost:8000/notify \
+  -H "Content-Type: application/json" \
+  -d '{"message": "Hello, world!"}'
+```
+
+## API Endpoints
+
+- `POST /notify` - Submit text for TTS playback
+- `GET /health` - Health check endpoint
+- `GET /voices` - List available voice models
+- `GET /docs` - OpenAPI documentation
+
+## Configuration
+
+See `.env.example` for configuration options.
+
+## License
+
+MIT
diff --git a/app/__init__.py b/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/app/audio_player.py b/app/audio_player.py
new file mode 100644
index 0000000..0551377
--- /dev/null
+++ b/app/audio_player.py
@@ -0,0 +1,192 @@
+"""
+Audio playback module for voice-server.
+
+Provides non-blocking audio playback using sounddevice.
+"""
+
+import asyncio
+import logging
+import time
+from typing import Protocol
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class AudioPlayer(Protocol):
+    """Protocol defining the audio player interface."""
+
+    def play(self, audio_data: np.ndarray, sample_rate: int) -> None:
+        """Play audio data (non-blocking)."""
+        ...
+
+    def is_playing(self) -> bool:
+        """Check if audio is currently playing."""
+        ...
+
+    def stop(self) -> None:
+        """Stop current playback."""
+        ...
+
+    async def wait_async(self) -> None:
+        """Wait asynchronously for playback to complete."""
+        ...
+
+
+class SounddevicePlayer:
+    """
+    Audio player implementation using sounddevice.
+
+    Provides non-blocking playback with async wait support.
+    """
+
+    def __init__(self, default_sample_rate: int = 22050, retry_attempts: int = 3):
+        """
+        Initialize the audio player.
+
+        Args:
+            default_sample_rate: Default sample rate if not specified in play()
+            retry_attempts: Number of retry attempts on playback failure
+        """
+        self.default_sample_rate = default_sample_rate
+        self.retry_attempts = retry_attempts
+        self._initialized = False
+
+        # Lazy import sounddevice to defer PortAudio initialization
+        self._sd = None
+
+    def _ensure_initialized(self):
+        """Ensure sounddevice is imported and initialized."""
+        if self._sd is None:
+            try:
+                import sounddevice as sd
+
+                self._sd = sd
+                self._initialized = True
+                logger.info("SounddevicePlayer initialized successfully")
+            except OSError as e:
+                logger.error(f"Failed to initialize sounddevice: {e}")
+                raise RuntimeError(f"Audio system unavailable: {e}") from e
+
+    def play(self, audio_data: np.ndarray, sample_rate: int | None = None) -> None:
+        """
+        Play audio data (non-blocking).
+
+        The audio plays in a background thread. Use is_playing() to check status
+        or wait_async() to wait for completion.
+
+        Args:
+            audio_data: NumPy array of audio samples (float32 or int16)
+            sample_rate: Sample rate in Hz (uses default if not specified)
+        """
+        self._ensure_initialized()
+
+        if len(audio_data) == 0:
+            logger.debug("Skipping playback of empty audio")
+            return
+
+        rate = sample_rate or self.default_sample_rate
+
+        # Stop any currently playing audio
+        self.stop()
+
+        for attempt in range(self.retry_attempts):
+            try:
+                # Play audio - returns immediately, audio plays in background
+                self._sd.play(audio_data, rate)
+                logger.debug(f"Started playback: {len(audio_data)} samples at {rate}Hz")
+                return
+            except self._sd.PortAudioError as e:
+                logger.warning(f"Playback attempt {attempt + 1} failed: {e}")
+                if attempt < self.retry_attempts - 1:
+                    time.sleep(0.5)
+                else:
+                    raise RuntimeError(f"Audio playback failed after {self.retry_attempts} attempts: {e}")
+
+    def is_playing(self) -> bool:
+        """Check if audio is currently playing."""
+        if self._sd is None:
+            return False
+
+        try:
+            stream = self._sd.get_stream()
+            return stream is not None and stream.active
+        except Exception:
+            return False
+
+    def stop(self) -> None:
+        """Stop current playback."""
+        if self._sd is not None:
+            try:
+                self._sd.stop()
+            except Exception as e:
+                logger.warning(f"Error stopping playback: {e}")
+
+    def wait(self) -> None:
+        """Block until current playback completes."""
+        if self._sd is not None:
+            try:
+                self._sd.wait()
+            except Exception as e:
+                logger.warning(f"Error waiting for playback: {e}")
+
+    async def wait_async(self, poll_interval: float = 0.05) -> None:
+        """
+        Wait asynchronously for playback to complete.
+
+        Uses polling to avoid blocking the event loop.
+
+        Args:
+            poll_interval: How often to check playback status (seconds)
+        """
+        while self.is_playing():
+            await asyncio.sleep(poll_interval)
+
+    def get_diagnostics(self) -> dict:
+        """
+        Get audio system diagnostics for health checks.
+
+        Returns:
+            Dictionary with audio device information and status
+        """
+        try:
+            self._ensure_initialized()
+
+            devices = self._sd.query_devices()
+            output_devices = [d for d in devices if d["max_output_channels"] > 0]
+
+            if not output_devices:
+                return {
+                    "status": "unavailable",
+                    "error": "No audio output devices found",
+                }
+
+            default_output = self._sd.query_devices(kind="output")
+
+            return {
+                "status": "available",
+                "device_count": len(output_devices),
+                "default_output": default_output["name"],
+                "default_sample_rate": int(default_output["default_samplerate"]),
+            }
+
+        except Exception as e:
+            return {
+                "status": "unavailable",
+                "error": str(e),
+            }
+
+    def health_check(self) -> dict:
+        """
+        Perform a health check on the audio system.
+
+        Returns:
+            Dictionary with status and any error messages
+        """
+        diagnostics = self.get_diagnostics()
+
+        if diagnostics["status"] == "available":
+            return {"status": "healthy", "details": diagnostics}
+        else:
+            return {"status": "unhealthy", "error": diagnostics.get("error", "Unknown error")}
diff --git a/app/config.py b/app/config.py
new file mode 100644
index 0000000..4b488ea
--- /dev/null
+++ b/app/config.py
@@ -0,0 +1,98 @@
+"""
+Configuration management for voice-server.
+
+Loads configuration from environment variables with sensible defaults.
+Uses pydantic-settings for type-safe configuration loading and validation.
+"""
+
+from functools import lru_cache
+from typing import Annotated, Literal
+
+from pydantic import Field, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+# Valid log levels
+LogLevel = Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
+
+
+class Settings(BaseSettings):
+    """
+    Application settings loaded from environment variables.
+
+    All settings have sensible defaults and can be overridden via environment
+    variables or a .env file.
+    """
+
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+    # Server settings
+    host: Annotated[str, Field(default="0.0.0.0", description="Host to bind to")]
+    port: Annotated[
+        int,
+        Field(default=8888, ge=1, le=65535, description="Port to listen on"),
+    ]
+
+    # TTS settings
+    model_dir: Annotated[
+        str,
+        Field(default="./models", description="Directory containing voice models"),
+    ]
+    default_voice: Annotated[
+        str,
+        Field(default="en_US-lessac-medium", description="Default voice model"),
+    ]
+    default_rate: Annotated[
+        int,
+        Field(default=170, ge=50, le=400, description="Default speech rate (WPM)"),
+    ]
+
+    # Queue settings
+    queue_max_size: Annotated[
+        int,
+        Field(default=50, gt=0, description="Maximum TTS queue size"),
+    ]
+    request_timeout_seconds: Annotated[
+        int,
+        Field(default=60, gt=0, description="Request processing timeout"),
+    ]
+
+    # Logging
+    log_level: Annotated[
+        LogLevel,
+        Field(default="INFO", description="Logging level"),
+    ]
+    log_file: Annotated[
+        str | None,
+        Field(default=None, description="Log file path (None for stdout only)"),
+    ]
+
+    # Debug
+    voice_enabled: Annotated[
+        bool,
+        Field(default=True, description="Enable/disable TTS playback"),
+    ]
+
+    @field_validator("log_level", mode="before")
+    @classmethod
+    def uppercase_log_level(cls, v: str) -> str:
+        """Ensure log level is uppercase."""
+        if isinstance(v, str):
+            return v.upper()
+        return v
+
+
+@lru_cache
+def get_settings() -> Settings:
+    """
+    Get cached application settings.
+
+    Returns the same Settings instance on subsequent calls for efficiency.
+    The cache can be cleared by calling get_settings.cache_clear().
+    """
+    return Settings()
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..4ebc8b5
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,140 @@
+"""
+Voice Server - Local HTTP service for text-to-speech playback.
+
+This module provides the FastAPI application with endpoints for:
+- POST /notify: Submit text for TTS playback
+- GET /health: Health check endpoint
+- GET /voices: List available voice models
+"""
+
+import logging
+import time
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from app.audio_player import SounddevicePlayer
+from app.config import get_settings
+from app.queue_manager import TTSQueueManager
+from app.tts_engine import PiperTTSEngine
+
+logger = logging.getLogger(__name__)
+
+# Track server start time for uptime calculation
+_start_time: float = 0.0
+
+# Global instances (initialized in lifespan)
+tts_engine: PiperTTSEngine | None = None
+audio_player: SounddevicePlayer | None = None
+queue_manager: TTSQueueManager | None = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Application lifespan handler.
+
+    Handles startup and shutdown events:
+    - Startup: Initialize TTS engine, audio player, queue processor
+    - Shutdown: Stop audio playback, drain queue
+    """
+    global _start_time, tts_engine, audio_player, queue_manager
+
+    settings = get_settings()
+    _start_time = time.time()
+
+    # Initialize TTS engine
+    logger.info("Initializing TTS engine...")
+    tts_engine = PiperTTSEngine(
+        model_dir=settings.model_dir,
+        default_voice=settings.default_voice,
+    )
+
+    # Initialize audio player
+    logger.info("Initializing audio player...")
+    audio_player = SounddevicePlayer(
+        default_sample_rate=tts_engine.get_sample_rate(),
+    )
+
+    # Initialize and start queue manager
+    logger.info("Starting queue manager...")
+    queue_manager = TTSQueueManager(
+        tts_engine=tts_engine,
+        audio_player=audio_player,
+        max_size=settings.queue_max_size,
+        request_timeout=settings.request_timeout_seconds,
+    )
+    await queue_manager.start()
+
+    logger.info("Voice server started successfully")
+
+    yield
+
+    # Shutdown cleanup
+    logger.info("Shutting down voice server...")
+    if queue_manager:
+        await queue_manager.stop()
+    if audio_player:
+        audio_player.stop()
+    logger.info("Voice server stopped")
+
+
+def create_app() -> FastAPI:
+    """
+    Create and configure the FastAPI application.
+
+    Returns a configured FastAPI instance with all routes and middleware.
+    """
+    settings = get_settings()
+
+    app = FastAPI(
+        title="Voice Server",
+        description="Local HTTP service for text-to-speech playback using Piper TTS",
+        version="1.0.0",
+        lifespan=lifespan,
+    )
+
+    # Configure CORS
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Register routes
+    from app.routes import router
+
+    app.include_router(router)
+
+    return app
+
+
+def get_uptime_seconds() -> int:
+    """Get server uptime in seconds."""
+    if _start_time == 0.0:
+        return 0
+    return int(time.time() - _start_time)
+
+
+# Create the application instance
+app = create_app()
+
+
+def run():
+    """Run the server using uvicorn (for CLI entry point)."""
+    import uvicorn
+
+    settings = get_settings()
+    uvicorn.run(
+        "app.main:app",
+        host=settings.host,
+        port=settings.port,
+        reload=True,
+    )
+
+
+if __name__ == "__main__":
+    run()
diff --git a/app/models.py b/app/models.py
new file mode 100644
index 0000000..8832b58
--- /dev/null
+++ b/app/models.py
@@ -0,0 +1,162 @@
+"""
+Pydantic models for voice-server request/response validation.
+
+This module defines the API contract for all endpoints:
+- NotifyRequest/NotifyResponse: POST /notify
+- HealthResponse: GET /health
+- VoicesResponse: GET /voices
+- ErrorResponse: Error responses
+"""
+
+from datetime import datetime, timezone
+from typing import Annotated
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class NotifyRequest(BaseModel):
+    """
+    Request model for POST /notify endpoint.
+
+    Validates incoming TTS requests with message content and optional parameters.
+    """
+
+    message: Annotated[
+        str,
+        Field(
+            min_length=1,
+            max_length=10000,
+            description="Text to convert to speech (1-10000 characters)",
+        ),
+    ]
+    voice: Annotated[
+        str,
+        Field(
+            default="en_US-lessac-medium",
+            pattern=r"^[\w-]+$",
+            description="Piper voice model name",
+        ),
+    ]
+    rate: Annotated[
+        int,
+        Field(
+            default=170,
+            ge=50,
+            le=400,
+            description="Speech rate in words per minute (50-400)",
+        ),
+    ]
+    voice_enabled: Annotated[
+        bool,
+        Field(
+            default=True,
+            description="Enable/disable TTS playback (for debugging)",
+        ),
+    ]
+
+    @field_validator("message", mode="before")
+    @classmethod
+    def strip_message_whitespace(cls, v: str) -> str:
+        """Strip leading and trailing whitespace from message."""
+        if isinstance(v, str):
+            return v.strip()
+        return v
+
+
+class NotifyResponse(BaseModel):
+    """
+    Response model for successful POST /notify requests.
+
+    Returned when a TTS request is successfully queued for processing.
+    """
+
+    status: Annotated[str, Field(description="Request status (e.g., 'queued')")]
+    message_length: Annotated[int, Field(description="Length of the message in characters")]
+    queue_position: Annotated[int, Field(description="Position in the TTS queue")]
+    voice_model: Annotated[str, Field(description="Voice model being used")]
+    estimated_duration: Annotated[
+        float | None,
+        Field(default=None, description="Estimated playback duration in seconds"),
+    ]
+
+
+class QueueStatus(BaseModel):
+    """Queue status information for health checks."""
+
+    size: Annotated[int, Field(description="Current number of items in queue")]
+    capacity: Annotated[int, Field(description="Maximum queue capacity")]
+    utilization: Annotated[float, Field(description="Queue utilization percentage")]
+
+
+class HealthResponse(BaseModel):
+    """
+    Response model for GET /health endpoint.
+
+    Provides comprehensive health status including TTS engine, audio, and queue status.
+    """
+
+    status: Annotated[str, Field(description="Overall health status ('healthy' or 'unhealthy')")]
+    uptime_seconds: Annotated[int, Field(description="Server uptime in seconds")]
+    queue: Annotated[QueueStatus, Field(description="Queue status information")]
+    tts_engine: Annotated[str, Field(description="TTS engine name")]
+    audio_output: Annotated[str, Field(description="Audio output status")]
+    voice_models_loaded: Annotated[
+        list[str] | None,
+        Field(default=None, description="List of loaded voice models"),
+    ]
+    total_requests: Annotated[
+        int | None,
+        Field(default=None, description="Total requests processed"),
+    ]
+    failed_requests: Annotated[
+        int | None,
+        Field(default=None, description="Number of failed requests"),
+    ]
+    errors: Annotated[
+        list[str] | None,
+        Field(default=None, description="List of error messages if unhealthy"),
+    ]
+    timestamp: Annotated[
+        datetime,
+        Field(default_factory=lambda: datetime.now(timezone.utc), description="Response timestamp"),
+    ]
+
+
+class ErrorResponse(BaseModel):
+    """
+    Response model for error conditions.
+
+    Used for 4xx and 5xx error responses with consistent structure.
+    """
+
+    error: Annotated[str, Field(description="Error type identifier")]
+    detail: Annotated[str, Field(description="Human-readable error description")]
+    timestamp: Annotated[
+        datetime,
+        Field(default_factory=lambda: datetime.now(timezone.utc), description="Error timestamp"),
+    ]
+    queue_size: Annotated[
+        int | None,
+        Field(default=None, description="Current queue size (for queue_full errors)"),
+    ]
+
+
+class VoiceInfo(BaseModel):
+    """Information about a single voice model."""
+
+    name: Annotated[str, Field(description="Voice model name")]
+    language: Annotated[str, Field(description="Language code (e.g., 'en_US')")]
+    quality: Annotated[str, Field(description="Quality level (low, medium, high)")]
+    size_mb: Annotated[float, Field(description="Model size in megabytes")]
+    installed: Annotated[bool, Field(description="Whether the model is installed locally")]
+
+
+class VoicesResponse(BaseModel):
+    """
+    Response model for GET /voices endpoint.
+
+    Lists available voice models and the default voice.
+    """
+
+    voices: Annotated[list[VoiceInfo], Field(description="List of available voices")]
+    default_voice: Annotated[str, Field(description="Default voice model name")]
diff --git a/app/queue_manager.py b/app/queue_manager.py
new file mode 100644
index 0000000..83e15e8
--- /dev/null
+++ b/app/queue_manager.py
@@ -0,0 +1,236 @@
+"""
+TTS Queue Manager for voice-server.
+
+Manages an async queue of TTS requests and processes them sequentially.
+"""
+
+import asyncio
+import logging
+import time
+from dataclasses import dataclass, field
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+class QueueFullError(Exception):
+    """Raised when the TTS queue is full."""
+
+    pass
+
+
+@dataclass
+class TTSRequest:
+    """A TTS request in the queue."""
+
+    message: str
+    voice: str
+    rate: int
+    voice_enabled: bool
+    timestamp: float = field(default_factory=time.time)
+    request_id: str | None = None
+
+
+@dataclass
+class QueueStats:
+    """Statistics about queue processing."""
+
+    processed: int = 0
+    errors: int = 0
+    total_audio_seconds: float = 0.0
+
+
+class TTSQueueManager:
+    """
+    Manages the TTS request queue and processes requests sequentially.
+
+    Ensures audio doesn't overlap by processing one request at a time.
+    """
+
+    def __init__(
+        self,
+        tts_engine: Any,
+        audio_player: Any,
+        max_size: int = 50,
+        request_timeout: float = 60.0,
+    ):
+        """
+        Initialize the queue manager.
+
+        Args:
+            tts_engine: TTS engine instance for synthesis
+            audio_player: Audio player instance for playback
+            max_size: Maximum queue size
+            request_timeout: Timeout for processing each request (seconds)
+        """
+        self.tts_engine = tts_engine
+        self.audio_player = audio_player
+        self.max_size = max_size
+        self.request_timeout = request_timeout
+
+        self._queue: asyncio.Queue[TTSRequest] = asyncio.Queue(maxsize=max_size)
+        self._stats = QueueStats()
+        self._running = False
+        self._processor_task: asyncio.Task | None = None
+
+    async def start(self) -> None:
+        """Start the queue processor background task."""
+        if self._running:
+            return
+
+        self._running = True
+        self._processor_task = asyncio.create_task(self._process_queue())
+        logger.info("TTS queue processor started")
+
+    async def stop(self) -> None:
+        """Stop the queue processor and wait for current item to complete."""
+        self._running = False
+
+        if self._processor_task:
+            # Cancel the task
+            self._processor_task.cancel()
+            try:
+                await self._processor_task
+            except asyncio.CancelledError:
+                pass
+            self._processor_task = None
+
+        # Stop any playing audio
+        self.audio_player.stop()
+        logger.info("TTS queue processor stopped")
+
+    async def enqueue(self, request: TTSRequest) -> int:
+        """
+        Add a TTS request to the queue.
+
+        Args:
+            request: The TTS request to queue
+
+        Returns:
+            Queue position (1-indexed)
+
+        Raises:
+            QueueFullError: If the queue is full
+        """
+        try:
+            # Use a short timeout to avoid blocking
+            await asyncio.wait_for(
+                self._queue.put(request),
+                timeout=1.0,
+            )
+            position = self._queue.qsize()
+            logger.debug(f"Enqueued request: {request.message[:50]}... (position={position})")
+            return position
+        except asyncio.TimeoutError:
+            raise QueueFullError(f"TTS queue is full (max_size={self.max_size})")
+
+    async def _process_queue(self) -> None:
+        """Background task that processes queued requests."""
+        while self._running:
+            try:
+                # Wait for a request (with timeout to allow checking _running)
+                try:
+                    request = await asyncio.wait_for(
+                        self._queue.get(),
+                        timeout=1.0,
+                    )
+                except asyncio.TimeoutError:
+                    continue
+
+                await self._process_request(request)
+                self._queue.task_done()
+
+            except asyncio.CancelledError:
+                raise
+            except Exception as e:
+                logger.error(f"Error in queue processor: {e}")
+                self._stats.errors += 1
+
+    async def _process_request(self, request: TTSRequest) -> None:
+        """
+        Process a single TTS request.
+
+        Args:
+            request: The TTS request to process
+        """
+        start_time = time.time()
+
+        try:
+            logger.debug(f"Processing TTS request: {request.message[:50]}...")
+
+            if not request.voice_enabled:
+                logger.debug("Voice disabled, skipping TTS")
+                self._stats.processed += 1
+                return
+
+            # Synthesize audio (run in thread pool to avoid blocking)
+            loop = asyncio.get_event_loop()
+            audio_data = await asyncio.wait_for(
+                loop.run_in_executor(
+                    None,
+                    self.tts_engine.synthesize_to_float32,
+                    request.message,
+                    request.voice,
+                ),
+                timeout=self.request_timeout,
+            )
+
+            if len(audio_data) == 0:
+                logger.warning("TTS generated empty audio")
+                self._stats.processed += 1
+                return
+
+            # Play audio
+            self.audio_player.play(audio_data, self.tts_engine.get_sample_rate())
+
+            # Wait for playback to complete
+            await self.audio_player.wait_async()
+
+            # Update stats
+            duration = len(audio_data) / self.tts_engine.get_sample_rate()
+            self._stats.processed += 1
+            self._stats.total_audio_seconds += duration
+
+            elapsed = time.time() - start_time
+            logger.debug(f"Request processed: {duration:.2f}s audio in {elapsed:.2f}s")
+
+        except asyncio.TimeoutError:
+            logger.error(f"Request timed out after {self.request_timeout}s")
+            self._stats.errors += 1
+        except Exception as e:
+            logger.error(f"Error processing request: {e}")
+            self._stats.errors += 1
+
+    @property
+    def size(self) -> int:
+        """Get current queue size."""
+        return self._queue.qsize()
+
+    @property
+    def capacity(self) -> int:
+        """Get queue capacity."""
+        return self.max_size
+
+    @property
+    def utilization(self) -> float:
+        """Get queue utilization percentage."""
+        if self.max_size == 0:
+            return 0.0
+        return (self.size / self.max_size) * 100.0
+
+    @property
+    def stats(self) -> QueueStats:
+        """Get queue statistics."""
+        return self._stats
+
+    def get_status(self) -> dict:
+        """Get queue status for health checks."""
+        return {
+            "size": self.size,
+            "capacity": self.capacity,
+            "utilization": round(self.utilization, 1),
+            "processed": self._stats.processed,
+            "errors": self._stats.errors,
+            "total_audio_seconds": round(self._stats.total_audio_seconds, 1),
+            "running": self._running,
+        }
diff --git a/app/routes.py b/app/routes.py
new file mode 100644
index 0000000..5791f22
--- /dev/null
+++ b/app/routes.py
@@ -0,0 +1,198 @@
+"""
+API routes for voice-server.
+
+Defines all HTTP endpoints:
+- POST /notify: Submit text for TTS playback
+- GET /health: Health check endpoint
+- GET /voices: List available voice models
+"""
+
+from fastapi import APIRouter, HTTPException, Response, status
+
+from app.config import get_settings
+from app.models import (
+    ErrorResponse,
+    HealthResponse,
+    NotifyRequest,
+    NotifyResponse,
+    QueueStatus,
+    VoiceInfo,
+    VoicesResponse,
+)
+from app.queue_manager import QueueFullError, TTSRequest
+
+router = APIRouter()
+
+
+def _get_components():
+    """Get the TTS components from main module."""
+    from app import main
+
+    return main.tts_engine, main.audio_player, main.queue_manager, main.get_uptime_seconds
+
+
+@router.post(
+    "/notify",
+    response_model=NotifyResponse,
+    status_code=status.HTTP_202_ACCEPTED,
+    responses={
+        422: {"model": ErrorResponse, "description": "Validation error"},
+        503: {"model": ErrorResponse, "description": "Queue full"},
+    },
+)
+async def notify(request: NotifyRequest) -> NotifyResponse:
+    """
+    Submit text for TTS playback.
+
+    Accepts a text message and queues it for text-to-speech conversion
+    and playback through the system speakers.
+
+    Returns immediately with queue position; audio plays asynchronously.
+    """
+    tts_engine, audio_player, queue_manager, _ = _get_components()
+    settings = get_settings()
+
+    # Validate voice exists if specified
+    voice = request.voice or settings.default_voice
+    if tts_engine and not tts_engine.is_voice_available(voice):
+        available = [v["name"] for v in tts_engine.list_voices()]
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=f"Voice '{voice}' not found. Available: {available}",
+        )
+
+    # Create TTS request
+    tts_request = TTSRequest(
+        message=request.message,
+        voice=voice,
+        rate=request.rate,
+        voice_enabled=request.voice_enabled and settings.voice_enabled,
+    )
+
+    # Enqueue request
+    try:
+        if queue_manager:
+            position = await queue_manager.enqueue(tts_request)
+        else:
+            position = 1  # Fallback for testing
+    except QueueFullError as e:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail=str(e),
+        )
+
+    return NotifyResponse(
+        status="queued",
+        message_length=len(request.message),
+        queue_position=position,
+        voice_model=voice,
+    )
+
+
+@router.get(
+    "/health",
+    response_model=HealthResponse,
+    responses={
+        503: {"model": HealthResponse, "description": "Service unhealthy"},
+    },
+)
+async def health(response: Response) -> HealthResponse:
+    """
+    Health check endpoint.
+
+    Returns comprehensive health status including:
+    - TTS engine status
+    - Audio output status
+    - Queue status
+    - System metrics
+    """
+    tts_engine, audio_player, queue_manager, get_uptime_seconds = _get_components()
+    settings = get_settings()
+
+    errors = []
+
+    # Check TTS engine health
+    tts_status = "unknown"
+    if tts_engine:
+        tts_health = tts_engine.health_check()
+        tts_status = tts_health.get("status", "unknown")
+        if tts_status != "healthy":
+            errors.append(f"TTS: {tts_health.get('error', 'Unknown error')}")
+
+    # Check audio health
+    audio_status = "unknown"
+    if audio_player:
+        audio_health = audio_player.health_check()
+        audio_status = audio_health.get("status", "unknown")
+        if audio_status != "healthy":
+            errors.append(f"Audio: {audio_health.get('error', 'Unknown error')}")
+
+    # Get queue status
+    if queue_manager:
+        queue_info = queue_manager.get_status()
+        queue_status = QueueStatus(
+            size=queue_info["size"],
+            capacity=queue_info["capacity"],
+            utilization=queue_info["utilization"],
+        )
+        total_requests = queue_info["processed"]
+        failed_requests = queue_info["errors"]
+    else:
+        queue_status = QueueStatus(
+            size=0,
+            capacity=settings.queue_max_size,
+            utilization=0.0,
+        )
+        total_requests = None
+        failed_requests = None
+
+    # Determine overall status
+    overall_status = "healthy" if not errors else "unhealthy"
+
+    # Set response status code for unhealthy
+    if overall_status == "unhealthy":
+        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
+
+    return HealthResponse(
+        status=overall_status,
+        uptime_seconds=get_uptime_seconds(),
+        queue=queue_status,
+        tts_engine="piper",
+        audio_output="available" if audio_status == "healthy" else "unavailable",
+        total_requests=total_requests,
+        failed_requests=failed_requests,
+        errors=errors if errors else None,
+    )
+
+
+@router.get(
+    "/voices",
+    response_model=VoicesResponse,
+)
+async def list_voices() -> VoicesResponse:
+    """
+    List available voice models.
+
+    Returns a list of installed voice models with their metadata
+    and the current default voice.
+    """
+    tts_engine, _, _, _ = _get_components()
+    settings = get_settings()
+
+    voices = []
+    if tts_engine:
+        for voice_data in tts_engine.list_voices():
+            voices.append(
+                VoiceInfo(
+                    name=voice_data["name"],
+                    language=voice_data["language"],
+                    quality=voice_data["quality"],
+                    size_mb=voice_data["size_mb"],
+                    installed=voice_data["installed"],
+                )
+            )
+
+    return VoicesResponse(
+        voices=voices,
+        default_voice=settings.default_voice,
+    )
diff --git a/app/tts_engine.py b/app/tts_engine.py
new file mode 100644
index 0000000..a0a868a
--- /dev/null
+++ b/app/tts_engine.py
@@ -0,0 +1,287 @@
+"""
+TTS Engine module for voice-server.
+
+Provides text-to-speech synthesis using Piper TTS.
+Supports multiple voice models with lazy loading and caching.
+"""
+
+import logging
+from pathlib import Path
+from typing import Protocol
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class TTSEngine(Protocol):
+    """Protocol defining the TTS engine interface."""
+
+    def synthesize(self, text: str, voice: str | None = None) -> np.ndarray:
+        """Convert text to audio samples."""
+        ...
+
+    def get_sample_rate(self) -> int:
+        """Get the audio sample rate."""
+        ...
+
+    def list_voices(self) -> list[dict]:
+        """List available voice models."""
+        ...
+
+
+class PiperTTSEngine:
+    """
+    Piper TTS engine implementation.
+
+    Provides high-quality neural text-to-speech using Piper's ONNX models.
+    Voice models are loaded lazily and cached for performance.
+    """
+
+    def __init__(self, model_dir: str = "./models", default_voice: str = "en_US-lessac-medium"):
+        """
+        Initialize the Piper TTS engine.
+
+        Args:
+            model_dir: Directory containing voice model files (.onnx + .onnx.json)
+            default_voice: Default voice model name to use
+        """
+        self.model_dir = Path(model_dir)
+        self.default_voice = default_voice
+        self._voices: dict = {}  # Cache of loaded PiperVoice instances
+        self._voice_metadata: dict = {}  # Cache of voice metadata
+        self._sample_rate: int = 22050  # Piper default sample rate
+
+        # Ensure model directory exists
+        self.model_dir.mkdir(parents=True, exist_ok=True)
+
+        logger.info(f"PiperTTSEngine initialized with model_dir={model_dir}")
+
+    def _get_voice_path(self, voice_name: str) -> tuple[Path, Path]:
+        """
+        Get paths to voice model files.
+
+        Args:
+            voice_name: Name of the voice model
+
+        Returns:
+            Tuple of (onnx_path, json_path)
+        """
+        onnx_path = self.model_dir / f"{voice_name}.onnx"
+        json_path = self.model_dir / f"{voice_name}.onnx.json"
+        return onnx_path, json_path
+
+    def _load_voice(self, voice_name: str):
+        """
+        Load a voice model (lazy loading with caching).
+
+        Args:
+            voice_name: Name of the voice model to load
+
+        Returns:
+            Loaded PiperVoice instance
+
+        Raises:
+            FileNotFoundError: If voice model files don't exist
+            RuntimeError: If voice model fails to load
+        """
+        if voice_name in self._voices:
+            return self._voices[voice_name]
+
+        onnx_path, json_path = self._get_voice_path(voice_name)
+
+        if not onnx_path.exists():
+            raise FileNotFoundError(
+                f"Voice model not found: {voice_name}. "
+                f"Expected file: {onnx_path}"
+            )
+
+        try:
+            from piper import PiperVoice
+
+            logger.info(f"Loading voice model: {voice_name}")
+            voice = PiperVoice.load(str(onnx_path), config_path=str(json_path) if json_path.exists() else None)
+            self._voices[voice_name] = voice
+
+            # Update sample rate from loaded voice
+            if hasattr(voice, 'config') and voice.config:
+                self._sample_rate = voice.config.sample_rate
+
+            logger.info(f"Voice model loaded: {voice_name} (sample_rate={self._sample_rate})")
+            return voice
+
+        except Exception as e:
+            logger.error(f"Failed to load voice model {voice_name}: {e}")
+            raise RuntimeError(f"Failed to load voice model: {e}") from e
+
+    def synthesize(self, text: str, voice: str | None = None) -> np.ndarray:
+        """
+        Convert text to audio samples.
+
+        Args:
+            text: Text to convert to speech
+            voice: Voice model name (uses default if None)
+
+        Returns:
+            NumPy array of audio samples (int16)
+
+        Raises:
+            FileNotFoundError: If voice model not found
+            RuntimeError: If synthesis fails
+        """
+        voice_name = voice or self.default_voice
+
+        if not text or not text.strip():
+            # Return empty audio for empty text
+            return np.array([], dtype=np.int16)
+
+        try:
+            piper_voice = self._load_voice(voice_name)
+
+            # Synthesize audio - piper returns an iterator of AudioChunk objects
+            audio_chunks = []
+            for chunk in piper_voice.synthesize(text):
+                # Each chunk has audio_int16_array property
+                audio_chunks.append(chunk.audio_int16_array)
+
+            if not audio_chunks:
+                return np.array([], dtype=np.int16)
+
+            # Concatenate all chunks
+            audio_array = np.concatenate(audio_chunks)
+
+            logger.debug(f"Synthesized {len(text)} chars -> {len(audio_array)} samples")
+            return audio_array
+
+        except FileNotFoundError:
+            raise
+        except Exception as e:
+            logger.error(f"TTS synthesis failed: {e}")
+            raise RuntimeError(f"TTS synthesis failed: {e}") from e
+
+    def synthesize_to_float32(self, text: str, voice: str | None = None) -> np.ndarray:
+        """
+        Convert text to float32 audio samples (normalized -1.0 to 1.0).
+
+        This format is preferred by sounddevice for playback.
+
+        Args:
+            text: Text to convert to speech
+            voice: Voice model name (uses default if None)
+
+        Returns:
+            NumPy array of float32 audio samples
+        """
+        int16_audio = self.synthesize(text, voice)
+
+        if len(int16_audio) == 0:
+            return np.array([], dtype=np.float32)
+
+        # Convert int16 to float32 normalized
+        float32_audio = int16_audio.astype(np.float32) / 32768.0
+        return float32_audio
+
+    def get_sample_rate(self) -> int:
+        """Get the audio sample rate for the current voice."""
+        return self._sample_rate
+
+    def list_voices(self) -> list[dict]:
+        """
+        List available voice models in the model directory.
+
+        Returns:
+            List of voice info dictionaries with name, language, quality, etc.
+        """
+        voices = []
+
+        if not self.model_dir.exists():
+            return voices
+
+        # Find all .onnx files
+        for onnx_file in self.model_dir.glob("*.onnx"):
+            voice_name = onnx_file.stem
+            json_file = onnx_file.with_suffix(".onnx.json")
+
+            voice_info = {
+                "name": voice_name,
+                "language": self._extract_language(voice_name),
+                "quality": self._extract_quality(voice_name),
+                "size_mb": round(onnx_file.stat().st_size / (1024 * 1024), 1),
+                "installed": True,
+            }
+
+            # Try to load additional metadata from JSON config
+            if json_file.exists():
+                try:
+                    import json
+                    with open(json_file) as f:
+                        config = json.load(f)
+                        if "language" in config:
+                            voice_info["language"] = config["language"].get("code", voice_info["language"])
+                except Exception:
+                    pass  # Use extracted values if JSON parsing fails
+
+            voices.append(voice_info)
+
+        return sorted(voices, key=lambda v: v["name"])
+
+    def _extract_language(self, voice_name: str) -> str:
+        """Extract language code from voice name (e.g., 'en_US' from 'en_US-lessac-medium')."""
+        parts = voice_name.split("-")
+        if parts:
+            return parts[0]
+        return "unknown"
+
+    def _extract_quality(self, voice_name: str) -> str:
+        """Extract quality level from voice name (e.g., 'medium' from 'en_US-lessac-medium')."""
+        parts = voice_name.split("-")
+        if len(parts) >= 3:
+            quality = parts[-1].lower()
+            if quality in ("low", "medium", "high", "x_low", "x_high"):
+                return quality
+        return "medium"
+
+    def is_voice_available(self, voice_name: str) -> bool:
+        """Check if a voice model is installed."""
+        onnx_path, _ = self._get_voice_path(voice_name)
+        return onnx_path.exists()
+
+    def health_check(self) -> dict:
+        """
+        Perform a health check on the TTS engine.
+
+        Returns:
+            Dict with status and any error messages
+        """
+        try:
+            # Check if piper is importable
+            from piper import PiperVoice  # noqa: F401
+
+            # Check if model directory exists
+            if not self.model_dir.exists():
+                return {
+                    "status": "degraded",
+                    "error": f"Model directory does not exist: {self.model_dir}",
+                }
+
+            # Check if default voice is available
+            if not self.is_voice_available(self.default_voice):
+                available = [v["name"] for v in self.list_voices()]
+                return {
+                    "status": "degraded",
+                    "error": f"Default voice not found: {self.default_voice}",
+                    "available_voices": available,
+                }
+
+            return {"status": "healthy"}
+
+        except ImportError as e:
+            return {
+                "status": "unhealthy",
+                "error": f"Piper TTS not installed: {e}",
+            }
+        except Exception as e:
+            return {
+                "status": "unhealthy",
+                "error": str(e),
+            }
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f75ec8f
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,69 @@
+[project]
+name = "voice-server"
+version = "1.0.0"
+description = "Local HTTP service for text-to-speech playback"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "MIT"}
+authors = [
+    {name = "Cal Corum", email = "cal.corum@gmail.com"}
+]
+keywords = ["tts", "text-to-speech", "piper", "fastapi", "voice"]
+
+dependencies = [
+    "fastapi>=0.115.0",
+    "uvicorn[standard]>=0.32.0",
+    "piper-tts>=1.2.0",
+    "sounddevice>=0.5.0",
+    "numpy>=1.26.0",
+    "pydantic>=2.10.0",
+    "pydantic-settings>=2.6.0",
+    "python-dotenv>=1.0.0",
+    "psutil>=6.0.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.3.0",
+    "pytest-asyncio>=0.24.0",
+    "pytest-cov>=6.0.0",
+    "httpx>=0.28.0",
+    "ruff>=0.8.0",
+]
+
+[project.scripts]
+voice-server = "app.main:run"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["app"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_functions = ["test_*"]
+addopts = "-v --tb=short"
+
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "N", "W", "UP"]
+ignore = ["E501"]
+
+[tool.coverage.run]
+source = ["app"]
+omit = ["tests/*"]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "if TYPE_CHECKING:",
+    "raise NotImplementedError",
+]
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 0000000..02e804c
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,324 @@
+"""
+TDD Tests for API endpoints.
+
+These tests verify the API contract for all voice-server endpoints:
+- POST /notify: TTS request submission
+- GET /health: Health check
+- GET /voices: Voice model listing
+
+Uses httpx.AsyncClient for async endpoint testing with FastAPI's TestClient.
+"""
+
+import pytest
+from httpx import AsyncClient, ASGITransport
+
+from app.main import app
+
+
+@pytest.fixture
+async def client():
+    """Create an async test client for the FastAPI app."""
+    async with AsyncClient(
+        transport=ASGITransport(app=app),
+        base_url="http://test",
+    ) as client:
+        yield client
+
+
+class TestNotifyEndpoint:
+    """Tests for POST /notify endpoint."""
+
+    async def test_valid_request_returns_202(self, client: AsyncClient):
+        """
+        A valid request with just a message should return 202 Accepted.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Hello, world!"},
+        )
+
+        assert response.status_code == 202
+
+    async def test_valid_request_returns_queued_status(self, client: AsyncClient):
+        """
+        Response should include status='queued' for successful requests.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test message"},
+        )
+
+        data = response.json()
+        assert data["status"] == "queued"
+
+    async def test_response_includes_message_length(self, client: AsyncClient):
+        """
+        Response should include the length of the submitted message.
+        """
+        message = "This is a test message"
+        response = await client.post(
+            "/notify",
+            json={"message": message},
+        )
+
+        data = response.json()
+        assert data["message_length"] == len(message)
+
+    async def test_response_includes_queue_position(self, client: AsyncClient):
+        """
+        Response should include the queue position.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test"},
+        )
+
+        data = response.json()
+        assert "queue_position" in data
+        assert isinstance(data["queue_position"], int)
+        assert data["queue_position"] >= 1
+
+    async def test_response_includes_voice_model(self, client: AsyncClient):
+        """
+        Response should include the voice model being used.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test"},
+        )
+
+        data = response.json()
+        assert "voice_model" in data
+        assert data["voice_model"] == "en_US-lessac-medium"  # default
+
+    async def test_custom_voice_is_preserved(self, client: AsyncClient):
+        """
+        Custom voice selection should be reflected in response.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test", "voice": "en_US-libritts-high"},
+        )
+
+        data = response.json()
+        assert data["voice_model"] == "en_US-libritts-high"
+
+    async def test_missing_message_returns_422(self, client: AsyncClient):
+        """
+        Request without message should return 422 Unprocessable Entity.
+        """
+        response = await client.post(
+            "/notify",
+            json={},
+        )
+
+        assert response.status_code == 422
+
+    async def test_empty_message_returns_422(self, client: AsyncClient):
+        """
+        Empty message string should return 422 Unprocessable Entity.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": ""},
+        )
+
+        assert response.status_code == 422
+
+    async def test_message_too_long_returns_422(self, client: AsyncClient):
+        """
+        Message over 10000 characters should return 422.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "a" * 10001},
+        )
+
+        assert response.status_code == 422
+
+    async def test_invalid_rate_returns_422(self, client: AsyncClient):
+        """
+        Rate outside valid range should return 422.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test", "rate": 500},
+        )
+
+        assert response.status_code == 422
+
+    async def test_invalid_voice_pattern_returns_422(self, client: AsyncClient):
+        """
+        Voice with invalid characters should return 422.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "Test", "voice": "invalid/voice"},
+        )
+
+        assert response.status_code == 422
+
+    async def test_malformed_json_returns_422(self, client: AsyncClient):
+        """
+        Malformed JSON should return 422.
+        """
+        response = await client.post(
+            "/notify",
+            content="not valid json",
+            headers={"Content-Type": "application/json"},
+        )
+
+        assert response.status_code == 422
+
+    async def test_whitespace_message_is_stripped(self, client: AsyncClient):
+        """
+        Whitespace in message should be stripped.
+        """
+        response = await client.post(
+            "/notify",
+            json={"message": "  Hello  "},
+        )
+
+        assert response.status_code == 202
+        data = response.json()
+        assert data["message_length"] == 5  # "Hello" without whitespace
+
+
+class TestHealthEndpoint:
+    """Tests for GET /health endpoint."""
+
+    async def test_health_returns_200(self, client: AsyncClient):
+        """
+        Health endpoint should return 200 when healthy.
+        """
+        response = await client.get("/health")
+
+        assert response.status_code == 200
+
+    async def test_health_returns_status(self, client: AsyncClient):
+        """
+        Health response should include status field.
+        """
+        response = await client.get("/health")
+
+        data = response.json()
+        assert "status" in data
+        assert data["status"] in ["healthy", "unhealthy"]
+
+    async def test_health_returns_uptime(self, client: AsyncClient):
+        """
+        Health response should include uptime in seconds.
+        """
+        response = await client.get("/health")
+
+        data = response.json()
+        assert "uptime_seconds" in data
+        assert isinstance(data["uptime_seconds"], int)
+        assert data["uptime_seconds"] >= 0
+
+    async def test_health_returns_queue_status(self, client: AsyncClient):
+        """
+        Health response should include queue status.
+        """
+        response = await client.get("/health")
+
+        data = response.json()
+        assert "queue" in data
+        assert "size" in data["queue"]
+        assert "capacity" in data["queue"]
+        assert "utilization" in data["queue"]
+
+    async def test_health_returns_tts_engine(self, client: AsyncClient):
+        """
+        Health response should include TTS engine info.
+        """
+        response = await client.get("/health")
+
+        data = response.json()
+        assert "tts_engine" in data
+        assert data["tts_engine"] == "piper"
+
+    async def test_health_returns_audio_output(self, client: AsyncClient):
+        """
+        Health response should include audio output status.
+        """
+        response = await client.get("/health")
+
+        data = response.json()
+        assert "audio_output" in data
+
+
+class TestVoicesEndpoint:
+    """Tests for GET /voices endpoint."""
+
+    async def test_voices_returns_200(self, client: AsyncClient):
+        """
+        Voices endpoint should return 200.
+        """
+        response = await client.get("/voices")
+
+        assert response.status_code == 200
+
+    async def test_voices_returns_list(self, client: AsyncClient):
+        """
+        Voices response should include a list of voices.
+        """
+        response = await client.get("/voices")
+
+        data = response.json()
+        assert "voices" in data
+        assert isinstance(data["voices"], list)
+
+    async def test_voices_returns_default_voice(self, client: AsyncClient):
+        """
+        Voices response should include the default voice.
+        """
+        response = await client.get("/voices")
+
+        data = response.json()
+        assert "default_voice" in data
+        assert data["default_voice"] == "en_US-lessac-medium"
+
+
+class TestOpenAPIDocumentation:
+    """Tests for API documentation endpoints."""
+
+    async def test_openapi_json_available(self, client: AsyncClient):
+        """
+        OpenAPI JSON should be available at /openapi.json.
+        """
+        response = await client.get("/openapi.json")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert "openapi" in data
+        assert "paths" in data
+
+    async def test_docs_endpoint_available(self, client: AsyncClient):
+        """
+        Swagger UI should be available at /docs.
+        """
+        response = await client.get("/docs")
+
+        assert response.status_code == 200
+        assert "text/html" in response.headers.get("content-type", "")
+
+
+class TestCORS:
+    """Tests for CORS middleware."""
+
+    async def test_cors_headers_present(self, client: AsyncClient):
+        """
+        CORS headers should be present in responses.
+        """
+        response = await client.options(
+            "/notify",
+            headers={
+                "Origin": "http://localhost:3000",
+                "Access-Control-Request-Method": "POST",
+            },
+        )
+
+        # FastAPI returns 200 for OPTIONS with CORS
+        assert response.status_code == 200
+        assert "access-control-allow-origin" in response.headers
diff --git a/tests/test_config.py b/tests/test_config.py
new file mode 100644
index 0000000..0046e7a
--- /dev/null
+++ b/tests/test_config.py
@@ -0,0 +1,300 @@
+"""
+TDD Tests for configuration loading.
+
+These tests define the expected behavior for the Settings class which loads
+configuration from environment variables with sensible defaults.
+
+Test Coverage:
+- Default values when no environment variables are set
+- Environment variable overrides
+- Validation of configuration values
+- Path handling for model directory
+"""
+
+import os
+import pytest
+
+
+class TestSettingsDefaults:
+    """Tests for default configuration values."""
+
+    def test_default_host(self, monkeypatch):
+        """
+        Default host should be 0.0.0.0 (listen on all interfaces).
+        """
+        # Clear any existing env vars
+        monkeypatch.delenv("HOST", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.host == "0.0.0.0"
+
+    def test_default_port(self, monkeypatch):
+        """
+        Default port should be 8888.
+        """
+        monkeypatch.delenv("PORT", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.port == 8888
+
+    def test_default_model_dir(self, monkeypatch):
+        """
+        Default model directory should be ./models.
+        """
+        monkeypatch.delenv("MODEL_DIR", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.model_dir == "./models"
+
+    def test_default_voice(self, monkeypatch):
+        """
+        Default voice should be en_US-lessac-medium.
+        """
+        monkeypatch.delenv("DEFAULT_VOICE", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.default_voice == "en_US-lessac-medium"
+
+    def test_default_rate(self, monkeypatch):
+        """
+        Default speech rate should be 170 WPM.
+        """
+        monkeypatch.delenv("DEFAULT_RATE", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.default_rate == 170
+
+    def test_default_queue_max_size(self, monkeypatch):
+        """
+        Default queue max size should be 50.
+        """
+        monkeypatch.delenv("QUEUE_MAX_SIZE", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.queue_max_size == 50
+
+    def test_default_request_timeout(self, monkeypatch):
+        """
+        Default request timeout should be 60 seconds.
+        """
+        monkeypatch.delenv("REQUEST_TIMEOUT_SECONDS", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.request_timeout_seconds == 60
+
+    def test_default_log_level(self, monkeypatch):
+        """
+        Default log level should be INFO.
+        """
+        monkeypatch.delenv("LOG_LEVEL", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.log_level == "INFO"
+
+    def test_default_voice_enabled(self, monkeypatch):
+        """
+        Voice should be enabled by default.
+        """
+        monkeypatch.delenv("VOICE_ENABLED", raising=False)
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.voice_enabled is True
+
+
+class TestSettingsEnvOverrides:
+    """Tests for environment variable overrides."""
+
+    def test_host_override(self, monkeypatch):
+        """
+        HOST environment variable should override default.
+        """
+        monkeypatch.setenv("HOST", "127.0.0.1")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.host == "127.0.0.1"
+
+    def test_port_override(self, monkeypatch):
+        """
+        PORT environment variable should override default.
+        """
+        monkeypatch.setenv("PORT", "9000")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.port == 9000
+
+    def test_model_dir_override(self, monkeypatch):
+        """
+        MODEL_DIR environment variable should override default.
+        """
+        monkeypatch.setenv("MODEL_DIR", "/opt/voice-models")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.model_dir == "/opt/voice-models"
+
+    def test_default_voice_override(self, monkeypatch):
+        """
+        DEFAULT_VOICE environment variable should override default.
+        """
+        monkeypatch.setenv("DEFAULT_VOICE", "en_US-libritts-high")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.default_voice == "en_US-libritts-high"
+
+    def test_queue_max_size_override(self, monkeypatch):
+        """
+        QUEUE_MAX_SIZE environment variable should override default.
+        """
+        monkeypatch.setenv("QUEUE_MAX_SIZE", "100")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.queue_max_size == 100
+
+    def test_log_level_override(self, monkeypatch):
+        """
+        LOG_LEVEL environment variable should override default.
+        """
+        monkeypatch.setenv("LOG_LEVEL", "DEBUG")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.log_level == "DEBUG"
+
+    def test_voice_enabled_false(self, monkeypatch):
+        """
+        VOICE_ENABLED=false should disable voice output.
+        """
+        monkeypatch.setenv("VOICE_ENABLED", "false")
+
+        from app.config import Settings
+
+        settings = Settings()
+        assert settings.voice_enabled is False
+
+
+class TestSettingsValidation:
+    """Tests for configuration validation."""
+
+    def test_port_must_be_positive(self, monkeypatch):
+        """
+        Port must be a positive integer.
+        """
+        monkeypatch.setenv("PORT", "-1")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+    def test_port_must_be_valid_range(self, monkeypatch):
+        """
+        Port must be in valid range (1-65535).
+        """
+        monkeypatch.setenv("PORT", "70000")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+    def test_queue_max_size_must_be_positive(self, monkeypatch):
+        """
+        Queue max size must be positive.
+        """
+        monkeypatch.setenv("QUEUE_MAX_SIZE", "0")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+    def test_request_timeout_must_be_positive(self, monkeypatch):
+        """
+        Request timeout must be positive.
+        """
+        monkeypatch.setenv("REQUEST_TIMEOUT_SECONDS", "0")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+    def test_default_rate_must_be_in_range(self, monkeypatch):
+        """
+        Default rate must be between 50 and 400.
+        """
+        monkeypatch.setenv("DEFAULT_RATE", "500")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+    def test_log_level_must_be_valid(self, monkeypatch):
+        """
+        Log level must be a valid Python logging level.
+        """
+        monkeypatch.setenv("LOG_LEVEL", "INVALID")
+
+        from pydantic import ValidationError
+        from app.config import Settings
+
+        with pytest.raises(ValidationError):
+            Settings()
+
+
+class TestGetSettings:
+    """Tests for the get_settings function."""
+
+    def test_get_settings_returns_settings_instance(self, monkeypatch):
+        """
+        get_settings should return a Settings instance.
+        """
+        # Clear cache to ensure fresh settings
+        from app.config import get_settings, Settings
+
+        settings = get_settings()
+        assert isinstance(settings, Settings)
+
+    def test_get_settings_is_cached(self, monkeypatch):
+        """
+        get_settings should return the same cached instance.
+        """
+        from app.config import get_settings
+
+        settings1 = get_settings()
+        settings2 = get_settings()
+        assert settings1 is settings2
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..c9edc76
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,388 @@
+"""
+TDD Tests for Pydantic request/response models.
+
+These tests define the API contract for the voice server's request and response models.
+Tests are written BEFORE implementation to drive the design.
+
+Test Coverage:
+- NotifyRequest: Validates incoming TTS requests with message, voice, rate, voice_enabled
+- NotifyResponse: Validates successful queue responses
+- HealthResponse: Validates health check responses
+- ErrorResponse: Validates error response format
+- VoiceInfo/VoicesResponse: Validates voice listing responses
+"""
+
+import pytest
+from datetime import datetime
+from pydantic import ValidationError
+
+
+class TestNotifyRequest:
+    """Tests for the NotifyRequest model - validates incoming TTS requests."""
+
+    def test_valid_request_with_message_only(self):
+        """
+        A minimal valid request should only require the message field.
+        All other fields should use sensible defaults.
+        """
+        from app.models import NotifyRequest
+
+        request = NotifyRequest(message="Hello, world!")
+
+        assert request.message == "Hello, world!"
+        assert request.voice == "en_US-lessac-medium"  # default voice
+        assert request.rate == 170  # default rate
+        assert request.voice_enabled is True  # default enabled
+
+    def test_valid_request_with_all_fields(self):
+        """
+        A request with all fields specified should preserve those values.
+        """
+        from app.models import NotifyRequest
+
+        request = NotifyRequest(
+            message="Test message",
+            voice="en_US-libritts-high",
+            rate=200,
+            voice_enabled=False,
+        )
+
+        assert request.message == "Test message"
+        assert request.voice == "en_US-libritts-high"
+        assert request.rate == 200
+        assert request.voice_enabled is False
+
+    def test_message_is_required(self):
+        """
+        The message field is required - omitting it should raise ValidationError.
+        """
+        from app.models import NotifyRequest
+
+        with pytest.raises(ValidationError) as exc_info:
+            NotifyRequest()
+
+        errors = exc_info.value.errors()
+        assert any(e["loc"] == ("message",) and e["type"] == "missing" for e in errors)
+
+    def test_message_cannot_be_empty(self):
+        """
+        An empty message string should be rejected.
+        """
+        from app.models import NotifyRequest
+
+        with pytest.raises(ValidationError) as exc_info:
+            NotifyRequest(message="")
+
+        errors = exc_info.value.errors()
+        assert any("message" in str(e["loc"]) for e in errors)
+
+    def test_message_minimum_length_is_1(self):
+        """
+        A single character message should be valid.
+        """
+        from app.models import NotifyRequest
+
+        request = NotifyRequest(message="X")
+        assert request.message == "X"
+
+    def test_message_maximum_length_is_10000(self):
+        """
+        Messages up to 10,000 characters should be accepted.
+        """
+        from app.models import NotifyRequest
+
+        long_message = "a" * 10000
+        request = NotifyRequest(message=long_message)
+        assert len(request.message) == 10000
+
+    def test_message_over_10000_characters_rejected(self):
+        """
+        Messages over 10,000 characters should be rejected.
+        """
+        from app.models import NotifyRequest
+
+        too_long = "a" * 10001
+        with pytest.raises(ValidationError) as exc_info:
+            NotifyRequest(message=too_long)
+
+        errors = exc_info.value.errors()
+        assert any("message" in str(e["loc"]) for e in errors)
+
+    def test_message_whitespace_is_stripped(self):
+        """
+        Leading and trailing whitespace should be stripped from messages.
+        """
+        from app.models import NotifyRequest
+
+        request = NotifyRequest(message="  Hello, world!  ")
+        assert request.message == "Hello, world!"
+
+    def test_rate_minimum_is_50(self):
+        """
+        Rate below 50 should be rejected.
+        """
+        from app.models import NotifyRequest
+
+        with pytest.raises(ValidationError) as exc_info:
+            NotifyRequest(message="Test", rate=49)
+
+        errors = exc_info.value.errors()
+        assert any("rate" in str(e["loc"]) for e in errors)
+
+    def test_rate_maximum_is_400(self):
+        """
+        Rate above 400 should be rejected.
+        """
+        from app.models import NotifyRequest
+
+        with pytest.raises(ValidationError) as exc_info:
+            NotifyRequest(message="Test", rate=401)
+
+        errors = exc_info.value.errors()
+        assert any("rate" in str(e["loc"]) for e in errors)
+
+    def test_rate_at_boundaries(self):
+        """
+        Rate values at exact boundaries (50, 400) should be valid.
+        """
+        from app.models import NotifyRequest
+
+        request_min = NotifyRequest(message="Test", rate=50)
+        assert request_min.rate == 50
+
+        request_max = NotifyRequest(message="Test", rate=400)
+        assert request_max.rate == 400
+
+    def test_voice_pattern_validation(self):
+        """
+        Voice names should match expected pattern (alphanumeric, underscores, hyphens).
+        """
+        from app.models import NotifyRequest
+
+        # Valid patterns
+        request = NotifyRequest(message="Test", voice="en_US-lessac-medium")
+        assert request.voice == "en_US-lessac-medium"
+
+        request2 = NotifyRequest(message="Test", voice="voice_123")
+        assert request2.voice == "voice_123"
+
+    def test_invalid_voice_pattern_rejected(self):
+        """
+        Voice names with invalid characters should be rejected.
+        """
+        from app.models import NotifyRequest
+
+        with pytest.raises(ValidationError):
+            NotifyRequest(message="Test", voice="invalid/voice")
+
+        with pytest.raises(ValidationError):
+            NotifyRequest(message="Test", voice="invalid voice")
+
+
+class TestNotifyResponse:
+    """Tests for the NotifyResponse model - returned when request is queued."""
+
+    def test_successful_response_structure(self):
+        """
+        A successful response should contain status, message_length, queue_position.
+        """
+        from app.models import NotifyResponse
+
+        response = NotifyResponse(
+            status="queued",
+            message_length=42,
+            queue_position=3,
+            voice_model="en_US-lessac-medium",
+        )
+
+        assert response.status == "queued"
+        assert response.message_length == 42
+        assert response.queue_position == 3
+        assert response.voice_model == "en_US-lessac-medium"
+
+    def test_estimated_duration_is_optional(self):
+        """
+        Estimated duration can be omitted.
+        """
+        from app.models import NotifyResponse
+
+        response = NotifyResponse(
+            status="queued",
+            message_length=42,
+            queue_position=1,
+            voice_model="en_US-lessac-medium",
+        )
+
+        assert response.estimated_duration is None
+
+    def test_estimated_duration_when_provided(self):
+        """
+        Estimated duration should be preserved when provided.
+        """
+        from app.models import NotifyResponse
+
+        response = NotifyResponse(
+            status="queued",
+            message_length=42,
+            queue_position=1,
+            voice_model="en_US-lessac-medium",
+            estimated_duration=2.5,
+        )
+
+        assert response.estimated_duration == 2.5
+
+
+class TestHealthResponse:
+    """Tests for the HealthResponse model - returned by /health endpoint."""
+
+    def test_healthy_response_structure(self):
+        """
+        A healthy response should contain all required fields.
+        """
+        from app.models import HealthResponse, QueueStatus
+
+        queue_status = QueueStatus(size=2, capacity=50, utilization=4.0)
+        response = HealthResponse(
+            status="healthy",
+            uptime_seconds=3600,
+            queue=queue_status,
+            tts_engine="piper",
+            audio_output="available",
+        )
+
+        assert response.status == "healthy"
+        assert response.uptime_seconds == 3600
+        assert response.queue.size == 2
+        assert response.queue.capacity == 50
+        assert response.tts_engine == "piper"
+        assert response.audio_output == "available"
+
+    def test_unhealthy_response_with_errors(self):
+        """
+        An unhealthy response can include error messages.
+        """
+        from app.models import HealthResponse, QueueStatus
+
+        queue_status = QueueStatus(size=0, capacity=50, utilization=0.0)
+        response = HealthResponse(
+            status="unhealthy",
+            uptime_seconds=100,
+            queue=queue_status,
+            tts_engine="piper",
+            audio_output="unavailable",
+            errors=["Audio device not found", "TTS engine failed to initialize"],
+        )
+
+        assert response.status == "unhealthy"
+        assert len(response.errors) == 2
+        assert "Audio device not found" in response.errors
+
+    def test_statistics_fields_are_optional(self):
+        """
+        Statistics like total_requests and failed_requests are optional.
+        """
+        from app.models import HealthResponse, QueueStatus
+
+        queue_status = QueueStatus(size=0, capacity=50, utilization=0.0)
+        response = HealthResponse(
+            status="healthy",
+            uptime_seconds=0,
+            queue=queue_status,
+            tts_engine="piper",
+            audio_output="available",
+        )
+
+        assert response.total_requests is None
+        assert response.failed_requests is None
+
+
+class TestErrorResponse:
+    """Tests for the ErrorResponse model - returned for error conditions."""
+
+    def test_error_response_structure(self):
+        """
+        An error response should contain error type, detail, and timestamp.
+        """
+        from app.models import ErrorResponse
+
+        response = ErrorResponse(
+            error="validation_error",
+            detail="message field is required",
+        )
+
+        assert response.error == "validation_error"
+        assert response.detail == "message field is required"
+        assert response.timestamp is not None
+
+    def test_timestamp_auto_generated(self):
+        """
+        Timestamp should be auto-generated if not provided.
+        """
+        from app.models import ErrorResponse
+
+        response = ErrorResponse(
+            error="queue_full",
+            detail="TTS queue is full",
+        )
+
+        assert isinstance(response.timestamp, datetime)
+
+    def test_queue_full_error_includes_queue_size(self):
+        """
+        Queue full errors can include the current queue size.
+        """
+        from app.models import ErrorResponse
+
+        response = ErrorResponse(
+            error="queue_full",
+            detail="TTS queue is full, please retry later",
+            queue_size=50,
+        )
+
+        assert response.queue_size == 50
+
+
+class TestVoiceModels:
+    """Tests for voice-related models."""
+
+    def test_voice_info_structure(self):
+        """
+        VoiceInfo should contain name, language, quality, and installation status.
+        """
+        from app.models import VoiceInfo
+
+        voice = VoiceInfo(
+            name="en_US-lessac-medium",
+            language="en_US",
+            quality="medium",
+            size_mb=63.5,
+            installed=True,
+        )
+
+        assert voice.name == "en_US-lessac-medium"
+        assert voice.language == "en_US"
+        assert voice.quality == "medium"
+        assert voice.size_mb == 63.5
+        assert voice.installed is True
+
+    def test_voices_response_structure(self):
+        """
+        VoicesResponse should contain a list of voices and the default voice.
+        """
+        from app.models import VoiceInfo, VoicesResponse
+
+        voice = VoiceInfo(
+            name="en_US-lessac-medium",
+            language="en_US",
+            quality="medium",
+            size_mb=63.5,
+            installed=True,
+        )
+
+        response = VoicesResponse(
+            voices=[voice],
+            default_voice="en_US-lessac-medium",
+        )
+
+        assert len(response.voices) == 1
+        assert response.default_voice == "en_US-lessac-medium"