voice-server/app/models.py
Cal Corum a34aec06f1 Initial commit: Voice server with Piper TTS
A local HTTP service that accepts text via POST and speaks it through
system speakers using Piper TTS neural voice synthesis.

Features:
- POST /notify - Queue text for TTS playback
- GET /health - Health check with TTS/audio/queue status
- GET /voices - List installed voice models
- Async queue processing (no overlapping audio)
- Non-blocking audio via sounddevice
- 73 tests covering API contract

Tech stack:
- FastAPI + Uvicorn
- Piper TTS (neural voices, offline)
- sounddevice (PortAudio)
- Pydantic for validation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 00:18:12 -06:00

163 lines
5.1 KiB
Python

"""
Pydantic models for voice-server request/response validation.

This module defines the API contract for all endpoints:
- NotifyRequest/NotifyResponse: POST /notify
- HealthResponse (embeds QueueStatus): GET /health
- VoicesResponse (a list of VoiceInfo entries): GET /voices
- ErrorResponse: 4xx and 5xx error responses
"""
from datetime import datetime, timezone
from typing import Annotated
from pydantic import BaseModel, Field, field_validator
class NotifyRequest(BaseModel):
    """
    Request model for POST /notify endpoint.
    Validates incoming TTS requests with message content and optional parameters.
    """

    # NOTE: Pydantic publishes the class docstring and every ``description=``
    # string in the generated JSON schema, so their wording is part of the
    # API surface.

    # Required. The validator at the bottom of the class strips the text
    # before the length bounds are checked.
    message: Annotated[
        str,
        Field(
            description="Text to convert to speech (1-10000 characters)",
            min_length=1,
            max_length=10000,
        ),
    ]

    # Optional. Only word characters and hyphens are accepted.
    voice: Annotated[
        str,
        Field(
            description="Piper voice model name",
            pattern=r"^[\w-]+$",
            default="en_US-lessac-medium",
        ),
    ]

    # Optional. Inclusive bounds of 50 and 400.
    rate: Annotated[
        int,
        Field(
            description="Speech rate in words per minute (50-400)",
            ge=50,
            le=400,
            default=170,
        ),
    ]

    # Optional. Playback is on unless the caller turns it off.
    voice_enabled: Annotated[
        bool,
        Field(
            description="Enable/disable TTS playback (for debugging)",
            default=True,
        ),
    ]

    @field_validator("message", mode="before")
    @classmethod
    def strip_message_whitespace(cls, v: str) -> str:
        """Trim leading and trailing whitespace from a string message.

        Registered with ``mode="before"``, so it sees the incoming value
        ahead of the field's own validation and ``v`` is not guaranteed to
        be a string. Non-string values are returned untouched and left for
        the regular ``str`` validation to handle.
        """
        return v.strip() if isinstance(v, str) else v
class NotifyResponse(BaseModel):
    """
    Response model for successful POST /notify requests.
    Returned when a TTS request is successfully queued for processing.
    """

    # Required fields: no default is declared for any of these four.
    status: Annotated[
        str,
        Field(description="Request status (e.g., 'queued')"),
    ]
    message_length: Annotated[
        int,
        Field(description="Length of the message in characters"),
    ]
    queue_position: Annotated[
        int,
        Field(description="Position in the TTS queue"),
    ]
    voice_model: Annotated[
        str,
        Field(description="Voice model being used"),
    ]

    # The only optional field; it is None when no value is supplied.
    estimated_duration: Annotated[
        float | None,
        Field(
            description="Estimated playback duration in seconds",
            default=None,
        ),
    ]
class QueueStatus(BaseModel):
    """Queue status information for health checks."""

    # All three fields are required. No numeric range is enforced here, so
    # keeping ``utilization`` consistent with size/capacity is up to the
    # code that builds this model.
    size: Annotated[
        int,
        Field(description="Current number of items in queue"),
    ]
    capacity: Annotated[
        int,
        Field(description="Maximum queue capacity"),
    ]
    utilization: Annotated[
        float,
        Field(description="Queue utilization percentage"),
    ]
class HealthResponse(BaseModel):
    """
    Response model for GET /health endpoint.
    Provides comprehensive health status including TTS engine, audio, and queue status.
    """

    # --- Required core status -------------------------------------------
    # ``status`` is a plain string; the two values named in its description
    # are not enforced by the model.
    status: Annotated[
        str,
        Field(description="Overall health status ('healthy' or 'unhealthy')"),
    ]
    uptime_seconds: Annotated[
        int,
        Field(description="Server uptime in seconds"),
    ]
    queue: Annotated[
        QueueStatus,
        Field(description="Queue status information"),
    ]
    tts_engine: Annotated[
        str,
        Field(description="TTS engine name"),
    ]
    audio_output: Annotated[
        str,
        Field(description="Audio output status"),
    ]

    # --- Optional diagnostics (each is None unless supplied) -------------
    voice_models_loaded: Annotated[
        list[str] | None,
        Field(
            description="List of loaded voice models",
            default=None,
        ),
    ]
    total_requests: Annotated[
        int | None,
        Field(
            description="Total requests processed",
            default=None,
        ),
    ]
    failed_requests: Annotated[
        int | None,
        Field(
            description="Number of failed requests",
            default=None,
        ),
    ]
    errors: Annotated[
        list[str] | None,
        Field(
            description="List of error messages if unhealthy",
            default=None,
        ),
    ]

    # Filled with the current timezone-aware UTC time whenever the caller
    # does not pass a timestamp.
    timestamp: Annotated[
        datetime,
        Field(
            description="Response timestamp",
            default_factory=lambda: datetime.now(tz=timezone.utc),
        ),
    ]
class ErrorResponse(BaseModel):
    """
    Response model for error conditions.
    Used for 4xx and 5xx error responses with consistent structure.
    """

    # Required: a short identifier plus a human-readable explanation.
    error: Annotated[
        str,
        Field(description="Error type identifier"),
    ]
    detail: Annotated[
        str,
        Field(description="Human-readable error description"),
    ]

    # Filled with the current timezone-aware UTC time whenever the caller
    # does not pass a timestamp.
    timestamp: Annotated[
        datetime,
        Field(
            description="Error timestamp",
            default_factory=lambda: datetime.now(tz=timezone.utc),
        ),
    ]

    # Optional extra context; None unless supplied.
    queue_size: Annotated[
        int | None,
        Field(
            description="Current queue size (for queue_full errors)",
            default=None,
        ),
    ]
class VoiceInfo(BaseModel):
    """Information about a single voice model."""

    # Every field is required. ``language`` and ``quality`` are free-form
    # strings; the values named in their descriptions are not enforced.
    name: Annotated[
        str,
        Field(description="Voice model name"),
    ]
    language: Annotated[
        str,
        Field(description="Language code (e.g., 'en_US')"),
    ]
    quality: Annotated[
        str,
        Field(description="Quality level (low, medium, high)"),
    ]
    size_mb: Annotated[
        float,
        Field(description="Model size in megabytes"),
    ]
    installed: Annotated[
        bool,
        Field(description="Whether the model is installed locally"),
    ]
class VoicesResponse(BaseModel):
    """
    Response model for GET /voices endpoint.
    Lists available voice models and the default voice.
    """

    # Both fields are required. The model does not check that
    # ``default_voice`` names one of the entries in ``voices``.
    voices: Annotated[
        list[VoiceInfo],
        Field(description="List of available voices"),
    ]
    default_voice: Annotated[
        str,
        Field(description="Default voice model name"),
    ]