voice-server/app/main.py
Cal Corum a34aec06f1 Initial commit: Voice server with Piper TTS
A local HTTP service that accepts text via POST and speaks it through
system speakers using Piper TTS neural voice synthesis.

Features:
- POST /notify - Queue text for TTS playback
- GET /health - Health check with TTS/audio/queue status
- GET /voices - List installed voice models
- Async queue processing (no overlapping audio)
- Non-blocking audio via sounddevice
- 73 tests covering API contract

Tech stack:
- FastAPI + Uvicorn
- Piper TTS (neural voices, offline)
- sounddevice (PortAudio)
- Pydantic for validation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 00:18:12 -06:00

141 lines
3.4 KiB
Python

"""
Voice Server - Local HTTP service for text-to-speech playback.
This module provides the FastAPI application with endpoints for:
- POST /notify: Submit text for TTS playback
- GET /health: Health check endpoint
- GET /voices: List available voice models
"""
import logging
import time
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.audio_player import SounddevicePlayer
from app.config import get_settings
from app.queue_manager import TTSQueueManager
from app.tts_engine import PiperTTSEngine
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Wall-clock timestamp (time.time()) recorded by lifespan() at startup.
# 0.0 means startup has not run yet; get_uptime_seconds() reports 0 then.
_start_time: float = 0.0
# Shared service instances. They are None at import time and are assigned
# by lifespan() during application startup.
tts_engine: PiperTTSEngine | None = None
audio_player: SounddevicePlayer | None = None
queue_manager: TTSQueueManager | None = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan handler.

    Handles startup and shutdown events:
    - Startup: Initialize TTS engine, audio player, queue manager
    - Shutdown: Stop the queue manager, then stop audio playback

    The shutdown steps run in ``finally`` blocks, so they are still executed
    when an exception (or cancellation) is raised into this generator at the
    ``yield``, and the audio player is stopped even if stopping the queue
    manager fails.
    """
    global _start_time, tts_engine, audio_player, queue_manager

    settings = get_settings()
    _start_time = time.time()

    # Initialize TTS engine
    logger.info("Initializing TTS engine...")
    tts_engine = PiperTTSEngine(
        model_dir=settings.model_dir,
        default_voice=settings.default_voice,
    )

    # Initialize audio player, using the engine's sample rate as the default
    logger.info("Initializing audio player...")
    audio_player = SounddevicePlayer(
        default_sample_rate=tts_engine.get_sample_rate(),
    )

    # Initialize and start queue manager
    logger.info("Starting queue manager...")
    queue_manager = TTSQueueManager(
        tts_engine=tts_engine,
        audio_player=audio_player,
        max_size=settings.queue_max_size,
        request_timeout=settings.request_timeout_seconds,
    )
    await queue_manager.start()
    logger.info("Voice server started successfully")

    try:
        yield
    finally:
        # Shutdown cleanup
        logger.info("Shutting down voice server...")
        try:
            # Compare against None explicitly: a truthiness test would skip
            # the stop if the object ever evaluates as falsy (e.g. a
            # container-like manager that is currently empty).
            if queue_manager is not None:
                await queue_manager.stop()
        finally:
            if audio_player is not None:
                audio_player.stop()
        logger.info("Voice server stopped")
def create_app() -> FastAPI:
    """
    Create and configure the FastAPI application.

    Returns a FastAPI instance with the lifespan handler attached, CORS
    middleware installed and the router from ``app.routes`` included.
    """
    app = FastAPI(
        title="Voice Server",
        description="Local HTTP service for text-to-speech playback using Piper TTS",
        version="1.0.0",
        lifespan=lifespan,
    )

    # Configure CORS.
    # Credentials are disabled because wildcard origins/methods/headers must
    # not be combined with allow_credentials=True (see the FastAPI CORS
    # documentation); every origin is still allowed for plain requests.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=False,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Register routes.
    # NOTE(review): imported inside the function rather than at module top,
    # presumably to avoid a circular import with app.routes - confirm.
    from app.routes import router

    app.include_router(router)
    return app
def get_uptime_seconds() -> int:
    """Return whole seconds elapsed since startup, or 0 if not started."""
    started_at = _start_time
    # A start time of 0.0 is the "startup has not run yet" sentinel.
    if started_at != 0.0:
        elapsed = time.time() - started_at
        return int(elapsed)
    return 0
# Create the application instance at import time; run() hands it to uvicorn
# through the "app.main:app" import string.
app = create_app()
def run(*, reload: bool = True) -> None:
    """Run the server using uvicorn (for CLI entry point).

    Host and port are read from the application settings.

    Args:
        reload: Forwarded to ``uvicorn.run`` as its ``reload`` option.
            Defaults to True, which matches the previously hard-coded
            value; pass False to start without the auto-reloader.
    """
    # Imported lazily so uvicorn is only needed when the server is launched
    # through this entry point.
    import uvicorn

    settings = get_settings()
    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=reload,
    )


if __name__ == "__main__":
    run()