""" TTS Data Models and Request/Response structures for multi-backend support """ from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Dict, Any, Optional from pathlib import Path @dataclass class TTSParameters: """Common TTS parameters with backend-specific extensions""" temperature: float = 0.8 backend_params: Dict[str, Any] = field(default_factory=dict) @dataclass class SpeakerConfig: """Enhanced speaker configuration""" id: str name: str sample_path: str reference_text: Optional[str] = None tts_backend: str = "chatterbox" def validate(self): """Validate speaker configuration based on backend""" if self.tts_backend == "higgs" and not self.reference_text: raise ValueError(f"reference_text required for Higgs backend speaker: {self.name}") sample_path = Path(self.sample_path) if not sample_path.exists() and not sample_path.is_absolute(): # If not absolute, it might be relative to speaker data dir - will be validated later pass @dataclass class OutputConfig: """Output configuration for TTS generation""" filename_base: str output_dir: Optional[Path] = None format: str = "wav" @dataclass class TTSRequest: """Unified TTS request structure""" text: str speaker_config: SpeakerConfig parameters: TTSParameters output_config: OutputConfig @dataclass class TTSResponse: """Unified TTS response structure""" output_path: Path generated_text: Optional[str] = None audio_duration: Optional[float] = None sampling_rate: Optional[int] = None backend_used: str = ""