chatterbox-ui/backend/app/models/tts_models.py

56 lines
1.7 KiB
Python

"""
TTS Data Models and Request/Response structures for multi-backend support
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
from pathlib import Path
@dataclass
class TTSParameters:
    """Generation parameters shared by all backends, with an extension slot.

    Attributes:
        temperature: Generation temperature; defaults to ``0.8``.
        backend_params: Free-form mapping for backend-specific settings.
            Every instance receives its own empty dict unless one is given.
    """

    temperature: float = 0.8
    backend_params: Dict[str, Any] = field(default_factory=dict)
@dataclass
class SpeakerConfig:
    """Speaker definition used to drive a TTS backend.

    Attributes:
        id: Identifier of the speaker.
        name: Speaker name; it is included in validation error messages.
        sample_path: Location of the speaker's sample, stored as a string
            (presumably a reference audio file). It may be relative; this
            class neither resolves nor checks it.
        reference_text: Optional reference text. Required when
            ``tts_backend`` is ``"higgs"``.
        tts_backend: Name of the backend used for this speaker; defaults to
            ``"chatterbox"``.
    """

    id: str
    name: str
    sample_path: str
    reference_text: Optional[str] = None
    tts_backend: str = "chatterbox"

    def validate(self) -> None:
        """Check that the configuration is usable with its backend.

        Raises:
            ValueError: If ``tts_backend`` is ``"higgs"`` and
                ``reference_text`` is ``None``, empty, or whitespace-only.
        """
        reference = self.reference_text
        # A whitespace-only reference carries no usable text, so it is
        # treated the same as a missing one.
        reference_missing = not reference or not reference.strip()
        if self.tts_backend == "higgs" and reference_missing:
            raise ValueError(f"reference_text required for Higgs backend speaker: {self.name}")

        # ``sample_path`` is deliberately not checked here: a relative path
        # might be relative to the speaker data dir and is validated later,
        # so probing the filesystem at this point would have no effect.
@dataclass
class OutputConfig:
    """Describes where and how generated TTS output should be written.

    Attributes:
        filename_base: Base name for the output file (required).
        output_dir: Optional target directory; ``None`` when not specified.
        format: Output format name; defaults to ``"wav"``.
    """

    filename_base: str
    output_dir: Optional[Path] = None
    format: str = "wav"
@dataclass
class TTSRequest:
    """Single request object bundling everything a TTS call needs.

    Attributes:
        text: The text carried by the request.
        speaker_config: Speaker definition (a ``SpeakerConfig``).
        parameters: Generation parameters (a ``TTSParameters``).
        output_config: Output settings (an ``OutputConfig``).

    All four fields are required; none has a default.
    """

    text: str
    speaker_config: SpeakerConfig
    parameters: TTSParameters
    output_config: OutputConfig
@dataclass
class TTSResponse:
    """Result object returned for a TTS request, independent of backend.

    Attributes:
        output_path: Path of the produced output (required).
        generated_text: Optional text associated with the generation;
            ``None`` when not provided.
        audio_duration: Optional duration of the audio (presumably in
            seconds - confirm with the producing backend).
        sampling_rate: Optional sampling rate (presumably in Hz - confirm
            with the producing backend).
        backend_used: Name of the backend that produced the result;
            defaults to the empty string.
    """

    output_path: Path
    generated_text: Optional[str] = None
    audio_duration: Optional[float] = None
    sampling_rate: Optional[int] = None
    backend_used: str = ""