48 lines
3.3 KiB
Python
48 lines
3.3 KiB
Python
from pydantic import BaseModel, Field, validator
|
|
from typing import List, Union, Literal, Optional
|
|
|
|
class DialogItemBase(BaseModel):
    """Common parent for every entry that may appear in a dialog.

    Declares only the ``type`` tag; subclasses in this module narrow it to
    a specific literal value.
    """

    # Free-form string here; no default, so the field is required.
    type: str
|
|
|
|
class SpeechItem(DialogItemBase):
    """Dialog entry for one line of speech attributed to a speaker.

    ``speaker_id`` and ``text`` are required. The generation settings
    default to 0.5 (``exaggeration``), 0.5 (``cfg_weight``) and 0.8
    (``temperature``); the audio-reuse fields default to ``False`` and
    ``None``.
    """

    # Tag value: only the literal 'speech' is accepted for this model.
    type: Literal['speech'] = 'speech'

    # --- required content -------------------------------------------------
    speaker_id: str = Field(
        ...,
        description="ID of the speaker for this speech segment.",
    )
    text: str = Field(
        ...,
        description="Text content to be synthesized.",
    )

    # --- generation settings (Optional, so an explicit None is allowed) ---
    exaggeration: Optional[float] = Field(
        default=0.5,
        description="Controls the expressiveness of the speech. Higher values lead to more exaggerated speech. Default from Gradio.",
    )
    cfg_weight: Optional[float] = Field(
        default=0.5,
        description="Classifier-Free Guidance weight. Higher values make the speech more aligned with the prompt text and speaker characteristics. Default from Gradio.",
    )
    temperature: Optional[float] = Field(
        default=0.8,
        description="Controls randomness in generation. Lower values make speech more deterministic, higher values more varied. Default from Gradio.",
    )

    # --- reuse of pre-generated audio --------------------------------------
    use_existing_audio: Optional[bool] = Field(
        default=False,
        description="If true and audio_url is provided, use the existing audio file instead of generating new audio for this line.",
    )
    audio_url: Optional[str] = Field(
        default=None,
        description="Path or URL to pre-generated audio for this line (used if use_existing_audio is true).",
    )
|
|
|
|
class SilenceItem(DialogItemBase):
    """Dialog entry for a pause of a given length.

    ``duration`` is required and must be strictly greater than zero
    (``gt=0``). The audio-reuse fields default to ``False`` and ``None``.
    """

    # Tag value: only the literal 'silence' is accepted for this model.
    type: Literal['silence'] = 'silence'

    duration: float = Field(
        ...,
        gt=0,
        description="Duration of the silence in seconds.",
    )

    # --- reuse of pre-generated audio --------------------------------------
    use_existing_audio: Optional[bool] = Field(
        default=False,
        description="If true and audio_url is provided, use the existing audio file for silence instead of generating a new silent segment.",
    )
    audio_url: Optional[str] = Field(
        default=None,
        description="Path or URL to pre-generated audio for this silence (used if use_existing_audio is true).",
    )
|
|
|
|
class DialogRequest(BaseModel):
    """Request payload: an ordered list of dialog items plus an output name.

    Both fields are required. Each element of ``dialog_items`` is checked by
    ``check_item_type`` before pydantic parses it into a ``SpeechItem`` or
    ``SilenceItem``.
    """

    dialog_items: List[Union[SpeechItem, SilenceItem]] = Field(..., description="A list of speech and silence items.")
    output_base_name: str = Field(..., description="Base name for the output files (e.g., 'my_dialog_v1'). Extensions will be added automatically.")

    @validator('dialog_items', pre=True, each_item=True)
    def check_item_type(cls, item):
        """Reject dialog items whose ``type`` tag is not recognised.

        Runs once per list element (``each_item=True``) on the raw input,
        before field parsing (``pre=True``).

        Args:
            item: One raw element of ``dialog_items``: either a dict as
                decoded from a request body, or an already-constructed
                ``SpeechItem`` / ``SilenceItem``.

        Returns:
            ``item`` unchanged; field-level validation is left to the
            matching item model.

        Raises:
            ValueError: If ``item`` is neither a dict nor one of the item
                models, or if its ``'type'`` is not ``'speech'`` or
                ``'silence'``.
        """
        # Already-built models were validated against their own Literal
        # ``type`` on construction, so building a request in code from
        # model instances must not be rejected as "not a dictionary".
        if isinstance(item, (SpeechItem, SilenceItem)):
            return item

        if not isinstance(item, dict):
            raise ValueError("Each dialog item must be a dictionary.")

        item_type = item.get('type')
        if item_type in ('speech', 'silence'):
            # Pydantic will handle further validation based on the
            # SpeechItem / SilenceItem model.
            return item

        raise ValueError(f"Unknown dialog item type: {item_type}. Must be 'speech' or 'silence'.")
|
|
|
|
class DialogResponse(BaseModel):
    """Response payload returned for a dialog generation request.

    ``log`` is the only field declared without a default; every other
    field is optional and defaults to ``None``.
    """

    log: str = Field(
        description="Log of the dialog generation process.",
    )

    # The URL fields below may currently hold relative paths or
    # placeholders; the final format depends on how the files end up
    # being served.
    concatenated_audio_url: Optional[str] = Field(
        default=None,
        description="URL/path to the concatenated audio file.",
    )
    zip_archive_url: Optional[str] = Field(
        default=None,
        description="URL/path to the ZIP archive of all audio files.",
    )

    temp_dir_path: Optional[str] = Field(
        default=None,
        description="Path to the temporary directory holding generated files, for server-side reference.",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if the process failed globally.",
    )
|