23 lines
891 B
Python
Executable File
23 lines
891 B
Python
Executable File
import argparse
|
|
import torchaudio as ta
|
|
from chatterbox.tts import ChatterboxTTS
|
|
|
|
def main(argv=None) -> None:
    """Synthesize speech from text in the voice of a reference audio clip.

    Parses the command line, loads the pretrained Chatterbox model, generates
    a waveform for ``--text`` conditioned on the ``--sample`` recording, and
    saves it to ``--output``.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: On invalid arguments -- a missing required option, a
            ``--sample`` path that is not an existing file, or blank
            ``--text``.
    """
    # Imported locally so the file's top-level import block stays untouched.
    from pathlib import Path

    parser = argparse.ArgumentParser(description="Chatterbox TTS audio generation")
    parser.add_argument('--sample', required=True, type=str, help='Prompt/reference audio file (e.g. .wav, .mp3) for the voice')
    parser.add_argument('--output', required=True, type=str, help='Output audio file path (should end with .wav)')
    parser.add_argument('--text', required=True, type=str, help='Text to synthesize')
    parser.add_argument('--device', default=None, type=str, help='Torch device to run on (e.g. mps, cuda, cpu); auto-detected when omitted')
    args = parser.parse_args(argv)

    # Validate inputs up front so the user gets a clear usage error before
    # any model loading starts.
    if not Path(args.sample).is_file():
        parser.error(f"--sample file not found: {args.sample}")
    if not args.text.strip():
        parser.error("--text must not be empty")

    device = args.device
    if device is None:
        # Prefer MPS (Apple Silicon, the original hard-coded target), then
        # CUDA, and fall back to CPU so the script runs on any machine.
        import torch

        if torch.backends.mps.is_available():
            device = "mps"
        elif torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

    # Load the pretrained model on the selected device
    model = ChatterboxTTS.from_pretrained(device=device)

    # Generate the audio
    wav = model.generate(args.text, audio_prompt_path=args.sample)

    # Save to output .wav, using the model's own sample rate
    ta.save(args.output, wav, model.sr)
    print(f"Generated audio saved to {args.output}")
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|