"""Command-line wrapper that synthesizes speech with Chatterbox TTS.

Given a reference audio clip, a text string, and an output path, the script
loads the pretrained Chatterbox model, generates audio for the text using the
reference clip as the voice prompt, and writes the result to disk.
"""

import argparse

import torchaudio as ta
from chatterbox.tts import ChatterboxTTS


def main() -> None:
    """Parse CLI arguments, run Chatterbox TTS, and save the generated audio.

    Command-line arguments:
        --sample: Path to the prompt/reference audio file; forwarded to
            ``model.generate`` as ``audio_prompt_path``.
        --output: Path the generated audio is written to via
            ``torchaudio.save``.
        --text: The text to synthesize.
        --device: Device string handed to ``ChatterboxTTS.from_pretrained``.
            Optional; defaults to ``"mps"`` so invocations that omit it keep
            targeting Apple Silicon as before.
    """
    parser = argparse.ArgumentParser(description="Chatterbox TTS audio generation")
    parser.add_argument(
        '--sample',
        required=True,
        type=str,
        help='Prompt/reference audio file (e.g. .wav, .mp3) for the voice',
    )
    parser.add_argument(
        '--output',
        required=True,
        type=str,
        help='Output audio file path (should end with .wav)',
    )
    parser.add_argument(
        '--text',
        required=True,
        type=str,
        help='Text to synthesize',
    )
    # The device used to be hard-coded to "mps"; exposing it lets the same
    # script run on other hardware without editing the source.
    parser.add_argument(
        '--device',
        default='mps',
        type=str,
        help='Device to load the model on, e.g. mps, cuda or cpu '
             '(default: %(default)s, for Apple Silicon)',
    )
    args = parser.parse_args()

    # Load the pretrained model on the requested device (MPS by default).
    model = ChatterboxTTS.from_pretrained(device=args.device)

    # Generate the audio, using the sample file as the voice prompt.
    wav = model.generate(args.text, audio_prompt_path=args.sample)

    # Save to the output path; model.sr is passed as the sample-rate argument.
    ta.save(args.output, wav, model.sr)
    print(f"Generated audio saved to {args.output}")


if __name__ == '__main__':
    main()