# chatterbox-ui/test.py

import torch
import torchaudio as ta
from chatterbox.tts import ChatterboxTTS

# Pick the compute device: use the MPS backend (Apple-silicon Macs) when this
# PyTorch build reports it as available, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

def safe_load_chatterbox_tts(device="mps"):
    """
    Load ChatterboxTTS while forcing checkpoint tensors onto a usable device.

    For the duration of the call, ``torch.load`` is swapped for a wrapper that
    supplies a ``map_location`` whenever the caller did not pass one. This is
    meant for checkpoints that were saved on CUDA but have to be loaded on
    MPS or CPU. The swap is applied to the ``torch`` module itself, so it is
    visible process-wide until this function returns.

    Args:
        device: Target device name handed to ``ChatterboxTTS.from_pretrained``
            and used to choose the default ``map_location``.

    Returns:
        The object returned by ``ChatterboxTTS.from_pretrained(device=device)``.
    """
    # Keep a handle on the real loader so it can be called and later restored.
    stock_loader = torch.load

    def _device_aware_load(f, map_location=None, **kwargs):
        # An explicit map_location from the caller always wins.
        if map_location is not None:
            return stock_loader(f, map_location=map_location, **kwargs)

        # Otherwise choose a target: MPS when requested and available, CPU
        # when requested or when CUDA is absent, else the requested device.
        if device == "mps" and torch.backends.mps.is_available():
            target = "mps"
        elif device == "cpu" or not torch.cuda.is_available():
            target = "cpu"
        else:
            target = device

        return stock_loader(f, map_location=torch.device(target), **kwargs)

    # Install the wrapper only while the model is being constructed.
    torch.load = _device_aware_load
    try:
        return ChatterboxTTS.from_pretrained(device=device)
    finally:
        # Always put the original loader back, even if loading raised.
        torch.load = stock_loader

# Inputs for the synthesis run: the sentence to speak and the reference clip.
text = (
    "Today is the day. "
    "I want to move like a titan at dawn, sweat like a god forging lightning. "
    "No more excuses. "
    "From now on, my mornings will be temples of discipline. "
    "I am going to work out like the gods… every damn day."
)

# To synthesize with a different voice, point this at your own audio prompt.
AUDIO_PROMPT_PATH = "YOUR_FILE.wav"

# Load the model on the detected device using the patched-loader helper.
model = safe_load_chatterbox_tts(device=device)

# Generate the waveform, then write it out at the model's sample rate.
wav = model.generate(
    text,
    audio_prompt_path=AUDIO_PROMPT_PATH,
    exaggeration=2.0,
    cfg_weight=0.5,
)
ta.save("test-2.wav", wav, model.sr)