Clean up memory management in cbx-audiobook.py

- Use singleton pattern from TTSService for efficient model management - Remove complex manual memory cleanup code - Simplify CLI arguments by removing redundant memory management options - Load model once at start, let singleton handle efficient reuse - Remove keep-model-loaded and cleanup-interval options - Streamline generation logic to match backend service patterns 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
Add cbx-audiobook.py and import_helper.py
2025-06-27 00:01:13 -05:00 · 2025-06-26 15:04:55 -05:00 · 2025-06-26 14:56:53 -05:00
15 changed files with 1738 additions and 32 deletions
--- a/.aider.chat.history.md
+++ b/.aider.chat.history.md
--- a/.aider.input.history
+++ b/.aider.input.history
@ -0,0 +1,9 @@
+
+# 2025-06-14 18:21:08.215816
+yes
+
+# 2025-06-14 18:21:29.450580
+/model
+
+# 2025-06-14 18:22:01.292648
+/exit
--- a/.aider.tags.cache.v4/cache.db
+++ b/.aider.tags.cache.v4/cache.db
--- a/.opencode/init
+++ b/.opencode/init
--- a/.opencode/opencode.db
+++ b/.opencode/opencode.db
--- a/.opencode/opencode.db-shm
+++ b/.opencode/opencode.db-shm
--- a/.opencode/opencode.db-wal
+++ b/.opencode/opencode.db-wal
--- a/OpenCode.md
+++ b/OpenCode.md
@ -0,0 +1,36 @@
+# OpenCode.md
+
+## Build/Test Commands
+```bash
+# Backend setup and run (from project root)
+pip install -r backend/requirements.txt
+uvicorn backend.app.main:app --reload --host 0.0.0.0 --port 8000
+
+# Frontend tests
+npm test                    # Run all Jest tests
+npm test -- --testNamePattern="getSpeakers"  # Run single test
+
+# Backend API test
+python backend/run_api_test.py
+
+# Alternative interface
+python gradio_app.py
+```
+
+## Code Style Guidelines
+
+### Python (Backend)
+- **Imports**: Standard library first, third-party, then local imports with blank lines between groups
+- **Types**: Use type hints extensively (`List[Speaker]`, `Optional[str]`, `Dict[str, Any]`)
+- **Classes**: PascalCase (`SpeakerManagementService`, `DialogRequest`)
+- **Functions/Variables**: snake_case (`get_speakers`, `speaker_id`, `audio_url`)
+- **Error Handling**: Use FastAPI `HTTPException` with descriptive messages
+- **Models**: Pydantic models with Field descriptions and validators
+
+### JavaScript (Frontend)
+- **Modules**: ES6 modules with explicit imports/exports
+- **Functions**: camelCase with JSDoc comments (`getSpeakers`, `addSpeaker`)
+- **Constants**: UPPER_SNAKE_CASE (`API_BASE_URL`)
+- **Error Handling**: Comprehensive try/catch with detailed error messages
+- **Async**: Use async/await consistently, handle response.ok checks
+- **Testing**: Jest with descriptive test names and comprehensive mocking
--- a/backend/app/main.py
+++ b/backend/app/main.py
@ -4,6 +4,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from pathlib import Path
 from app.routers import speakers, dialog # Import the routers
 from app import config
+from app.services.tts_service import get_global_tts_service

 app = FastAPI(
    title="Chatterbox TTS API",
@ -37,4 +38,21 @@ config.DIALOG_GENERATED_DIR.mkdir(parents=True, exist_ok=True)
 # Mount StaticFiles to serve generated dialogs
 app.mount("/generated_audio", StaticFiles(directory=config.DIALOG_GENERATED_DIR), name="generated_audio")

+# Application lifecycle events for TTS model management
+@app.on_event("startup")
+async def startup_event():
+    """Load TTS model on application startup."""
+    print("🚀 Starting Chatterbox TTS API...")
+    tts_service = get_global_tts_service()
+    tts_service.load_model()
+    print("✅ TTS model loaded and ready!")
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Unload TTS model on application shutdown."""
+    print("🔄 Shutting down Chatterbox TTS API...")
+    tts_service = get_global_tts_service()
+    tts_service.unload_model()
+    print("✅ TTS model unloaded. Goodbye!")
+
 # Further endpoints for speakers, dialog generation, etc., will be added here.
--- a/backend/app/routers/dialog.py
+++ b/backend/app/routers/dialog.py
@ -4,7 +4,7 @@ import shutil
 import os

 from app.models.dialog_models import DialogRequest, DialogResponse
-from app.services.tts_service import TTSService
+from app.services.tts_service import TTSService, get_global_tts_service
 from app.services.speaker_service import SpeakerManagementService
 from app.services.dialog_processor_service import DialogProcessorService
 from app.services.audio_manipulation_service import AudioManipulationService
@ -17,8 +17,8 @@ router = APIRouter()
 # For now, direct instantiation or simple Depends is fine.

 def get_tts_service():
-    # Consider making device configurable
-    return TTSService(device="mps") 
+    # Return the global singleton instance
+    return get_global_tts_service(device="mps") 

 def get_speaker_management_service():
    return SpeakerManagementService()
@ -128,19 +128,7 @@ async def generate_line(
            detail=error_detail
        )

-async def manage_tts_model_lifecycle(tts_service: TTSService, task_function, *args, **kwargs):
-    """Loads TTS model, executes task, then unloads model."""
-    try:
-        print("API: Loading TTS model...")
-        tts_service.load_model()
-        return await task_function(*args, **kwargs)
-    except Exception as e:
-        # Log or handle specific exceptions if needed before re-raising
-        print(f"API: Error during TTS model lifecycle or task execution: {e}")
-        raise
-    finally:
-        print("API: Unloading TTS model...")
-        tts_service.unload_model()
+# Note: manage_tts_model_lifecycle function removed - model lifecycle now managed at application startup/shutdown

 async def process_dialog_flow(
    request: DialogRequest,
@ -274,10 +262,8 @@ async def generate_dialog_endpoint(
    - Concatenates all audio segments into a single file.
    - Creates a ZIP archive of all individual segments and the concatenated file.
    """
-    # Wrap the core processing logic with model loading/unloading
-    return await manage_tts_model_lifecycle(
-        tts_service, 
-        process_dialog_flow, 
+    # Model is now loaded at startup and kept loaded - no per-request lifecycle management needed
+    return await process_dialog_flow(
        request=request, 
        dialog_processor=dialog_processor, 
        audio_manipulator=audio_manipulator,
--- a/backend/app/services/tts_service.py
+++ b/backend/app/services/tts_service.py
@ -41,10 +41,22 @@ def safe_load_chatterbox_tts(device):
        return ChatterboxTTS.from_pretrained(device=device)

 class TTSService:
+    _instance = None
+    _initialized = False
+    
+    def __new__(cls, device: str = "mps"):
+        """Singleton pattern - ensures only one instance exists."""
+        if cls._instance is None:
+            cls._instance = super(TTSService, cls).__new__(cls)
+        return cls._instance
+    
    def __init__(self, device: str = "mps"): # Default to MPS for Macs, can be "cpu" or "cuda"
-        self.device = device
-        self.model = None
-        self._ensure_output_dir_exists()
+        # Only initialize once to prevent resetting the model
+        if not self._initialized:
+            self.device = device
+            self.model = None
+            self._ensure_output_dir_exists()
+            TTSService._initialized = True

    def _ensure_output_dir_exists(self):
        """Ensures the TTS output directory exists."""
@ -62,12 +74,12 @@ class TTSService:
                # Potentially raise an exception or handle appropriately
                raise
        else:
-            print("ChatterboxTTS model already loaded.")
+            print("[Singleton] ChatterboxTTS model already loaded.")

    def unload_model(self):
        """Unloads the model and clears memory."""
        if self.model is not None:
-            print("Unloading ChatterboxTTS model and clearing cache...")
+            print("[Singleton] Unloading ChatterboxTTS model and clearing cache...")
            del self.model
            self.model = None
            if self.device == "cuda":
@ -76,7 +88,9 @@ class TTSService:
                if hasattr(torch.mps, "empty_cache"): # Check if empty_cache is available for MPS
                    torch.mps.empty_cache()
            gc.collect() # Explicitly run garbage collection
-            print("Model unloaded and memory cleared.")
+            print("[Singleton] Model unloaded and memory cleared.")
+        else:
+            print("[Singleton] Model was not loaded, nothing to unload.")

    async def generate_speech(
        self,
@ -94,10 +108,7 @@ class TTSService:
        Saves the output to a .wav file.
        """
        if self.model is None:
-            self.load_model()
-        
-        if self.model is None: # Check again if loading failed
-            raise RuntimeError("TTS model is not loaded. Cannot generate speech.")
+            raise RuntimeError("TTS model is not loaded. Model should be loaded at application startup.")

        # Ensure speaker_sample_path is valid
        speaker_sample_p = Path(speaker_sample_path)
@ -130,10 +141,20 @@ class TTSService:
            # For now, we keep it loaded. Memory management might need refinement.
            pass

+# Global singleton instance access
+_global_tts_service = None
+
+def get_global_tts_service(device: str = "mps") -> TTSService:
+    """Get the global singleton TTS service instance."""
+    global _global_tts_service
+    if _global_tts_service is None:
+        _global_tts_service = TTSService(device=device)
+    return _global_tts_service
+
 # Example usage (for testing, not part of the service itself)
 if __name__ == "__main__":
    async def main_test():
-        tts_service = TTSService(device="mps")
+        tts_service = get_global_tts_service(device="mps")
        try:
            tts_service.load_model()
            
--- a/cbx-audiobook.py
+++ b/cbx-audiobook.py
@ -0,0 +1,404 @@
+#!/usr/bin/env python
+"""
+Chatterbox Audiobook Generator
+
+This script converts a text file into an audiobook using the Chatterbox TTS system.
+It parses the text file into manageable chunks, generates audio for each chunk,
+and assembles them into a complete audiobook.
+"""
+
+import argparse
+import asyncio
+import gc
+import os
+import re
+import subprocess
+import sys
+import torch
+from pathlib import Path
+import uuid
+
+# Import helper to fix Python path
+import import_helper
+
+# Import backend services
+from backend.app.services.tts_service import TTSService
+from backend.app.services.speaker_service import SpeakerManagementService
+from backend.app.services.audio_manipulation_service import AudioManipulationService
+from backend.app.config import DIALOG_GENERATED_DIR, TTS_TEMP_OUTPUT_DIR
+
+class AudiobookGenerator:
+    def __init__(self, speaker_id, output_base_name, device="mps", 
+                 exaggeration=0.5, cfg_weight=0.5, temperature=0.8,
+                 pause_between_sentences=0.5, pause_between_paragraphs=1.0,
+                 use_subprocess=False):
+        """
+        Initialize the audiobook generator.
+        
+        Args:
+            speaker_id: ID of the speaker to use
+            output_base_name: Base name for output files
+            device: Device to use for TTS (mps, cuda, cpu)
+            exaggeration: Controls expressiveness (0.0-1.0)
+            cfg_weight: Controls alignment with speaker characteristics (0.0-1.0)
+            temperature: Controls randomness in generation (0.0-1.0)
+            pause_between_sentences: Pause duration between sentences in seconds
+            pause_between_paragraphs: Pause duration between paragraphs in seconds
+            use_subprocess: If True, uses separate processes for each chunk (slower but guarantees memory release)
+        """
+        self.speaker_id = speaker_id
+        self.output_base_name = output_base_name
+        self.device = device
+        self.exaggeration = exaggeration
+        self.cfg_weight = cfg_weight
+        self.temperature = temperature
+        self.pause_between_sentences = pause_between_sentences
+        self.pause_between_paragraphs = pause_between_paragraphs
+        self.use_subprocess = use_subprocess
+        
+        # Initialize services
+        self.tts_service = TTSService(device=device)
+        self.speaker_service = SpeakerManagementService()
+        self.audio_manipulator = AudioManipulationService()
+        
+        # Create output directories
+        self.output_dir = DIALOG_GENERATED_DIR / output_base_name
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.temp_dir = TTS_TEMP_OUTPUT_DIR / output_base_name
+        self.temp_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Validate speaker
+        self._validate_speaker()
+    
+    def _validate_speaker(self):
+        """Validate that the specified speaker exists."""
+        speaker_info = self.speaker_service.get_speaker_by_id(self.speaker_id)
+        if not speaker_info:
+            raise ValueError(f"Speaker ID '{self.speaker_id}' not found.")
+        if not speaker_info.sample_path:
+            raise ValueError(f"Speaker ID '{self.speaker_id}' has no sample path defined.")
+        
+        # Store speaker info for later use
+        self.speaker_info = speaker_info
+    
+    
+    async def _generate_chunk_subprocess(self, chunk, segment_filename_base, speaker_sample_path):
+        """
+        Generate a single chunk using cbx-generate.py in a subprocess.
+        This guarantees memory is released when the process exits.
+        """
+        output_file = self.temp_dir / f"{segment_filename_base}.wav"
+        
+        # Use cbx-generate.py for single chunk generation
+        cmd = [
+            sys.executable, "cbx-generate.py",
+            "--sample", str(speaker_sample_path),
+            "--output", str(output_file),
+            "--text", chunk,
+            "--device", self.device
+        ]
+        
+        print(f"Running subprocess: {' '.join(cmd[:4])} ... (text truncated)")
+        
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minute timeout per chunk
+                cwd=Path(__file__).parent  # Run from project root
+            )
+            
+            if result.returncode != 0:
+                raise RuntimeError(f"Subprocess failed: {result.stderr}")
+            
+            if not output_file.exists():
+                raise RuntimeError(f"Output file not created: {output_file}")
+            
+            print(f"Subprocess completed successfully: {output_file}")
+            return output_file
+            
+        except subprocess.TimeoutExpired:
+            raise RuntimeError(f"Subprocess timed out after 5 minutes")
+        except Exception as e:
+            raise RuntimeError(f"Subprocess error: {e}")
+    
+    def split_text_into_chunks(self, text, max_length=300):
+        """
+        Split text into chunks suitable for TTS processing.
+        
+        This uses the same logic as the DialogProcessorService._split_text method
+        but adds additional paragraph handling.
+        """
+        # Split text into paragraphs first
+        paragraphs = re.split(r'\n\s*\n', text)
+        paragraphs = [p.strip() for p in paragraphs if p.strip()]
+        
+        all_chunks = []
+        
+        for paragraph in paragraphs:
+            # Split paragraph into sentences
+            sentences = re.split(r'(?<=[.!?\u2026])\s+|(?<=[.!?\u2026])(?=[\"\')\]\}\u201d\u2019])|(?<=[.!?\u2026])$', paragraph.strip())
+            sentences = [s.strip() for s in sentences if s and s.strip()]
+            
+            chunks = []
+            current_chunk = ""
+            
+            for sentence in sentences:
+                if not sentence:
+                    continue
+                if not current_chunk:  # First sentence for this chunk
+                    current_chunk = sentence
+                elif len(current_chunk) + len(sentence) + 1 <= max_length:
+                    current_chunk += " " + sentence
+                else:
+                    chunks.append(current_chunk)
+                    current_chunk = sentence
+            
+            if current_chunk:  # Add the last chunk
+                chunks.append(current_chunk)
+            
+            # Further split any chunks that are still too long
+            paragraph_chunks = []
+            for chunk in chunks:
+                if len(chunk) > max_length:
+                    # Simple split by length if a sentence itself is too long
+                    for i in range(0, len(chunk), max_length):
+                        paragraph_chunks.append(chunk[i:i+max_length])
+                else:
+                    paragraph_chunks.append(chunk)
+            
+            # Add paragraph marker
+            if paragraph_chunks:
+                all_chunks.append({"type": "paragraph", "chunks": paragraph_chunks})
+        
+        return all_chunks
+    
+    async def generate_audiobook(self, text_file_path):
+        """
+        Generate an audiobook from a text file.
+        
+        Args:
+            text_file_path: Path to the text file to convert
+        
+        Returns:
+            Path to the generated audiobook file
+        """
+        # Read the text file
+        text_path = Path(text_file_path)
+        if not text_path.exists():
+            raise FileNotFoundError(f"Text file not found: {text_file_path}")
+        
+        with open(text_path, 'r', encoding='utf-8') as f:
+            text = f.read()
+        
+        print(f"Processing text file: {text_file_path}")
+        print(f"Text length: {len(text)} characters")
+        
+        # Split text into chunks
+        paragraphs = self.split_text_into_chunks(text)
+        total_chunks = sum(len(p["chunks"]) for p in paragraphs)
+        print(f"Split into {len(paragraphs)} paragraphs with {total_chunks} total chunks")
+        
+        # Generate audio for each chunk
+        segment_results = []
+        chunk_count = 0
+        
+        # Load model once at the start (singleton will handle reuse)
+        print("Loading TTS model...")
+        self.tts_service.load_model()
+        
+        try:
+            for para_idx, paragraph in enumerate(paragraphs):
+                print(f"Processing paragraph {para_idx+1}/{len(paragraphs)}")
+                
+                for chunk_idx, chunk in enumerate(paragraph["chunks"]):
+                    chunk_count += 1
+                    print(f"  Generating audio for chunk {chunk_count}/{total_chunks}: {chunk[:50]}...")
+                    
+                    # Generate unique filename for this chunk
+                    segment_filename_base = f"{self.output_base_name}_p{para_idx}_c{chunk_idx}_{uuid.uuid4().hex[:8]}"
+                    
+                    try:
+                        # Get absolute speaker sample path
+                        speaker_sample_path = Path(self.speaker_info.sample_path)
+                        if not speaker_sample_path.is_absolute():
+                            from backend.app.config import SPEAKER_DATA_BASE_DIR
+                            speaker_sample_path = SPEAKER_DATA_BASE_DIR / speaker_sample_path
+                        
+                        # Generate speech for this chunk
+                        if self.use_subprocess:
+                            # Use subprocess for guaranteed memory release
+                            segment_output_path = await self._generate_chunk_subprocess(
+                                chunk=chunk,
+                                segment_filename_base=segment_filename_base,
+                                speaker_sample_path=speaker_sample_path
+                            )
+                        else:
+                            # Generate speech using the TTS service (model already loaded)
+                            segment_output_path = await self.tts_service.generate_speech(
+                                text=chunk,
+                                speaker_id=self.speaker_id,
+                                speaker_sample_path=str(speaker_sample_path),
+                                output_filename_base=segment_filename_base,
+                                output_dir=self.temp_dir,
+                                exaggeration=self.exaggeration,
+                                cfg_weight=self.cfg_weight,
+                                temperature=self.temperature
+                            )
+                        
+                        # Add to segment results
+                        segment_results.append({
+                            "type": "speech",
+                            "path": str(segment_output_path)
+                        })
+                        
+                        # Add pause between sentences
+                        if chunk_idx < len(paragraph["chunks"]) - 1:
+                            segment_results.append({
+                                "type": "silence",
+                                "duration": self.pause_between_sentences
+                            })
+                    
+                    except Exception as e:
+                        print(f"Error generating speech for chunk: {e}")
+                        # Continue with next chunk
+            
+                # Add longer pause between paragraphs
+                if para_idx < len(paragraphs) - 1:
+                    segment_results.append({
+                        "type": "silence",
+                        "duration": self.pause_between_paragraphs
+                    })
+        
+        finally:
+            # Optionally unload model at the end (singleton manages this efficiently)
+            if not self.use_subprocess:
+                print("Unloading TTS model...")
+                self.tts_service.unload_model()
+        
+        # Concatenate all segments
+        print("Concatenating audio segments...")
+        concatenated_filename = f"{self.output_base_name}_audiobook.wav"
+        concatenated_path = self.output_dir / concatenated_filename
+        
+        self.audio_manipulator.concatenate_audio_segments(
+            segment_results=segment_results,
+            output_concatenated_path=concatenated_path
+        )
+        
+        # Create ZIP archive with all files
+        print("Creating ZIP archive...")
+        zip_filename = f"{self.output_base_name}_audiobook.zip"
+        zip_path = self.output_dir / zip_filename
+        
+        # Collect all speech segment files
+        speech_segment_paths = [
+            Path(s["path"]) for s in segment_results 
+            if s["type"] == "speech" and Path(s["path"]).exists()
+        ]
+        
+        self.audio_manipulator.create_zip_archive(
+            segment_file_paths=speech_segment_paths,
+            concatenated_audio_path=concatenated_path,
+            output_zip_path=zip_path
+        )
+        
+        print(f"Audiobook generation complete!")
+        print(f"Audiobook file: {concatenated_path}")
+        print(f"ZIP archive: {zip_path}")
+        
+        return concatenated_path
+
+async def main():
+    parser = argparse.ArgumentParser(description="Generate an audiobook from a text file using Chatterbox TTS")
+    
+    # Create a mutually exclusive group for the main operation vs listing speakers
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--list-speakers", action="store_true", help="List available speakers and exit")
+    group.add_argument("text_file", nargs="?", help="Path to the text file to convert")
+    
+    # Other arguments
+    parser.add_argument("--speaker", "-s", help="ID of the speaker to use")
+    parser.add_argument("--output", "-o", help="Base name for output files (default: derived from text filename)")
+    parser.add_argument("--device", default="mps", choices=["mps", "cuda", "cpu"], help="Device to use for TTS (default: mps)")
+    parser.add_argument("--exaggeration", type=float, default=0.5, help="Controls expressiveness (0.0-1.0, default: 0.5)")
+    parser.add_argument("--cfg-weight", type=float, default=0.5, help="Controls alignment with speaker (0.0-1.0, default: 0.5)")
+    parser.add_argument("--temperature", type=float, default=0.8, help="Controls randomness (0.0-1.0, default: 0.8)")
+    parser.add_argument("--sentence-pause", type=float, default=0.5, help="Pause between sentences in seconds (default: 0.5)")
+    parser.add_argument("--paragraph-pause", type=float, default=1.0, help="Pause between paragraphs in seconds (default: 1.0)")
+    parser.add_argument("--force-cpu-on-oom", action="store_true", help="Automatically switch to CPU if MPS/CUDA runs out of memory")
+    parser.add_argument("--max-chunk-length", type=int, default=300, help="Maximum chunk length for text splitting (default: 300)")
+    parser.add_argument("--use-subprocess", action="store_true", help="Use separate processes for each chunk (slower but reduces memory usage)")
+    
+    args = parser.parse_args()
+    
+    # List speakers if requested
+    if args.list_speakers:
+        speaker_service = SpeakerManagementService()
+        speakers = speaker_service.get_speakers()
+        print("Available speakers:")
+        for speaker in speakers:
+            print(f"  {speaker.id}: {speaker.name}")
+        return
+    
+    # Validate required arguments for audiobook generation
+    if not args.text_file:
+        parser.error("text_file is required when not using --list-speakers")
+    
+    if not args.speaker:
+        parser.error("--speaker/-s is required when not using --list-speakers")
+    
+    # Determine output base name if not provided
+    if not args.output:
+        text_path = Path(args.text_file)
+        args.output = text_path.stem
+    
+    try:
+        # Create audiobook generator
+        generator = AudiobookGenerator(
+            speaker_id=args.speaker,
+            output_base_name=args.output,
+            device=args.device,
+            exaggeration=args.exaggeration,
+            cfg_weight=args.cfg_weight,
+            temperature=args.temperature,
+            pause_between_sentences=args.sentence_pause,
+            pause_between_paragraphs=args.paragraph_pause,
+            use_subprocess=args.use_subprocess
+        )
+        
+        # Generate audiobook with automatic fallback
+        try:
+            await generator.generate_audiobook(args.text_file)
+        except (RuntimeError, torch.OutOfMemoryError) as e:
+            if args.force_cpu_on_oom and "out of memory" in str(e).lower() and args.device != "cpu":
+                print(f"\n⚠️  {args.device.upper()} out of memory: {e}")
+                print("🔄 Automatically switching to CPU and retrying...")
+                
+                # Create new generator with CPU
+                generator = AudiobookGenerator(
+                    speaker_id=args.speaker,
+                    output_base_name=args.output,
+                    device="cpu",
+                    exaggeration=args.exaggeration,
+                    cfg_weight=args.cfg_weight,
+                    temperature=args.temperature,
+                    pause_between_sentences=args.sentence_pause,
+                    pause_between_paragraphs=args.paragraph_pause,
+                    use_subprocess=args.use_subprocess
+                )
+                
+                await generator.generate_audiobook(args.text_file)
+                print("✅ Successfully completed using CPU fallback!")
+            else:
+                raise
+        
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+    
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(asyncio.run(main()))
--- a/import_helper.py
+++ b/import_helper.py
@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+"""
+Import helper module for CLI scripts that need to import backend services.
+This ensures the Python path is set up correctly to import from the backend directory.
+"""
+
+import sys
+from pathlib import Path
+
+# Add the project root to the Python path
+PROJECT_ROOT = Path(__file__).parent.resolve()
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+# Add the backend directory to the Python path for app.* imports
+BACKEND_ROOT = PROJECT_ROOT / "backend"
+if str(BACKEND_ROOT) not in sys.path:
+    sys.path.insert(0, str(BACKEND_ROOT))
+
+# Verify that we can import from backend
+try:
+    from backend.app.config import PROJECT_ROOT as CONFIG_PROJECT_ROOT
+    from app.services.tts_service import TTSService
+    from app.services.speaker_service import SpeakerManagementService
+except ImportError as e:
+    print(f"Warning: Could not import backend services: {e}")
+    print(f"Make sure you're running from the project root directory: {PROJECT_ROOT}")
+    print(f"Backend directory: {BACKEND_ROOT}")
--- a/speaker_data/speakers.yaml
+++ b/speaker_data/speakers.yaml
@ -28,3 +28,15 @@ dd3552d9-f4e8-49ed-9892-f9e67afcf23c:
 2cdd6d3d-c533-44bf-a5f6-cc83bd089d32:
  name: Grace
  sample_path: speaker_samples/2cdd6d3d-c533-44bf-a5f6-cc83bd089d32.wav
+fdbfa71b-7647-4574-a1c0-31350348b434:
+  name: Elthea
+  sample_path: speaker_samples/fdbfa71b-7647-4574-a1c0-31350348b434.wav
+44cfc6c1-78ec-4278-920a-8ad067cd1eba:
+  name: Eddie
+  sample_path: speaker_samples/44cfc6c1-78ec-4278-920a-8ad067cd1eba.wav
+a25c52cc-ad56-46d2-9209-62fa7aebb150:
+  name: Charlotte
+  sample_path: speaker_samples/a25c52cc-ad56-46d2-9209-62fa7aebb150.wav
+aeb43113-586c-4ab8-86e6-3b26737b9816:
+  name: Announcer1
+  sample_path: speaker_samples/aeb43113-586c-4ab8-86e6-3b26737b9816.wav
--- a/start_servers.py
+++ b/start_servers.py
@ -1,4 +1,4 @@
-#!/Users/stwhite/CODE/chatterbox-ui/.venv/bin/python
+#!/Volumes/SAM2/CODE/chatterbox-test/.venv/bin/python
 """
 Startup script that launches both the backend and frontend servers concurrently.
 """