diff --git a/cbx-audiobook.py b/cbx-audiobook.py
index 94c95c3..486bc05 100755
--- a/cbx-audiobook.py
+++ b/cbx-audiobook.py
@@ -31,7 +31,7 @@ class AudiobookGenerator:
     def __init__(self, speaker_id, output_base_name, device="mps",
                  exaggeration=0.5, cfg_weight=0.5, temperature=0.8,
                  pause_between_sentences=0.5, pause_between_paragraphs=1.0,
-                 keep_model_loaded=False, cleanup_interval=10, use_subprocess=False):
+                 use_subprocess=False):
         """
         Initialize the audiobook generator.
 
@@ -44,8 +44,6 @@ class AudiobookGenerator:
             temperature: Controls randomness in generation (0.0-1.0)
             pause_between_sentences: Pause duration between sentences in seconds
             pause_between_paragraphs: Pause duration between paragraphs in seconds
-            keep_model_loaded: If True, keeps model loaded across chunks (more efficient but uses more memory)
-            cleanup_interval: How often to perform deep cleanup when keep_model_loaded=True
             use_subprocess: If True, uses separate processes for each chunk (slower but guarantees memory release)
         """
         self.speaker_id = speaker_id
@@ -56,10 +54,7 @@ class AudiobookGenerator:
         self.temperature = temperature
         self.pause_between_sentences = pause_between_sentences
         self.pause_between_paragraphs = pause_between_paragraphs
-        self.keep_model_loaded = keep_model_loaded
-        self.cleanup_interval = cleanup_interval
         self.use_subprocess = use_subprocess
-        self.chunk_counter = 0
 
         # Initialize services
         self.tts_service = TTSService(device=device)
@@ -86,47 +81,6 @@ class AudiobookGenerator:
         # Store speaker info for later use
         self.speaker_info = speaker_info
 
-    def _cleanup_memory(self):
-        """Force memory cleanup and garbage collection."""
-        print("Performing memory cleanup...")
-
-        # Force garbage collection multiple times for thorough cleanup
-        for _ in range(3):
-            gc.collect()
-
-        # Clear device-specific caches
-        if self.device == "cuda" and torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            torch.cuda.synchronize()
-            # Additional CUDA cleanup
-            try:
-                torch.cuda.reset_peak_memory_stats()
-            except:
-                pass
-        elif self.device == "mps" and torch.backends.mps.is_available():
-            if hasattr(torch.mps, "empty_cache"):
-                torch.mps.empty_cache()
-            if hasattr(torch.mps, "synchronize"):
-                torch.mps.synchronize()
-            # Try to free MPS memory more aggressively
-            try:
-                import os
-                # This forces MPS to release memory back to the system
-                if hasattr(torch.mps, "set_per_process_memory_fraction"):
-                    current_allocated = torch.mps.current_allocated_memory() if hasattr(torch.mps, "current_allocated_memory") else 0
-                    if current_allocated > 0:
-                        torch.mps.empty_cache()
-            except:
-                pass
-
-        # Additional aggressive cleanup
-        if hasattr(torch, '_C') and hasattr(torch._C, '_cuda_clearCublasWorkspaces'):
-            try:
-                torch._C._cuda_clearCublasWorkspaces()
-            except:
-                pass
-
-        print("Memory cleanup completed.")
 
     async def _generate_chunk_subprocess(self, chunk, segment_filename_base, speaker_sample_path):
         """
@@ -250,10 +204,9 @@ class AudiobookGenerator:
         segment_results = []
         chunk_count = 0
 
-        # Pre-load model if keeping it loaded
-        if self.keep_model_loaded:
-            print("Pre-loading TTS model for batch processing...")
-            self.tts_service.load_model()
+        # Load model once at the start (singleton will handle reuse)
+        print("Loading TTS model...")
+        self.tts_service.load_model()
 
         try:
             for para_idx, paragraph in enumerate(paragraphs):
@@ -261,7 +214,6 @@ class AudiobookGenerator:
 
                 for chunk_idx, chunk in enumerate(paragraph["chunks"]):
                     chunk_count += 1
-                    self.chunk_counter += 1
                     print(f"  Generating audio for chunk {chunk_count}/{total_chunks}: {chunk[:50]}...")
 
                     # Generate unique filename for this chunk
@@ -283,12 +235,7 @@ class AudiobookGenerator:
                                 speaker_sample_path=speaker_sample_path
                             )
                         else:
-                            # Load model for this chunk (if not keeping loaded)
-                            if not self.keep_model_loaded:
-                                print("Loading TTS model...")
-                                self.tts_service.load_model()
-
-                            # Generate speech using the TTS service
+                            # Generate speech using the TTS service (model already loaded)
                             segment_output_path = await self.tts_service.generate_speech(
                                 text=chunk,
                                 speaker_id=self.speaker_id,
@@ -300,26 +247,6 @@ class AudiobookGenerator:
                                 temperature=self.temperature
                             )
 
-                        # Memory management strategy based on model lifecycle
-                        if self.use_subprocess:
-                            # No memory management needed - subprocess handles it
-                            pass
-                        elif self.keep_model_loaded:
-                            # Light cleanup after each chunk
-                            if self.chunk_counter % self.cleanup_interval == 0:
-                                print(f"Performing periodic deep cleanup (chunk {self.chunk_counter})")
-                                self._cleanup_memory()
-                        else:
-                            # Explicit memory cleanup after generation
-                            self._cleanup_memory()
-
-                            # Unload model after generation
-                            print("Unloading TTS model...")
-                            self.tts_service.unload_model()
-
-                            # Additional memory cleanup after model unload
-                            self._cleanup_memory()
-
                         # Add to segment results
                         segment_results.append({
                             "type": "speech",
@@ -335,13 +262,6 @@ class AudiobookGenerator:
                     except Exception as e:
                         print(f"Error generating speech for chunk: {e}")
-                        # Ensure model is unloaded if there was an error and not using subprocess
-                        if not self.use_subprocess:
-                            if not self.keep_model_loaded and self.tts_service.model is not None:
-                                print("Unloading TTS model after error...")
-                                self.tts_service.unload_model()
-                            # Force cleanup after error
-                            self._cleanup_memory()
                         # Continue with next chunk
 
                 # Add longer pause between paragraphs
                 if para_idx < len(paragraphs) - 1:
@@ -352,11 +272,10 @@ class AudiobookGenerator:
                     })
 
         finally:
-            # Always unload model at the end if it was kept loaded
-            if self.keep_model_loaded and self.tts_service.model is not None:
-                print("Final cleanup: Unloading TTS model...")
+            # Optionally unload model at the end (singleton manages this efficiently)
+            if not self.use_subprocess:
+                print("Unloading TTS model...")
                 self.tts_service.unload_model()
-                self._cleanup_memory()
 
         # Concatenate all segments
         print("Concatenating audio segments...")
@@ -389,11 +308,6 @@ class AudiobookGenerator:
         print(f"Audiobook file: {concatenated_path}")
         print(f"ZIP archive: {zip_path}")
 
-        # Ensure model is unloaded at the end (just in case)
-        if self.tts_service.model is not None:
-            print("Final check: Unloading TTS model...")
-            self.tts_service.unload_model()
-
         return concatenated_path
 
 async def main():
@@ -413,11 +327,9 @@
     parser.add_argument("--temperature", type=float, default=0.8, help="Controls randomness (0.0-1.0, default: 0.8)")
     parser.add_argument("--sentence-pause", type=float, default=0.5, help="Pause between sentences in seconds (default: 0.5)")
     parser.add_argument("--paragraph-pause", type=float, default=1.0, help="Pause between paragraphs in seconds (default: 1.0)")
-    parser.add_argument("--keep-model-loaded", action="store_true", help="Keep model loaded between chunks (faster but uses more memory)")
-    parser.add_argument("--cleanup-interval", type=int, default=10, help="How often to perform deep cleanup when keeping model loaded (default: 10)")
     parser.add_argument("--force-cpu-on-oom", action="store_true", help="Automatically switch to CPU if MPS/CUDA runs out of memory")
     parser.add_argument("--max-chunk-length", type=int, default=300, help="Maximum chunk length for text splitting (default: 300)")
-    parser.add_argument("--use-subprocess", action="store_true", help="Use separate processes for each chunk (guarantees memory release but slower)")
+    parser.add_argument("--use-subprocess", action="store_true", help="Use separate processes for each chunk (slower but reduces memory usage)")
 
     args = parser.parse_args()
 
@@ -453,8 +365,6 @@ async def main():
             temperature=args.temperature,
             pause_between_sentences=args.sentence_pause,
             pause_between_paragraphs=args.paragraph_pause,
-            keep_model_loaded=args.keep_model_loaded,
-            cleanup_interval=args.cleanup_interval,
             use_subprocess=args.use_subprocess
         )
 
@@ -476,8 +386,6 @@ async def main():
             temperature=args.temperature,
             pause_between_sentences=args.sentence_pause,
             pause_between_paragraphs=args.paragraph_pause,
-            keep_model_loaded=args.keep_model_loaded,
-            cleanup_interval=args.cleanup_interval,
             use_subprocess=args.use_subprocess
         )