#!/usr/bin/env python3
"""
Migration script for existing speakers to new format
Adds tts_backend and reference_text fields to existing speaker data
"""

import shutil
import sys
import traceback
import yaml
from datetime import datetime
from pathlib import Path
from typing import Optional

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.append(str(project_root))

from backend.app.services.speaker_service import SpeakerManagementService
from backend.app.models.speaker_models import Speaker


def backup_speakers_file(speakers_file_path: Path) -> Optional[Path]:
    """Create a timestamped backup of the existing speakers file.

    The backup is written next to the original file as
    ``speakers_backup_<YYYYmmdd_HHMMSS>.yaml``.

    Args:
        speakers_file_path: Location of the speakers file. A plain string
            path is accepted as well and converted to ``Path``.

    Returns:
        The path of the backup that was written, or ``None`` when there is
        no speakers file to back up.
    """
    speakers_file_path = Path(speakers_file_path)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = speakers_file_path.parent / f"speakers_backup_{timestamp}.yaml"

    if not speakers_file_path.exists():
        print("⚠ No existing speakers file to backup")
        return None

    # Byte-for-byte copy: a text-mode read/write would depend on the
    # platform's default encoding and newline translation, which can fail
    # on (or silently alter) non-ASCII content.
    shutil.copyfile(speakers_file_path, backup_path)
    print(f"✓ Created backup: {backup_path}")
    return backup_path


def analyze_existing_speakers(service: SpeakerManagementService) -> dict:
    """Analyze current speakers data structure.

    Iterates ``service.speakers_data`` and checks every entry for the
    ``tts_backend`` and ``reference_text`` keys.

    Args:
        service: Service whose ``speakers_data`` mapping is inspected.

    Returns:
        A dict with the keys ``total_speakers``, ``needs_migration``,
        ``already_migrated``, ``sample_speaker_data`` (the id and a shallow
        copy of the first entry needing migration, or ``None``) and
        ``missing_fields`` (set of field names missing from any entry).
    """
    analysis = {
        "total_speakers": len(service.speakers_data),
        "needs_migration": 0,
        "already_migrated": 0,
        "sample_speaker_data": None,
        "missing_fields": set(),
    }

    for speaker_id, speaker_data in service.speakers_data.items():
        # Fields this particular speaker entry lacks.
        missing = [
            field
            for field in ("tts_backend", "reference_text")
            if field not in speaker_data
        ]

        if not missing:
            analysis["already_migrated"] += 1
            continue

        analysis["missing_fields"].update(missing)
        analysis["needs_migration"] += 1

        # Keep only the first un-migrated speaker as the example to show.
        if not analysis["sample_speaker_data"]:
            analysis["sample_speaker_data"] = {
                "id": speaker_id,
                "current_data": speaker_data.copy(),
            }

    return analysis


def interactive_migration_prompt(analysis: dict) -> bool:
    """Print the analysis summary and ask the user to confirm the migration.

    Args:
        analysis: Result of ``analyze_existing_speakers``.

    Returns:
        ``True`` only when at least one speaker needs migration and the user
        answered ``y`` or ``yes`` (case-insensitive); ``False`` otherwise.
    """
    print("\n=== Speaker Migration Analysis ===")
    print(f"Total speakers: {analysis['total_speakers']}")
    print(f"Need migration: {analysis['needs_migration']}")
    print(f"Already migrated: {analysis['already_migrated']}")

    if analysis["missing_fields"]:
        # Sorted so the output does not depend on set iteration order.
        print(f"Missing fields: {', '.join(sorted(analysis['missing_fields']))}")

    if analysis["sample_speaker_data"]:
        print("\nExample current speaker data:")
        sample_data = analysis["sample_speaker_data"]["current_data"]
        for key, value in sample_data.items():
            print(f" {key}: {value}")

        print("\nAfter migration will have:")
        print(" tts_backend: chatterbox (default)")
        print(" reference_text: null (default)")

    if analysis["needs_migration"] == 0:
        print("\n✓ All speakers are already migrated!")
        return False

    print(f"\nThis will migrate {analysis['needs_migration']} speakers.")
    response = input("Continue with migration? (y/N): ").lower().strip()
    return response in ("y", "yes")


def validate_migrated_speakers(service: SpeakerManagementService) -> dict:
    """Validate all speakers after migration and print the outcome.

    Args:
        service: Service providing ``validate_all_speakers()``.

    Returns:
        The dict returned by ``service.validate_all_speakers()``; this
        function reads its ``valid_speakers``, ``invalid_speakers`` and
        ``validation_errors`` entries.
    """
    print("\n=== Validating Migrated Speakers ===")

    validation_results = service.validate_all_speakers()

    print(f"✓ Valid speakers: {validation_results['valid_speakers']}")

    if validation_results["invalid_speakers"] > 0:
        print(f"❌ Invalid speakers: {validation_results['invalid_speakers']}")
        for error in validation_results["validation_errors"]:
            print(f" - {error['speaker_name']} ({error['speaker_id']}): {error['error']}")

    return validation_results


def show_backend_statistics(service: SpeakerManagementService):
    """Print the speaker distribution across backends.

    Args:
        service: Service providing ``get_backend_statistics()``, whose result
            is read via ``total_speakers`` and the per-backend ``count``,
            ``with_reference_text`` and ``without_reference_text`` entries.
    """
    print("\n=== Backend Distribution ===")

    stats = service.get_backend_statistics()

    print(f"Total speakers: {stats['total_speakers']}")

    for backend, backend_stats in stats["backends"].items():
        print(f"\n{backend.upper()} Backend:")
        print(f" Count: {backend_stats['count']}")
        print(f" With reference text: {backend_stats['with_reference_text']}")
        print(f" Without reference text: {backend_stats['without_reference_text']}")


def main():
    """Run the migration process.

    Steps: load the speaker service, analyze it, ask for confirmation, back
    up the speakers file, migrate, validate, and print backend statistics.

    Returns:
        Process exit code: ``0`` on success or when nothing was migrated,
        ``1`` when validation reports invalid speakers or any exception is
        raised along the way.
    """
    print("=== Speaker Data Migration Tool ===")
    print("This tool migrates existing speaker data to support multiple TTS backends\n")

    try:
        # Initialize service
        print("Loading speaker data...")
        service = SpeakerManagementService()

        # Analyze current state
        analysis = analyze_existing_speakers(service)

        # Show analysis and get confirmation
        if not interactive_migration_prompt(analysis):
            # When nothing needs migrating the prompt has already said so;
            # only report a cancellation if the user actually declined.
            if analysis["needs_migration"] > 0:
                print("Migration cancelled.")
            return 0

        # Create backup
        print("\nCreating backup...")
        # Imported here (as in the original) so that a failing config import
        # is reported by the handler below instead of at module import time.
        from backend.app import config
        backup_path = backup_speakers_file(config.SPEAKERS_YAML_FILE)

        # Perform migration
        print("\nPerforming migration...")
        migration_stats = service.migrate_existing_speakers()

        print("\n=== Migration Results ===")
        print(f"Total speakers processed: {migration_stats['total_speakers']}")
        print(f"Speakers migrated: {migration_stats['migrated_count']}")
        print(f"Already migrated: {migration_stats['already_migrated']}")

        if migration_stats["migrations_performed"]:
            print("\nMigrated speakers:")
            for migration in migration_stats["migrations_performed"]:
                print(f" - {migration['speaker_name']}: {', '.join(migration['migrations'])}")

        # Validate results
        validation_results = validate_migrated_speakers(service)

        # Show backend distribution
        show_backend_statistics(service)

        # Final status
        if validation_results["invalid_speakers"] != 0:
            print(f"\n⚠ Migration completed with {validation_results['invalid_speakers']} validation errors.")
            print("Please check the error details above.")
            return 1

        print("\n✅ Migration completed successfully!")
        print(f"All {migration_stats['total_speakers']} speakers are now using the new format.")
        if backup_path:
            print(f"Original data backed up to: {backup_path}")
        return 0

    except Exception as e:
        # Top-level boundary of a CLI tool: report, show the traceback, and
        # signal failure through the exit code.
        print(f"\n❌ Migration failed: {e}")
        traceback.print_exc()
        return 1


if __name__ == "__main__":
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. when Python runs with -S).
    sys.exit(main())