chatterbox-ui/backend/migrations/migrate_speakers.py

183 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""
Migration script for existing speakers to new format
Adds tts_backend and reference_text fields to existing speaker data
"""
import sys
import yaml
from pathlib import Path
from datetime import datetime
# Make the `backend` package importable when this file is run directly as a
# script: the grandparent of the directory containing this file is appended
# to sys.path (presumably the project root that holds `backend/` -- confirm
# against the repository layout). It is appended, not inserted first, so
# entries already on sys.path keep priority.
project_root = Path(__file__).parent.parent.parent
sys.path.append(str(project_root))
from backend.app.services.speaker_service import SpeakerManagementService
from backend.app.models.speaker_models import Speaker
def backup_speakers_file(speakers_file_path: Path) -> "Path | None":
    """Create a timestamped, byte-for-byte backup of the speakers file.

    The backup is written next to the original as
    ``speakers_backup_<YYYYmmdd_HHMMSS>.yaml`` (local time).

    Args:
        speakers_file_path: Location of the speakers YAML file to back up.
            A plain string path is accepted as well.

    Returns:
        The path of the backup that was written, or ``None`` when
        ``speakers_file_path`` does not exist and nothing was copied.
    """
    speakers_file_path = Path(speakers_file_path)

    if not speakers_file_path.exists():
        print("⚠ No existing speakers file to backup")
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = speakers_file_path.parent / f"speakers_backup_{timestamp}.yaml"

    # Copy raw bytes rather than decoded text: text mode would translate
    # "\r\n" line endings and decode with the locale's default encoding, so
    # the backup could differ from the original or fail outright on bytes
    # that are not valid in that encoding.
    backup_path.write_bytes(speakers_file_path.read_bytes())

    print(f"✓ Created backup: {backup_path}")
    return backup_path
def analyze_existing_speakers(service: SpeakerManagementService) -> dict:
    """Summarize which speakers still lack the new-format fields.

    A speaker counts as needing migration when its record has no
    ``tts_backend`` key or no ``reference_text`` key (key presence only; the
    stored values are not inspected).

    Args:
        service: Object exposing ``speakers_data``, a mapping of speaker id
            to that speaker's record.

    Returns:
        A dict with ``total_speakers``, ``needs_migration``,
        ``already_migrated``, ``missing_fields`` (set of absent field names
        seen across all speakers) and ``sample_speaker_data`` (``None``, or
        the id plus a shallow copy of the first record needing migration).
    """
    required_fields = ("tts_backend", "reference_text")

    report = {
        "total_speakers": len(service.speakers_data),
        "needs_migration": 0,
        "already_migrated": 0,
        "sample_speaker_data": None,
        "missing_fields": set(),
    }

    for sid, record in service.speakers_data.items():
        absent = [field for field in required_fields if field not in record]

        if not absent:
            report["already_migrated"] += 1
            continue

        report["missing_fields"].update(absent)
        report["needs_migration"] += 1

        # Keep only the first unmigrated speaker as the illustrative sample.
        if not report["sample_speaker_data"]:
            report["sample_speaker_data"] = {
                "id": sid,
                "current_data": record.copy(),
            }

    return report
def interactive_migration_prompt(analysis: dict) -> bool:
    """Print the migration analysis and ask the user whether to proceed.

    Args:
        analysis: Summary with the keys ``total_speakers``,
            ``needs_migration``, ``already_migrated``, ``missing_fields``
            (iterable of field names) and ``sample_speaker_data`` (``None``
            or a dict holding ``current_data``), as built by
            ``analyze_existing_speakers``.

    Returns:
        ``True`` only when at least one speaker needs migration and the user
        answers ``y`` or ``yes`` (case-insensitive, surrounding whitespace
        ignored). ``False`` otherwise; no prompt is shown when nothing needs
        migration.
    """
    print("\n=== Speaker Migration Analysis ===")
    print(f"Total speakers: {analysis['total_speakers']}")
    print(f"Need migration: {analysis['needs_migration']}")
    print(f"Already migrated: {analysis['already_migrated']}")

    if analysis["missing_fields"]:
        # The field names are collected in a set; sort them so the report
        # reads the same on every run.
        print(f"Missing fields: {', '.join(sorted(analysis['missing_fields']))}")

    if analysis["sample_speaker_data"]:
        print("\nExample current speaker data:")
        sample_data = analysis["sample_speaker_data"]["current_data"]
        for key, value in sample_data.items():
            print(f" {key}: {value}")
        print("\nAfter migration will have:")
        print(" tts_backend: chatterbox (default)")
        print(" reference_text: null (default)")

    if analysis["needs_migration"] == 0:
        print("\n✓ All speakers are already migrated!")
        return False

    print(f"\nThis will migrate {analysis['needs_migration']} speakers.")
    response = input("Continue with migration? (y/N): ").lower().strip()
    return response in ("y", "yes")
def validate_migrated_speakers(service: SpeakerManagementService) -> dict:
    """Run the service's speaker validation and print a short report.

    Args:
        service: Object exposing ``validate_all_speakers()``, whose result
            provides ``valid_speakers``, ``invalid_speakers`` and, when any
            speaker is invalid, ``validation_errors`` (entries carrying
            ``speaker_name``, ``speaker_id`` and ``error``).

    Returns:
        The result of ``service.validate_all_speakers()``, unchanged.
    """
    print("\n=== Validating Migrated Speakers ===")
    report = service.validate_all_speakers()
    print(f"✓ Valid speakers: {report['valid_speakers']}")

    if not report['invalid_speakers'] > 0:
        return report

    # At least one speaker failed validation: show the count, then one line
    # per failure.
    print(f"❌ Invalid speakers: {report['invalid_speakers']}")
    for failure in report['validation_errors']:
        who = f"{failure['speaker_name']} ({failure['speaker_id']})"
        print(f" - {who}: {failure['error']}")

    return report
def show_backend_statistics(service: SpeakerManagementService):
    """Print how many speakers each TTS backend has.

    Args:
        service: Object exposing ``get_backend_statistics()``, whose result
            provides ``total_speakers`` and ``backends`` (a mapping of
            backend name to a dict with ``count``, ``with_reference_text``
            and ``without_reference_text``).
    """
    # Label shown for each per-backend counter, in print order.
    counters = (
        ("Count", "count"),
        ("With reference text", "with_reference_text"),
        ("Without reference text", "without_reference_text"),
    )

    print("\n=== Backend Distribution ===")
    summary = service.get_backend_statistics()
    print(f"Total speakers: {summary['total_speakers']}")

    for backend_name, figures in summary['backends'].items():
        print(f"\n{backend_name.upper()} Backend:")
        for label, key in counters:
            print(f" {label}: {figures[key]}")
def main():
    """Run the interactive speaker migration end to end.

    Steps: load the speaker data, analyze it, ask for confirmation, back up
    the speakers YAML file, run the migration, validate the result and print
    the per-backend distribution.

    Returns:
        Process exit status: ``0`` when there was nothing to migrate, the
        user declined, or the migration validated cleanly; ``1`` when
        validation reported invalid speakers or any step raised an exception.
    """
    print("=== Speaker Data Migration Tool ===")
    print("This tool migrates existing speaker data to support multiple TTS backends\n")

    try:
        # Initialize service
        print("Loading speaker data...")
        service = SpeakerManagementService()

        # Analyze current state
        analysis = analyze_existing_speakers(service)

        # Show analysis and get confirmation
        if not interactive_migration_prompt(analysis):
            # The prompt also returns False when nothing needs migrating (and
            # has already said so); only report a cancellation when the user
            # actually declined.
            if analysis["needs_migration"] > 0:
                print("Migration cancelled.")
            return 0

        # Create backup before anything is modified
        print("\nCreating backup...")
        from backend.app import config
        backup_path = backup_speakers_file(config.SPEAKERS_YAML_FILE)

        # Perform migration
        print("\nPerforming migration...")
        migration_stats = service.migrate_existing_speakers()

        print("\n=== Migration Results ===")
        print(f"Total speakers processed: {migration_stats['total_speakers']}")
        print(f"Speakers migrated: {migration_stats['migrated_count']}")
        print(f"Already migrated: {migration_stats['already_migrated']}")

        if migration_stats['migrations_performed']:
            print("\nMigrated speakers:")
            for migration in migration_stats['migrations_performed']:
                print(f" - {migration['speaker_name']}: {', '.join(migration['migrations'])}")

        # Validate results
        validation_results = validate_migrated_speakers(service)

        # Show backend distribution
        show_backend_statistics(service)

        # Final status
        if validation_results['invalid_speakers'] != 0:
            print(f"\n⚠ Migration completed with {validation_results['invalid_speakers']} validation errors.")
            print("Please check the error details above.")
            return 1

        print("\n✅ Migration completed successfully!")
        print(f"All {migration_stats['total_speakers']} speakers are now using the new format.")
        # backup_path is None when there was no speakers file to copy.
        if backup_path:
            print(f"Original data backed up to: {backup_path}")
        return 0

    except Exception as e:
        # Top-level boundary of the script: report the failure with a full
        # traceback and signal it through the exit status.
        print(f"\n❌ Migration failed: {e}")
        import traceback
        traceback.print_exc()
        return 1
if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module for interactive use and is not available under
    # `python -S` or in some frozen/embedded builds.
    sys.exit(main())