183 lines
6.9 KiB
Python
183 lines
6.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Migration script for existing speakers to new format
|
|
Adds tts_backend and reference_text fields to existing speaker data
|
|
"""
|
|
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional

import yaml
|
|
|
|
# Add the project root (the directory two levels above the one containing
# this script) to the import path so the `backend` package imports below work
# when the file is run directly.
# resolve() makes the path absolute first: with a relative __file__ the
# .parent chain would bottom out at "." instead of climbing the tree.
project_root = Path(__file__).resolve().parent.parent.parent
sys.path.append(str(project_root))
|
|
|
|
from backend.app.services.speaker_service import SpeakerManagementService
|
|
from backend.app.models.speaker_models import Speaker
|
|
|
|
def backup_speakers_file(speakers_file_path: Path) -> Optional[Path]:
    """Create a timestamped backup of the speakers file next to the original.

    The file is copied byte-for-byte, so the backup is identical to the
    original regardless of the platform's default text encoding or newline
    translation.

    Args:
        speakers_file_path: Location of the speakers YAML file.

    Returns:
        The path of the backup that was written, or ``None`` when there is
        no speakers file to back up.
    """
    if not speakers_file_path.exists():
        print("⚠ No existing speakers file to backup")
        return None

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = speakers_file_path.parent / f"speakers_backup_{timestamp}.yaml"

    # copy2 works on raw bytes, so no decode/encode round trip can alter or
    # reject the file's contents.
    shutil.copy2(speakers_file_path, backup_path)
    print(f"✓ Created backup: {backup_path}")
    return backup_path
|
|
|
|
def analyze_existing_speakers(service: SpeakerManagementService) -> dict:
    """Summarise which speakers still lack the new-format fields.

    A speaker needs migration when its data has no ``tts_backend`` key or no
    ``reference_text`` key.

    Args:
        service: Object exposing ``speakers_data``, a mapping of speaker id
            to that speaker's data mapping.

    Returns:
        A dict with the keys ``total_speakers``, ``needs_migration``,
        ``already_migrated``, ``sample_speaker_data`` (id plus a shallow copy
        of the first speaker needing migration, or ``None``) and
        ``missing_fields`` (set of field names absent from at least one
        speaker).
    """
    required_fields = ("tts_backend", "reference_text")
    speakers = service.speakers_data

    pending = 0
    up_to_date = 0
    first_pending = None
    absent_fields = set()

    for sid, record in speakers.items():
        absent = [name for name in required_fields if name not in record]
        if not absent:
            up_to_date += 1
            continue

        absent_fields.update(absent)
        pending += 1
        # Keep only the first speaker that needs work as the example.
        if first_pending is None:
            first_pending = {"id": sid, "current_data": record.copy()}

    return {
        "total_speakers": len(speakers),
        "needs_migration": pending,
        "already_migrated": up_to_date,
        "sample_speaker_data": first_pending,
        "missing_fields": absent_fields,
    }
|
|
|
|
def interactive_migration_prompt(analysis: dict) -> bool:
    """Print the migration analysis and ask the user whether to proceed.

    Args:
        analysis: Summary with the keys ``total_speakers``,
            ``needs_migration``, ``already_migrated``, ``missing_fields``
            (iterable of field names) and ``sample_speaker_data`` (``None``
            or a dict holding a ``current_data`` mapping).

    Returns:
        ``True`` only when at least one speaker needs migration and the user
        answers ``y`` or ``yes`` (case-insensitive); ``False`` otherwise.
    """
    print("\n=== Speaker Migration Analysis ===")
    print(f"Total speakers: {analysis['total_speakers']}")
    print(f"Need migration: {analysis['needs_migration']}")
    print(f"Already migrated: {analysis['already_migrated']}")

    if analysis["missing_fields"]:
        # Sorted so the report reads the same on every run; iterating a set
        # of strings directly has no stable order.
        print(f"Missing fields: {', '.join(sorted(analysis['missing_fields']))}")

    if analysis["sample_speaker_data"]:
        print("\nExample current speaker data:")
        sample_data = analysis["sample_speaker_data"]["current_data"]
        for key, value in sample_data.items():
            print(f" {key}: {value}")

        print("\nAfter migration will have:")
        print(" tts_backend: chatterbox (default)")
        print(" reference_text: null (default)")

    # Nothing to do: report it and skip the confirmation question entirely.
    if analysis["needs_migration"] == 0:
        print("\n✓ All speakers are already migrated!")
        return False

    print(f"\nThis will migrate {analysis['needs_migration']} speakers.")
    response = input("Continue with migration? (y/N): ").lower().strip()
    return response in ['y', 'yes']
|
|
|
|
def validate_migrated_speakers(service: SpeakerManagementService) -> dict:
    """Run the service's speaker validation and print a short report.

    Args:
        service: Object exposing ``validate_all_speakers()``, whose result is
            read here via the keys ``valid_speakers``, ``invalid_speakers``
            and ``validation_errors``.

    Returns:
        The result of ``service.validate_all_speakers()``, unchanged.
    """
    print("\n=== Validating Migrated Speakers ===")
    report = service.validate_all_speakers()

    print(f"✓ Valid speakers: {report['valid_speakers']}")

    # Guard clause: without invalid speakers there are no details to list.
    if not report['invalid_speakers'] > 0:
        return report

    print(f"❌ Invalid speakers: {report['invalid_speakers']}")
    for issue in report['validation_errors']:
        who = f"{issue['speaker_name']} ({issue['speaker_id']})"
        print(f" - {who}: {issue['error']}")

    return report
|
|
|
|
def show_backend_statistics(service: SpeakerManagementService):
    """Print the per-backend speaker counts reported by the service.

    Args:
        service: Object exposing ``get_backend_statistics()``, whose result
            is read here via ``total_speakers`` and ``backends`` (a mapping
            of backend name to a dict with ``count``, ``with_reference_text``
            and ``without_reference_text``).
    """
    print("\n=== Backend Distribution ===")
    summary = service.get_backend_statistics()

    print(f"Total speakers: {summary['total_speakers']}")

    # (label shown to the user, key in the per-backend dict), in print order.
    rows = (
        ("Count", "count"),
        ("With reference text", "with_reference_text"),
        ("Without reference text", "without_reference_text"),
    )
    for name, counts in summary['backends'].items():
        print(f"\n{name.upper()} Backend:")
        for label, key in rows:
            print(f" {label}: {counts[key]}")
|
|
|
|
def main() -> int:
    """Run the interactive speaker migration from analysis to validation.

    Steps: load the speaker service, analyse which speakers lack the new
    fields, ask for confirmation, back up the speakers file, run the
    migration, then validate and summarise the result.

    Returns:
        Process exit code: ``0`` when the migration succeeded, was declined
        or was not needed; ``1`` when validation reported invalid speakers
        or any step raised an exception.
    """
    print("=== Speaker Data Migration Tool ===")
    print("This tool migrates existing speaker data to support multiple TTS backends\n")

    # Tracked outside the try block so the failure paths can still tell the
    # user where the pre-migration copy lives.
    backup_path = None

    try:
        # Initialize service
        print("Loading speaker data...")
        service = SpeakerManagementService()

        # Analyze current state
        analysis = analyze_existing_speakers(service)

        # Show analysis and get confirmation
        if not interactive_migration_prompt(analysis):
            # The prompt also returns False when nothing needs migrating (it
            # reports that itself); only a real refusal is a cancellation.
            if analysis["needs_migration"]:
                print("Migration cancelled.")
            return 0

        # Create backup
        print("\nCreating backup...")
        # config is only needed for the location of the speakers file.
        from backend.app import config
        backup_path = backup_speakers_file(config.SPEAKERS_YAML_FILE)

        # Perform migration
        print("\nPerforming migration...")
        migration_stats = service.migrate_existing_speakers()

        print("\n=== Migration Results ===")
        print(f"Total speakers processed: {migration_stats['total_speakers']}")
        print(f"Speakers migrated: {migration_stats['migrated_count']}")
        print(f"Already migrated: {migration_stats['already_migrated']}")

        if migration_stats['migrations_performed']:
            print("\nMigrated speakers:")
            for migration in migration_stats['migrations_performed']:
                print(f" - {migration['speaker_name']}: {', '.join(migration['migrations'])}")

        # Validate results
        validation_results = validate_migrated_speakers(service)

        # Show backend distribution
        show_backend_statistics(service)

        # Final status
        if validation_results['invalid_speakers'] == 0:
            print("\n✅ Migration completed successfully!")
            print(f"All {migration_stats['total_speakers']} speakers are now using the new format.")
            if backup_path:
                print(f"Original data backed up to: {backup_path}")
            return 0

        print(f"\n⚠ Migration completed with {validation_results['invalid_speakers']} validation errors.")
        print("Please check the error details above.")
        # The backup matters most when something went wrong, so report it here too.
        if backup_path:
            print(f"Original data backed up to: {backup_path}")
        return 1

    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback and turn it into a non-zero exit code.
        print(f"\n❌ Migration failed: {e}")
        import traceback
        traceback.print_exc()
        if backup_path:
            print(f"Original data backed up to: {backup_path}")
        return 1
|
|
|
|
if __name__ == "__main__":
    # sys.exit is the supported way to set the process exit status; the bare
    # exit() helper is added by the site module for interactive use and is
    # not available when Python runs without it (e.g. with -S).
    sys.exit(main())