diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e639148 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,102 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Common Development Commands + +### Backend (FastAPI) + +```bash +# Install backend dependencies (run from project root) +pip install -r backend/requirements.txt + +# Run backend development server (run from project root) +uvicorn backend.app.main:app --reload --host 0.0.0.0 --port 8000 + +# Run backend tests +python backend/run_api_test.py + +# Backend API accessible at http://127.0.0.1:8000 +# API docs at http://127.0.0.1:8000/docs +``` + +### Frontend Testing + +```bash +# Run frontend tests +npm test +``` + +### Alternative Interfaces + +```bash +# Run Gradio interface (standalone TTS app) +python gradio_app.py +``` + +## Architecture Overview + +This is a full-stack TTS (Text-to-Speech) application with three interfaces: + +1. **Modern web frontend** (vanilla JS) - Interactive dialog editor at `frontend/` +2. **FastAPI backend** - REST API at `backend/` +3. 
**Gradio interface** - Alternative UI in `gradio_app.py` + +### Frontend-Backend Communication + +- **Frontend**: Vanilla JS (ES6 modules) serving on port 8001 +- **Backend**: FastAPI serving on port 8000 +- **API Base**: `http://localhost:8000/api` +- **CORS**: Configured for frontend communication +- **File Serving**: Generated audio served via `/generated_audio/` endpoint + +### Key API Endpoints + +- `/api/speakers/` - Speaker CRUD operations +- `/api/dialog/generate/` - Full dialog generation +- `/api/dialog/generate_line/` - Single line generation +- `/generated_audio/` - Static audio file serving + +### Backend Service Architecture + +Located in `backend/app/services/`: + +- **TTSService**: Chatterbox TTS model lifecycle management +- **SpeakerManagementService**: Speaker data and sample management +- **DialogProcessorService**: Dialog script to audio processing +- **AudioManipulationService**: Audio concatenation and ZIP creation + +### Frontend Architecture + +- **Modular design**: `api.js` (API layer) + `app.js` (app logic) +- **No framework**: Modern vanilla JavaScript with ES6+ features +- **Interactive editor**: Table-based dialog creation with drag-drop reordering + +### Data Flow + +1. User creates dialog in frontend table editor +2. Frontend sends dialog items to `/api/dialog/generate/` +3. Backend processes speech/silence items via services +4. TTS generates audio, segments concatenated +5. ZIP archive created with all outputs +6. 
Frontend receives URLs for playback/download + +### Speaker Configuration + +- **Location**: `speaker_data/speakers.yaml` and `speaker_data/speaker_samples/` +- **Format**: YAML config referencing WAV audio samples +- **Management**: Both API endpoints and file-based configuration + +### Output Organization + +- `dialog_output/` - Generated dialog files +- `single_output/` - Single utterance outputs +- `tts_outputs/` - Raw TTS generation files +- Generated ZIPs contain organized file structure + +## Development Setup Notes + +- Python virtual environment expected at project root (`.venv`) +- Backend commands run from project root, not `backend/` directory +- Frontend served separately (typically port 8001) +- Speaker samples must be WAV format in `speaker_data/speaker_samples/` diff --git a/frontend/index.html b/frontend/index.html index bb7fdb6..02c0180 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -43,6 +43,11 @@ +
+ + + +
diff --git a/frontend/js/app.js b/frontend/js/app.js
index a8bc536..46767dd 100644
--- a/frontend/js/app.js
+++ b/frontend/js/app.js
@@ -125,6 +125,9 @@ function initializeDialogEditor() {
     const addSilenceLineBtn = document.getElementById('add-silence-line-btn');
     const outputBaseNameInput = document.getElementById('output-base-name');
     const generateDialogBtn = document.getElementById('generate-dialog-btn');
+    const saveScriptBtn = document.getElementById('save-script-btn');
+    const loadScriptBtn = document.getElementById('load-script-btn');
+    const loadScriptInput = document.getElementById('load-script-input');
 
     // Results Display Elements
     const generationLogPre = document.getElementById('generation-log-content'); // Corrected ID
@@ -507,6 +510,217 @@ function initializeDialogEditor() {
         });
     }
 
+    // --- Save/Load Script Functionality ---
+
+    /**
+     * Export the current dialog as a JSONL file and trigger a browser download.
+     *
+     * Only the persistent fields are written: `type`, plus `speaker_id` and
+     * `text` for speech items or `duration` for silence items. Any other
+     * properties on the in-memory items are left out.
+     */
+    function saveDialogScript() {
+        if (dialogItems.length === 0) {
+            alert('No dialog items to save. Please add some speech or silence lines first.');
+            return;
+        }
+
+        // Keep only the fields that belong in the exported script.
+        const exportData = dialogItems.map((item) => {
+            if (item.type === 'speech') {
+                return { type: item.type, speaker_id: item.speaker_id, text: item.text };
+            }
+            if (item.type === 'silence') {
+                return { type: item.type, duration: item.duration };
+            }
+            return { type: item.type };
+        });
+
+        // JSONL: one JSON object per line.
+        const jsonlContent = exportData.map((item) => JSON.stringify(item)).join('\n');
+
+        const blob = new Blob([jsonlContent], { type: 'application/jsonl' });
+        const url = URL.createObjectURL(blob);
+
+        // ISO timestamp with ':' and '.' replaced so it is filename-safe,
+        // truncated to whole seconds.
+        const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
+        const filename = `dialog_script_${timestamp}.jsonl`;
+
+        // Download by clicking a temporary, hidden anchor.
+        const link = document.createElement('a');
+        link.href = url;
+        link.download = filename;
+        link.style.display = 'none';
+        document.body.appendChild(link);
+        link.click();
+        document.body.removeChild(link);
+
+        // Revoke on a later task instead of synchronously: a browser that
+        // starts the download asynchronously would otherwise find the blob
+        // URL already gone.
+        setTimeout(() => URL.revokeObjectURL(url), 0);
+
+        console.log(`Dialog script saved as ${filename}`);
+    }
+
+    /**
+     * Read a JSONL dialog script from `file` and replace the current dialog.
+     *
+     * Every non-empty line must be a JSON object accepted by
+     * `validateDialogItem`. The first bad line aborts the load with an alert
+     * and leaves the current dialog untouched.
+     *
+     * @param {File} file - File chosen by the user.
+     */
+    function loadDialogScript(file) {
+        if (!file) {
+            alert('Please select a file to load.');
+            return;
+        }
+
+        const reader = new FileReader();
+        reader.onload = function (e) {
+            try {
+                const content = e.target.result;
+                const lines = content.trim().split('\n');
+                const loadedItems = [];
+
+                for (let i = 0; i < lines.length; i++) {
+                    const line = lines[i].trim();
+                    if (!line) continue; // Skip empty lines
+
+                    const lineNumber = i + 1;
+                    try {
+                        const item = JSON.parse(line);
+                        // validateDialogItem returns a clean item or throws.
+                        const validatedItem = validateDialogItem(item, lineNumber);
+                        loadedItems.push(normalizeDialogItem(validatedItem));
+                    } catch (parseError) {
+                        console.error(`Error parsing line ${lineNumber}:`, parseError);
+                        // Validation errors already start with "Line N: ";
+                        // strip it so the line number is not shown twice.
+                        const prefix = `Line ${lineNumber}: `;
+                        const detail = parseError.message.startsWith(prefix)
+                            ? parseError.message.slice(prefix.length)
+                            : parseError.message;
+                        alert(`Error parsing line ${lineNumber}: ${detail}`);
+                        return;
+                    }
+                }
+
+                if (loadedItems.length === 0) {
+                    alert('No valid dialog items found in the file.');
+                    return;
+                }
+
+                // Confirm replacement if existing items
+                if (dialogItems.length > 0) {
+                    const confirmed = confirm(
+                        `This will replace your current dialog (${dialogItems.length} items) with the loaded script (${loadedItems.length} items). Continue?`
+                    );
+                    if (!confirmed) return;
+                }
+
+                // Replace the contents in place; the array object itself is kept.
+                dialogItems.splice(0, dialogItems.length, ...loadedItems);
+                renderDialogItems();
+
+                console.log(`Loaded ${loadedItems.length} dialog items from script`);
+                alert(`Successfully loaded ${loadedItems.length} dialog items.`);
+            } catch (error) {
+                console.error('Error loading dialog script:', error);
+                alert(`Error loading dialog script: ${error.message}`);
+            }
+        };
+
+        reader.onerror = function () {
+            alert('Error reading file. Please try again.');
+        };
+
+        reader.readAsText(file);
+    }
+
+    /**
+     * Validate one parsed script entry and return a copy with only known fields.
+     *
+     * @param {*} item - Value parsed from one line of the script.
+     * @param {number} lineNumber - 1-based line number used in error messages.
+     * @returns {Object} `{type: 'speech', speaker_id, text}` or
+     *     `{type: 'silence', duration}`.
+     * @throws {Error} If the entry is not a plain object, has an unknown type,
+     *     or lacks a required field. Messages start with "Line N: ".
+     */
+    function validateDialogItem(item, lineNumber) {
+        // Arrays are objects too, so rule them out explicitly.
+        if (!item || typeof item !== 'object' || Array.isArray(item)) {
+            throw new Error(`Line ${lineNumber}: Invalid item format`);
+        }
+
+        if (!item.type || !['speech', 'silence'].includes(item.type)) {
+            throw new Error(`Line ${lineNumber}: Invalid or missing type. Must be 'speech' or 'silence'`);
+        }
+
+        if (item.type === 'speech') {
+            if (!item.speaker_id || typeof item.speaker_id !== 'string') {
+                throw new Error(`Line ${lineNumber}: Speech items must have a valid speaker_id`);
+            }
+            if (!item.text || typeof item.text !== 'string') {
+                throw new Error(`Line ${lineNumber}: Speech items must have text`);
+            }
+
+            // An unknown speaker is only a warning, since it might be added
+            // later. The lookup is skipped while no speakers are cached.
+            if (availableSpeakersCache.length > 0) {
+                const speakerExists = availableSpeakersCache.some(
+                    (speaker) => speaker.id === item.speaker_id
+                );
+                if (!speakerExists) {
+                    console.warn(`Line ${lineNumber}: Speaker '${item.speaker_id}' not found in available speakers`);
+                }
+            }
+
+            return {
+                type: 'speech',
+                speaker_id: item.speaker_id,
+                text: item.text,
+            };
+        }
+
+        // Only 'silence' can reach this point.
+        // Number.isFinite rejects non-numbers as well as Infinity, which
+        // JSON.parse produces for out-of-range literals such as 1e999.
+        if (!Number.isFinite(item.duration) || item.duration <= 0) {
+            throw new Error(`Line ${lineNumber}: Silence items must have a positive duration number`);
+        }
+
+        return {
+            type: 'silence',
+            duration: item.duration,
+        };
+    }
+
+    // Event handlers for save/load
+    if (saveScriptBtn) {
+        saveScriptBtn.addEventListener('click', saveDialogScript);
+    }
+
+    if (loadScriptBtn && loadScriptInput) {
+        // The button only forwards the click to the file input.
+        loadScriptBtn.addEventListener('click', () => {
+            loadScriptInput.click();
+        });
+
+        loadScriptInput.addEventListener('change', (e) => {
+            const file = e.target.files[0];
+            if (file) {
+                loadDialogScript(file);
+                // Reset input so same file can be loaded again
+                e.target.value = '';
+            }
+        });
+    }
+
     console.log('Dialog Editor Initialized');
     renderDialogItems(); // Initial render (empty)
 }