Spaces:
Running
Running
File size: 5,807 Bytes
b9eca36 644b6c1 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 644b6c1 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 dd1b723 b9eca36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
// Configuration
const MODEL_ID = 'onnx-community/Supertonic-TTS-ONNX';
const VOICE_BASE_URL = 'https://huggingface.co/onnx-community/Supertonic-TTS-ONNX/resolve/main/voices/';
// DOM Elements
const generateBtn = document.getElementById('generate-btn');
const inputText = document.getElementById('input-text');
const voiceSelect = document.getElementById('voice-select');
const gpuToggle = document.getElementById('gpu-toggle');
const deviceLabel = document.getElementById('device-label');
const statusContainer = document.getElementById('status-container');
const statusText = document.getElementById('status-text');
const progressBar = document.getElementById('progress-bar');
const outputCard = document.getElementById('output-card');
const audioPlayer = document.getElementById('audio-player');
const downloadLink = document.getElementById('download-link');
const errorMsg = document.getElementById('error-msg');
// State
let ttsPipeline = null;
let currentDevice = 'wasm';
// Helper: Check WebGPU support
async function checkWebGPU() {
if (!navigator.gpu) {
gpuToggle.disabled = true;
deviceLabel.innerText = "WebGPU not supported (CPU only)";
return false;
}
return true;
}
checkWebGPU();
// UI Event Listeners
gpuToggle.addEventListener('change', (e) => {
const useGPU = e.target.checked;
currentDevice = useGPU ? 'webgpu' : 'wasm';
deviceLabel.innerText = useGPU ? 'Run on WebGPU' : 'Run on CPU';
// Reset pipeline to force reload with new device setting next time
ttsPipeline = null;
});
inputText.addEventListener('input', () => {
document.querySelector('.char-count').innerText = `${inputText.value.length} / 500`;
});
generateBtn.addEventListener('click', async () => {
const text = inputText.value.trim();
if (!text) return;
resetUI();
statusContainer.classList.remove('hidden');
generateBtn.disabled = true;
try {
// 1. Initialize Pipeline if needed
if (!ttsPipeline) {
updateStatus('Loading model... (this may take a moment)', 0);
// Import pipeline from window (set in HTML)
const { pipeline } = window;
ttsPipeline = await pipeline('text-to-speech', MODEL_ID, {
device: currentDevice,
dtype: 'fp32', // Required for this specific model as per prompt
progress_callback: (data) => {
if (data.status === 'progress') {
updateStatus(`Downloading ${data.file}...`, data.progress);
} else if (data.status === 'ready') {
updateStatus('Model ready!', 100);
}
}
});
}
// 2. Generate Audio
updateStatus('Generating audio...', 100);
progressBar.classList.add('pulsing'); // Add animation for inference time
const voiceFile = voiceSelect.value;
const speaker_embeddings = `${VOICE_BASE_URL}${voiceFile}`;
// Run inference
const output = await ttsPipeline(text, {
speaker_embeddings: speaker_embeddings
});
// 3. Process Output
// output.audio is a Float32Array, output.sampling_rate is a number
const wavUrl = createWavUrl(output.audio, output.sampling_rate);
audioPlayer.src = wavUrl;
downloadLink.href = wavUrl;
outputCard.classList.remove('hidden');
// Auto-play result
try {
await audioPlayer.play();
} catch (e) {
console.log("Auto-play blocked by browser policy");
}
} catch (err) {
console.error(err);
showError(err.message);
} finally {
generateBtn.disabled = false;
progressBar.classList.remove('pulsing');
statusContainer.classList.add('hidden');
}
});
// Helper: Update Progress UI
function updateStatus(text, progressPercent) {
statusText.innerText = text;
progressBar.style.width = `${progressPercent}%`;
}
function resetUI() {
outputCard.classList.add('hidden');
errorMsg.classList.add('hidden');
progressBar.style.width = '0%';
}
function showError(msg) {
errorMsg.innerText = `Error: ${msg}`;
errorMsg.classList.remove('hidden');
}
// Audio Utility: Convert Float32Array to WAV Blob URL
function createWavUrl(audioData, sampleRate) {
const buffer = encodeWAV(audioData, sampleRate);
const blob = new Blob([buffer], { type: 'audio/wav' });
return URL.createObjectURL(blob);
}
function encodeWAV(samples, sampleRate) {
const buffer = new ArrayBuffer(44 + samples.length * 2);
const view = new DataView(buffer);
// RIFF chunk descriptor
writeString(view, 0, 'RIFF');
view.setUint32(4, 36 + samples.length * 2, true);
writeString(view, 8, 'WAVE');
// fmt sub-chunk
writeString(view, 12, 'fmt ');
view.setUint32(16, 16, true);
view.setUint16(20, 1, true); // PCM format
view.setUint16(22, 1, true); // Mono
view.setUint32(24, sampleRate, true);
view.setUint32(28, sampleRate * 2, true);
view.setUint16(32, 2, true);
view.setUint16(34, 16, true); // 16-bit
// data sub-chunk
writeString(view, 36, 'data');
view.setUint32(40, samples.length * 2, true);
// Write PCM samples
floatTo16BitPCM(view, 44, samples);
return buffer;
}
function writeString(view, offset, string) {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
}
function floatTo16BitPCM(view, offset, input) {
for (let i = 0; i < input.length; i++, offset += 2) {
let s = Math.max(-1, Math.min(1, input[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF;
view.setInt16(offset, s, true);
}
} |