🎨 优化扩展模块,完成ai接入和对话功能
This commit is contained in:
566
data/st-core-scripts/scripts/extensions/tts/tts-webui.js
Normal file
566
data/st-core-scripts/scripts/extensions/tts/tts-webui.js
Normal file
@@ -0,0 +1,566 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { TtsWebuiProvider };
|
||||
|
||||
class TtsWebuiProvider {
|
||||
settings;
|
||||
voices = [];
|
||||
separator = ' . ';
|
||||
|
||||
audioElement = document.createElement('audio');
|
||||
audioContext = null;
|
||||
audioWorkletNode = null;
|
||||
currentVolume = 1.0; // Track current volume
|
||||
|
||||
defaultSettings = {
|
||||
voiceMap: {},
|
||||
model: 'chatterbox',
|
||||
speed: 1,
|
||||
volume: 1.0,
|
||||
available_voices: [''],
|
||||
provider_endpoint: 'http://127.0.0.1:7778/v1/audio/speech',
|
||||
streaming: true,
|
||||
stream_chunk_size: 100,
|
||||
desired_length: 80,
|
||||
max_length: 200,
|
||||
halve_first_chunk: true,
|
||||
exaggeration: 0.5,
|
||||
cfg_weight: 0.5,
|
||||
temperature: 0.8,
|
||||
device: 'auto',
|
||||
dtype: 'float32',
|
||||
cpu_offload: false,
|
||||
chunked: true,
|
||||
cache_voice: false,
|
||||
tokens_per_slice: 1000,
|
||||
remove_milliseconds: 45,
|
||||
remove_milliseconds_start: 25,
|
||||
chunk_overlap_method: 'zero',
|
||||
seed: -1,
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<h4 class="textAlignCenter">TTS WebUI Settings</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10 alignItemsFlexEnd">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_endpoint">Provider Endpoint:</label>
|
||||
<input id="tts_webui_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_model">Model:</label>
|
||||
<input id="tts_webui_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_voices">Available Voices (comma separated):</label>
|
||||
<input id="tts_webui_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_streaming" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_streaming" type="checkbox" />
|
||||
<span>Streaming</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_volume">Volume: <span id="tts_webui_volume_output">${this.defaultSettings.volume}</span></label>
|
||||
<input type="range" id="tts_webui_volume" value="${this.defaultSettings.volume}" min="0" max="2" step="0.1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Generation Settings</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_exaggeration">Exaggeration: <span id="tts_webui_exaggeration_output">${this.defaultSettings.exaggeration}</span></label>
|
||||
<input id="tts_webui_exaggeration" type="range" value="${this.defaultSettings.exaggeration}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_cfg_weight">CFG Weight: <span id="tts_webui_cfg_weight_output">${this.defaultSettings.cfg_weight}</span></label>
|
||||
<input id="tts_webui_cfg_weight" type="range" value="${this.defaultSettings.cfg_weight}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_temperature">Temperature: <span id="tts_webui_temperature_output">${this.defaultSettings.temperature}</span></label>
|
||||
<input id="tts_webui_temperature" type="range" value="${this.defaultSettings.temperature}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_seed">Seed (-1 for random):</label>
|
||||
<input id="tts_webui_seed" type="text" class="text_pole" value="${this.defaultSettings.seed}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Chunking</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_chunked" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_chunked" type="checkbox" />
|
||||
<span>Split prompt into chunks</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_halve_first_chunk" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_halve_first_chunk" type="checkbox" />
|
||||
<span>Halve First Chunk</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_desired_length">Desired Length: <span id="tts_webui_desired_length_output">${this.defaultSettings.desired_length}</span></label>
|
||||
<input id="tts_webui_desired_length" type="range" value="${this.defaultSettings.desired_length}" min="25" max="300" step="5" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_max_length">Max Length: <span id="tts_webui_max_length_output">${this.defaultSettings.max_length}</span></label>
|
||||
<input id="tts_webui_max_length" type="range" value="${this.defaultSettings.max_length}" min="50" max="450" step="5" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Model</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_device">Device:</label>
|
||||
<select id="tts_webui_device">
|
||||
<option value="auto" ${this.defaultSettings.device === 'auto' ? 'selected' : ''}>Auto</option>
|
||||
<option value="cuda" ${this.defaultSettings.device === 'cuda' ? 'selected' : ''}>CUDA</option>
|
||||
<option value="mps" ${this.defaultSettings.device === 'mps' ? 'selected' : ''}>MPS</option>
|
||||
<option value="cpu" ${this.defaultSettings.device === 'cpu' ? 'selected' : ''}>CPU</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_dtype">Data Type:</label>
|
||||
<select id="tts_webui_dtype">
|
||||
<option value="float32" ${this.defaultSettings.dtype === 'float32' ? 'selected' : ''}>Float32</option>
|
||||
<option value="float16" ${this.defaultSettings.dtype === 'float16' ? 'selected' : ''}>Float16</option>
|
||||
<option value="bfloat16" ${this.defaultSettings.dtype === 'bfloat16' ? 'selected' : ''}>BFloat16</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_cpu_offload" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_cpu_offload" type="checkbox" />
|
||||
<span>CPU Offload</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<!-- Empty for spacing -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Streaming (Advanced Settings)</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_tokens_per_slice">Tokens Per Slice: <span id="tts_webui_tokens_per_slice_output">${this.defaultSettings.tokens_per_slice}</span></label>
|
||||
<input id="tts_webui_tokens_per_slice" type="range" value="${this.defaultSettings.tokens_per_slice}" min="15" max="1000" step="1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_chunk_overlap_method">Chunk Overlap Method:</label>
|
||||
<select id="tts_webui_chunk_overlap_method">
|
||||
<option value="zero" ${this.defaultSettings.chunk_overlap_method === 'zero' ? 'selected' : ''}>Zero</option>
|
||||
<option value="full" ${this.defaultSettings.chunk_overlap_method === 'full' ? 'selected' : ''}>Full</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_remove_milliseconds">Remove Milliseconds: <span id="tts_webui_remove_milliseconds_output">${this.defaultSettings.remove_milliseconds}</span></label>
|
||||
<input id="tts_webui_remove_milliseconds" type="range" value="${this.defaultSettings.remove_milliseconds}" min="0" max="100" step="1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_remove_milliseconds_start">Remove Milliseconds Start: <span id="tts_webui_remove_milliseconds_start_output">${this.defaultSettings.remove_milliseconds_start}</span></label>
|
||||
<input id="tts_webui_remove_milliseconds_start" type="range" value="${this.defaultSettings.remove_milliseconds_start}" min="0" max="100" step="1" />
|
||||
</div>
|
||||
</div>`;
|
||||
return html;
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length == 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw `Invalid setting passed to TTS Provider: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
$('#tts_webui_endpoint').val(this.settings.provider_endpoint);
|
||||
$('#tts_webui_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_model').val(this.settings.model);
|
||||
$('#tts_webui_model').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_voices').val(this.settings.available_voices.join());
|
||||
$('#tts_webui_voices').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_streaming').prop('checked', this.settings.streaming);
|
||||
$('#tts_webui_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_volume').val(this.settings.volume);
|
||||
$('#tts_webui_volume').on('input', () => {
|
||||
this.onSettingsChange();
|
||||
});
|
||||
|
||||
$('#tts_webui_stream_chunk_size').val(this.settings.stream_chunk_size);
|
||||
$('#tts_webui_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_desired_length').val(this.settings.desired_length);
|
||||
$('#tts_webui_desired_length').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_max_length').val(this.settings.max_length);
|
||||
$('#tts_webui_max_length').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_halve_first_chunk').prop('checked', this.settings.halve_first_chunk);
|
||||
$('#tts_webui_halve_first_chunk').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_exaggeration').val(this.settings.exaggeration);
|
||||
$('#tts_webui_exaggeration').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_cfg_weight').val(this.settings.cfg_weight);
|
||||
$('#tts_webui_cfg_weight').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_temperature').val(this.settings.temperature);
|
||||
$('#tts_webui_temperature').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_device').val(this.settings.device);
|
||||
$('#tts_webui_device').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_dtype').val(this.settings.dtype);
|
||||
$('#tts_webui_dtype').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_cpu_offload').prop('checked', this.settings.cpu_offload);
|
||||
$('#tts_webui_cpu_offload').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_chunked').prop('checked', this.settings.chunked);
|
||||
$('#tts_webui_chunked').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_tokens_per_slice').val(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_tokens_per_slice').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_remove_milliseconds').val(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_remove_milliseconds_start').val(this.settings.remove_milliseconds_start);
|
||||
$('#tts_webui_remove_milliseconds_start').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_chunk_overlap_method').val(this.settings.chunk_overlap_method);
|
||||
$('#tts_webui_chunk_overlap_method').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_seed').val(this.settings.seed);
|
||||
$('#tts_webui_seed').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
// Update output labels
|
||||
$('#tts_webui_volume_output').text(this.settings.volume);
|
||||
$('#tts_webui_desired_length_output').text(this.settings.desired_length);
|
||||
$('#tts_webui_max_length_output').text(this.settings.max_length);
|
||||
$('#tts_webui_exaggeration_output').text(this.settings.exaggeration);
|
||||
$('#tts_webui_cfg_weight_output').text(this.settings.cfg_weight);
|
||||
$('#tts_webui_temperature_output').text(this.settings.temperature);
|
||||
$('#tts_webui_tokens_per_slice_output').text(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_remove_milliseconds_output').text(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds_start_output').text(this.settings.remove_milliseconds_start);
|
||||
|
||||
await this.checkReady();
|
||||
|
||||
console.debug('OpenAI Compatible TTS: Settings loaded');
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Update dynamically
|
||||
this.settings.provider_endpoint = String($('#tts_webui_endpoint').val());
|
||||
this.settings.model = String($('#tts_webui_model').val());
|
||||
this.settings.available_voices = String($('#tts_webui_voices').val()).split(',');
|
||||
this.settings.volume = Number($('#tts_webui_volume').val());
|
||||
this.settings.streaming = $('#tts_webui_streaming').is(':checked');
|
||||
this.settings.stream_chunk_size = Number($('#tts_webui_stream_chunk_size').val());
|
||||
this.settings.desired_length = Number($('#tts_webui_desired_length').val());
|
||||
this.settings.max_length = Number($('#tts_webui_max_length').val());
|
||||
this.settings.halve_first_chunk = $('#tts_webui_halve_first_chunk').is(':checked');
|
||||
this.settings.exaggeration = Number($('#tts_webui_exaggeration').val());
|
||||
this.settings.cfg_weight = Number($('#tts_webui_cfg_weight').val());
|
||||
this.settings.temperature = Number($('#tts_webui_temperature').val());
|
||||
this.settings.device = String($('#tts_webui_device').val());
|
||||
this.settings.dtype = String($('#tts_webui_dtype').val());
|
||||
this.settings.cpu_offload = $('#tts_webui_cpu_offload').is(':checked');
|
||||
this.settings.chunked = $('#tts_webui_chunked').is(':checked');
|
||||
this.settings.tokens_per_slice = Number($('#tts_webui_tokens_per_slice').val());
|
||||
this.settings.remove_milliseconds = Number($('#tts_webui_remove_milliseconds').val());
|
||||
this.settings.remove_milliseconds_start = Number($('#tts_webui_remove_milliseconds_start').val());
|
||||
this.settings.chunk_overlap_method = String($('#tts_webui_chunk_overlap_method').val());
|
||||
this.settings.seed = parseInt($('#tts_webui_seed').val()) || -1;
|
||||
|
||||
// Apply volume change immediately
|
||||
this.setVolume(this.settings.volume);
|
||||
|
||||
// Update output labels
|
||||
$('#tts_webui_volume_output').text(this.settings.volume);
|
||||
$('#tts_webui_desired_length_output').text(this.settings.desired_length);
|
||||
$('#tts_webui_max_length_output').text(this.settings.max_length);
|
||||
$('#tts_webui_exaggeration_output').text(this.settings.exaggeration);
|
||||
$('#tts_webui_cfg_weight_output').text(this.settings.cfg_weight);
|
||||
$('#tts_webui_temperature_output').text(this.settings.temperature);
|
||||
$('#tts_webui_tokens_per_slice_output').text(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_remove_milliseconds_output').text(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds_start_output').text(this.settings.remove_milliseconds_start);
|
||||
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
async checkReady() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
console.info('TTS voices refreshed');
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.voices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
const match = this.voices.filter(
|
||||
oaicVoice => oaicVoice.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
|
||||
if (this.settings.streaming) {
|
||||
// Stream audio in real-time
|
||||
await this.processStreamingAudio(response);
|
||||
// Return empty string since audio is already played via AudioWorklet
|
||||
return '';
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
async fetchTtsVoiceObjects() {
|
||||
// Try to fetch voices from the provider endpoint
|
||||
try {
|
||||
const voicesEndpoint = this.settings.provider_endpoint.replace('/speech', '/voices/' + this.settings.model);
|
||||
const response = await fetch(voicesEndpoint);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const responseJson = await response.json();
|
||||
console.info('Discovered voices from provider:', responseJson);
|
||||
|
||||
this.voices = responseJson.voices.map(({ value, label }) => ({
|
||||
name: label,
|
||||
voice_id: value,
|
||||
lang: 'en-US',
|
||||
}));
|
||||
|
||||
return this.voices;
|
||||
} catch (error) {
|
||||
console.warn('Voice discovery failed, using configured voices:', error);
|
||||
}
|
||||
|
||||
// Fallback to configured voices
|
||||
this.voices = this.settings.available_voices.map(name => ({
|
||||
name, voice_id: name, lang: 'en-US',
|
||||
}));
|
||||
|
||||
return this.voices;
|
||||
}
|
||||
|
||||
async initAudioWorklet(wavSampleRate) {
|
||||
this.audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: wavSampleRate });
|
||||
|
||||
// Load the PCM processor from separate file
|
||||
const processorUrl = './scripts/extensions/tts/lib/pcm-processor.js';
|
||||
await this.audioContext.audioWorklet.addModule(processorUrl);
|
||||
this.audioWorkletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
|
||||
this.audioWorkletNode.connect(this.audioContext.destination);
|
||||
}
|
||||
|
||||
parseWavHeader(buffer) {
|
||||
const view = new DataView(buffer);
|
||||
// Sample rate is at bytes 24-27 (little endian)
|
||||
const sampleRate = view.getUint32(24, true);
|
||||
// Number of channels is at bytes 22-23 (little endian)
|
||||
const channels = view.getUint16(22, true);
|
||||
// Bits per sample is at bytes 34-35 (little endian)
|
||||
const bitsPerSample = view.getUint16(34, true);
|
||||
|
||||
return { sampleRate, channels, bitsPerSample };
|
||||
}
|
||||
|
||||
async processStreamingAudio(response) {
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const reader = response.body.getReader();
|
||||
let headerParsed = false;
|
||||
let wavInfo = null;
|
||||
|
||||
const processStream = async ({ done, value }) => {
|
||||
if (done) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!headerParsed) {
|
||||
// Parse WAV header to get sample rate
|
||||
wavInfo = this.parseWavHeader(value.buffer);
|
||||
console.log('WAV Info:', wavInfo);
|
||||
|
||||
// Initialize AudioWorklet with correct sample rate
|
||||
await this.initAudioWorklet(wavInfo.sampleRate);
|
||||
|
||||
// Skip WAV header (first 44 bytes typically)
|
||||
const pcmData = value.slice(44);
|
||||
this.audioWorkletNode.port.postMessage({ pcmData });
|
||||
headerParsed = true;
|
||||
|
||||
const next = await reader.read();
|
||||
return processStream(next);
|
||||
}
|
||||
|
||||
// Send PCM data to AudioWorklet for immediate playback
|
||||
this.audioWorkletNode.port.postMessage({ pcmData: value });
|
||||
const next = await reader.read();
|
||||
return processStream(next);
|
||||
};
|
||||
|
||||
const firstChunk = await reader.read();
|
||||
await processStream(firstChunk);
|
||||
}
|
||||
|
||||
async previewTtsVoice(voiceId) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.currentTime = 0;
|
||||
|
||||
const text = getPreviewString('en-US');
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
|
||||
if (this.settings.streaming) {
|
||||
// Use shared streaming method
|
||||
await this.processStreamingAudio(response);
|
||||
} else {
|
||||
// For non-streaming, response is a fetch Response object
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const audio = await response.blob();
|
||||
const url = URL.createObjectURL(audio);
|
||||
this.audioElement.src = url;
|
||||
this.audioElement.play();
|
||||
this.audioElement.onended = () => URL.revokeObjectURL(url);
|
||||
}
|
||||
}
|
||||
|
||||
async fetchTtsGeneration(inputText, voiceId) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
const settings = this.settings;
|
||||
const streaming = settings.streaming;
|
||||
|
||||
const chatterboxParams = [
|
||||
'desired_length',
|
||||
'max_length',
|
||||
'halve_first_chunk',
|
||||
'exaggeration',
|
||||
'cfg_weight',
|
||||
'temperature',
|
||||
'device',
|
||||
'dtype',
|
||||
'cpu_offload',
|
||||
'chunked',
|
||||
'cache_voice',
|
||||
'tokens_per_slice',
|
||||
'remove_milliseconds',
|
||||
'remove_milliseconds_start',
|
||||
'chunk_overlap_method',
|
||||
'seed',
|
||||
];
|
||||
const getParams = settings => Object.fromEntries(
|
||||
Object.entries(settings).filter(([key]) =>
|
||||
chatterboxParams.includes(key),
|
||||
),
|
||||
);
|
||||
|
||||
const requestBody = {
|
||||
model: settings.model,
|
||||
voice: voiceId,
|
||||
input: inputText,
|
||||
response_format: 'wav',
|
||||
speed: settings.speed,
|
||||
stream: streaming,
|
||||
params: getParams(settings),
|
||||
};
|
||||
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': streaming ? 'no-cache' : undefined,
|
||||
};
|
||||
|
||||
if (streaming) {
|
||||
headers['Cache-Control'] = 'no-cache';
|
||||
}
|
||||
|
||||
const response = await fetch(settings.provider_endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
toastr.error(response.statusText, 'TTS Generation Failed');
|
||||
throw new Error(
|
||||
`HTTP ${response.status}: ${await response.text()}`,
|
||||
);
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
setVolume(volume) {
|
||||
// Clamp volume between 0.0 and 2.0 (0% to 200%)
|
||||
this.currentVolume = Math.max(0, Math.min(2.0, volume));
|
||||
|
||||
// Set volume for regular audio element (non-streaming)
|
||||
this.audioElement.volume = Math.min(this.currentVolume, 1.0); // HTML audio element max is 1.0
|
||||
|
||||
// Set volume for AudioWorklet (streaming)
|
||||
if (this.audioWorkletNode) {
|
||||
this.audioWorkletNode.port.postMessage({ volume: this.currentVolume });
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user