🎨 优化扩展模块,完成ai接入和对话功能

This commit is contained in:
2026-02-12 23:12:28 +08:00
parent 4e611d3a5e
commit 572f3aa15b
779 changed files with 194400 additions and 3136 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,208 @@
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { AzureTtsProvider };
/**
 * TTS provider that reaches Azure Text-to-Speech through the server-side
 * `/api/azure/list` and `/api/azure/generate` endpoints.
 *
 * The API key is never handled here; only its presence is read from
 * `secret_state`. The Azure region is the single user-editable setting.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    /** Object URL of the preview clip currently attached to `audioElement`, or null. */
    #previewUrl = null;

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /** Markup for the provider settings panel (API key button and region input). */
    get settingsHtml() {
        const html = `
<div class="azure_tts_settings">
<div class="flex-container alignItemsBaseline">
<h4 for="azure_tts_key" class="flex1 margin0">
<a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
</h4>
<div id="azure_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_azure_tts">
<i class="fa-solid fa-key"></i>
<span>Click to set</span>
</div>
</div>
<label for="azure_tts_region">Region:</label>
<input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
<hr>
</div>
`;
        return html;
    }

    constructor() {
        // Kept on the instance so dispose() can remove exactly the same function reference.
        this.handler = async (/** @type {string} */ key) => {
            if (key !== SECRET_KEYS.AZURE_TTS) return;
            $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
            await this.onRefreshClick();
        };
    }

    /** Unsubscribes the secret-change handler registered by loadSettings(). */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /** Reads the region from the UI, invalidates the cached voices and persists the settings. */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Voices are region-specific, so the cached list is no longer valid
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies stored settings, wires up the UI and performs a readiness check.
     * @param {object} settings Stored provider settings; only keys present in `defaultSettings` are accepted
     * @throws {string} When `settings` contains a key that is not defined in `defaultSettings`
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so loading settings never mutates `defaultSettings`
        this.settings = structuredClone(this.defaultSettings);
        for (const [key, value] of Object.entries(settings)) {
            if (!Object.hasOwn(this.settings, key)) {
                // A plain string is thrown (not an Error) to keep what existing callers receive unchanged
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
            this.settings[key] = value;
        }

        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
        $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.on(event, this.handler);
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            // Not being ready (e.g. list request failed) is an expected state while the user is still configuring
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Readiness check: when an API key is set, fetches the voice list and caches it
     * so a later getVoice() does not have to fetch it again; otherwise clears the cache.
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        if (secret_state[SECRET_KEYS.AZURE_TTS]) {
            this.voices = await this.fetchTtsVoiceObjects();
        } else {
            this.voices = [];
        }
    }

    /** Re-runs the readiness check. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Resolves a voice object by its name, fetching the voice list first if it is not cached.
     * @param {string} voiceName Voice name to look up
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(voice => voice.name === voiceName);
        if (!match) {
            // A plain string is thrown (not an Error) to keep what existing callers receive unchanged
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetches the voices available for the configured region.
     * @returns {Promise<object[]>} Voice objects sorted by locale, then short name; empty when key or region is missing
     * @throws {Error} When the request fails or the payload is not an array
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }
        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const payload = await response.json();
        if (!Array.isArray(payload)) {
            throw new Error('Azure TTS voice list response is not an array');
        }
        return payload
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @throws {Error} When generation fails or playback cannot start
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        // An interrupted preview never fires `ended`, so its URL has to be released here
        this.#revokePreviewUrl();

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration() already throws on a non-OK response
        const response = await this.fetchTtsGeneration(text, id);
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        this.#previewUrl = url;
        this.audioElement.src = url;
        this.audioElement.onended = () => {
            URL.revokeObjectURL(url);
            if (this.#previewUrl === url) {
                this.#previewUrl = null;
            }
        };
        await this.audioElement.play();
    }

    /** Releases the object URL of the previous preview clip, if one is still held. */
    #revokePreviewUrl() {
        if (this.#previewUrl) {
            URL.revokeObjectURL(this.#previewUrl);
            this.#previewUrl = null;
        }
    }

    /**
     * Requests speech generation from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the key or region is missing, or the request fails
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }
        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }
}

View File

@@ -0,0 +1,649 @@
import { saveTtsProviderSettings } from './index.js';
export { ChatterboxTtsProvider };
/**
 * TTS provider for a Chatterbox TTS server, contacted directly from the browser
 * at `settings.provider_endpoint`.
 *
 * Voice IDs starting with `ref_` denote reference ("clone") audio files; every
 * other ID is treated as a predefined voice.
 */
class ChatterboxTtsProvider {
    //########//
    // Config //
    //########//

    /** Slider inputs and the setting each one edits; the value label id is `<selector>-value`. */
    static #SLIDERS = [
        ['#chatterbox-temperature', 'temperature'],
        ['#chatterbox-exaggeration', 'exaggeration'],
        ['#chatterbox-cfg-weight', 'cfg_weight'],
        ['#chatterbox-speed', 'speed_factor'],
    ];

    settings = {};

    constructor() {
        // Default settings; loadSettings() overrides any of these keys with stored values
        this.settings = {
            provider_endpoint: 'http://localhost:8004',
            voice_mode: 'predefined',
            predefined_voice: 'S1',
            reference_voice: '',
            temperature: 0.8,
            exaggeration: 0.5,
            cfg_weight: 0.5,
            seed: -1,
            speed_factor: 1.0,
            language: 'en',
            split_text: true,
            chunk_size: 120,
            output_format: 'wav',
            voiceMap: {},
        };
    }

    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    languageLabels = {
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Portuguese': 'pt',
        'Polish': 'pl',
        'Turkish': 'tr',
        'Russian': 'ru',
        'Dutch': 'nl',
        'Czech': 'cs',
        'Arabic': 'ar',
        'Chinese': 'zh-cn',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hindi': 'hi',
    };

    /** Markup (including scoped CSS) for the provider settings panel, rendered from the current settings. */
    get settingsHtml() {
        let html = `<div class="chatterbox-settings-container">
<div class="chatterbox-settings-header">
<h3>Chatterbox TTS Settings</h3>
<div class="status-indicator">
Status: <span id="chatterbox-status" class="offline">Offline</span>
</div>
</div>`;

        // Server endpoint
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-endpoint">Server Endpoint:</label>
<input id="chatterbox-endpoint" type="text" class="text_pole" value="${this.settings.provider_endpoint}" />
</div>`;

        // Language selection
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-language">Language:</label>
<select id="chatterbox-language">`;
        for (const [label, code] of Object.entries(this.languageLabels)) {
            html += `<option value="${code}" ${code === this.settings.language ? 'selected' : ''}>${label}</option>`;
        }
        html += `</select>
</div>`;

        // Generation parameters
        html += `<div class="chatterbox-params-section">
<h4>Generation Parameters</h4>`;

        // Temperature
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-temperature">Temperature: <span id="chatterbox-temperature-value">${this.settings.temperature}</span></label>
<input id="chatterbox-temperature" type="range" min="0" max="1" step="0.1" value="${this.settings.temperature}" />
</div>`;

        // Exaggeration
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-exaggeration">Exaggeration: <span id="chatterbox-exaggeration-value">${this.settings.exaggeration}</span></label>
<input id="chatterbox-exaggeration" type="range" min="0" max="2" step="0.1" value="${this.settings.exaggeration}" />
</div>`;

        // CFG Weight
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-cfg-weight">CFG Weight: <span id="chatterbox-cfg-weight-value">${this.settings.cfg_weight}</span></label>
<input id="chatterbox-cfg-weight" type="range" min="0" max="1" step="0.1" value="${this.settings.cfg_weight}" />
</div>`;

        // Speed Factor
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-speed">Speed Factor: <span id="chatterbox-speed-value">${this.settings.speed_factor}</span></label>
<input id="chatterbox-speed" type="range" min="0.5" max="2" step="0.1" value="${this.settings.speed_factor}" />
</div>`;

        // Seed
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-seed">Seed (-1 for random):</label>
<input id="chatterbox-seed" class="text_pole" type="number" min="-1" value="${this.settings.seed}" />
</div>`;

        // Text chunking
        html += `<div class="chatterbox-setting-row">
<label class="checkbox_label">
<input type="checkbox" id="chatterbox-split-text" ${this.settings.split_text ? 'checked' : ''} />
Split long texts into chunks
</label>
</div>`;

        // Chunk size
        html += `<div class="chatterbox-setting-row" id="chunk-size-row" ${!this.settings.split_text ? 'style="display: none;"' : ''}>
<label for="chatterbox-chunk-size">Chunk Size:</label>
<input id="chatterbox-chunk-size" class="text_pole" type="number" min="50" max="500" value="${this.settings.chunk_size}" />
</div>`;

        // Output format
        html += `<div class="chatterbox-setting-row">
<label for="chatterbox-format">Output Format:</label>
<select id="chatterbox-format">
<option value="wav" ${this.settings.output_format === 'wav' ? 'selected' : ''}>WAV</option>
<option value="opus" ${this.settings.output_format === 'opus' ? 'selected' : ''}>Opus</option>
</select>
</div>`;

        html += '</div>'; // End params section

        // Footer with links
        html += `<div class="chatterbox-footer">
<a href="${this.settings.provider_endpoint}" target="_blank">Chatterbox Web UI</a> |
<a href="https://github.com/devnen/Chatterbox-TTS-Server" target="_blank">Documentation</a>
</div>`;

        html += '</div>'; // End container

        // Add CSS styles
        html += `<style>
.chatterbox-settings-container {
padding: 10px;
}
.chatterbox-settings-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
}
.chatterbox-settings-header h3 {
margin: 0;
}
.chatterbox-settings-container .status-indicator {
font-weight: bold;
}
#chatterbox-status.ready { color: #4CAF50; }
#chatterbox-status.offline { color: #f44336; }
#chatterbox-status.processing { color: #2196F3; }
.chatterbox-setting-row {
margin-bottom: 10px;
display: flex;
align-items: center;
gap: 10px;
}
.chatterbox-setting-row label {
flex: 0 0 150px;
}
.chatterbox-setting-row label.checkbox_label {
flex-basis: auto;
}
.chatterbox-setting-row input[type="text"],
.chatterbox-setting-row input[type="number"],
.chatterbox-setting-row select {
flex: 1;
}
.chatterbox-setting-row input[type="range"] {
flex: 1;
}
.chatterbox-params-section {
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #ccc;
}
.chatterbox-params-section h4 {
margin-top: 0;
margin-bottom: 10px;
}
.chatterbox-footer {
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #ccc;
text-align: center;
font-size: 0.9em;
}
</style>`;

        return html;
    }

    //######################//
    // Startup & Initialize //
    //######################//

    /**
     * Applies stored settings, syncs the UI, checks the server and wires up the inputs.
     * Keys that are not part of the default settings are ignored.
     * @param {object} settings Stored provider settings
     */
    async loadSettings(settings) {
        this.updateStatus('Offline');

        if (Object.keys(settings).length === 0) {
            console.info('Using default Chatterbox TTS Provider settings');
        } else {
            // Populate settings with provided values
            for (const [key, value] of Object.entries(settings)) {
                if (key in this.settings) {
                    this.settings[key] = value;
                }
            }
        }

        // Update UI elements
        this.updateUIFromSettings();
        console.debug('ChatterboxTTS: Settings loaded');

        try {
            // Check if TTS provider is ready
            await this.checkReady();
            if (this.ready) {
                // Fetch all voice types for the voice map
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            }
            this.setupEventListeners();
        } catch (error) {
            console.error('Error loading Chatterbox settings:', error);
            this.updateStatus('Offline');
        }
    }

    /** Pushes the current settings into the settings panel inputs. */
    updateUIFromSettings() {
        $('#chatterbox-endpoint').val(this.settings.provider_endpoint);
        $('#chatterbox-language').val(this.settings.language);
        for (const [selector, key] of ChatterboxTtsProvider.#SLIDERS) {
            $(selector).val(this.settings[key]);
            $(`${selector}-value`).text(this.settings[key]);
        }
        $('#chatterbox-seed').val(this.settings.seed);
        $('#chatterbox-split-text').prop('checked', this.settings.split_text);
        $('#chatterbox-chunk-size').val(this.settings.chunk_size);
        $('#chatterbox-format').val(this.settings.output_format);
        // Show/hide chunk size based on split text
        this.#toggleChunkSizeRow(this.settings.split_text);
    }

    /** Shows the chunk-size row only while text splitting is enabled. */
    #toggleChunkSizeRow(visible) {
        if (visible) {
            $('#chunk-size-row').show();
        } else {
            $('#chunk-size-row').hide();
        }
    }

    //##############################//
    // Check Server is Available    //
    //##############################//

    /**
     * Probes `/api/ui/initial-data` and records the outcome in `this.ready`.
     * Never throws; failures are logged and leave `ready` false.
     */
    async checkReady() {
        try {
            const response = await fetch(`${this.settings.provider_endpoint}/api/ui/initial-data`);
            if (!response.ok) {
                throw new Error(`HTTP Error Response: ${response.status} ${response.statusText}`);
            }
            const data = await response.json();
            // Check if we got valid data
            if (data) {
                this.ready = true;
                console.log('Chatterbox TTS service is ready.');
            } else {
                this.ready = false;
                console.log('Chatterbox TTS service returned invalid data.');
            }
        } catch (error) {
            console.error('Error checking Chatterbox TTS service readiness:', error);
            this.ready = false;
        }
    }

    //######################//
    // Get Available Voices //
    //######################//

    /**
     * Fetches predefined voices and, best-effort, reference (clone) voices.
     * The result is cached in `this.voices`.
     * @returns {Promise<object[]>} Combined voice list; empty when the predefined list cannot be fetched
     */
    async fetchTtsVoiceObjects() {
        try {
            // Always fetch predefined voices
            const predefinedResponse = await fetch(`${this.settings.provider_endpoint}/get_predefined_voices`);
            if (!predefinedResponse.ok) {
                throw new Error(`HTTP ${predefinedResponse.status}: ${predefinedResponse.statusText}`);
            }
            const predefinedData = await predefinedResponse.json();
            // Transform predefined voices
            const predefinedVoices = predefinedData.map(voice => ({
                name: voice.display_name,
                voice_id: voice.voice_id || voice.filename,
                preview_url: null,
                lang: voice.language || 'en',
            }));

            // Always try to fetch reference voices; a failure here must not discard the predefined ones
            let referenceVoices = [];
            try {
                const refResponse = await fetch(`${this.settings.provider_endpoint}/get_reference_files`);
                if (refResponse.ok) {
                    const refData = await refResponse.json();
                    referenceVoices = refData.map(filename => ({
                        name: `[Clone] ${filename}`,
                        voice_id: `ref_${filename}`,
                        preview_url: null,
                        lang: 'en',
                    }));
                }
            } catch (error) {
                console.warn('Failed to fetch reference voices:', error);
            }

            // Combine all voices
            this.voices = [...predefinedVoices, ...referenceVoices];
            console.log(`Loaded ${this.voices.length} voices (${predefinedVoices.length} predefined, ${referenceVoices.length} reference)`);
            return this.voices;
        } catch (error) {
            console.error('Error fetching Chatterbox voices:', error);
            this.voices = [];
            return [];
        }
    }

    // Alias for internal use
    async fetchVoices() {
        return this.fetchTtsVoiceObjects();
    }

    //###########################//
    // Setup Event Listeners     //
    //###########################//

    /** Binds every settings input so that edits update `this.settings` and are persisted. */
    setupEventListeners() {
        // Server endpoint change
        $('#chatterbox-endpoint').on('input', () => {
            this.settings.provider_endpoint = $('#chatterbox-endpoint').val();
            this.onSettingsChange();
        });

        // Language
        $('#chatterbox-language').on('change', (e) => {
            this.settings.language = e.target.value;
            this.onSettingsChange();
        });

        // Parameter sliders
        for (const [selector, key] of ChatterboxTtsProvider.#SLIDERS) {
            $(selector).on('input', (e) => {
                this.settings[key] = parseFloat(e.target.value);
                $(`${selector}-value`).text(this.settings[key]);
                this.onSettingsChange();
            });
        }

        // Seed: an empty or invalid field falls back to -1 (random) instead of storing NaN
        $('#chatterbox-seed').on('change', (e) => {
            const seed = Number.parseInt(e.target.value, 10);
            this.settings.seed = Number.isNaN(seed) ? -1 : seed;
            this.onSettingsChange();
        });

        // Text splitting
        $('#chatterbox-split-text').on('change', (e) => {
            this.settings.split_text = e.target.checked;
            this.#toggleChunkSizeRow(e.target.checked);
            this.onSettingsChange();
        });

        // Chunk size: an invalid entry keeps the previous value (NaN would serialize as null in requests)
        $('#chatterbox-chunk-size').on('change', (e) => {
            const chunkSize = Number.parseInt(e.target.value, 10);
            if (!Number.isNaN(chunkSize)) {
                this.settings.chunk_size = chunkSize;
            }
            this.onSettingsChange();
        });

        // Output format
        $('#chatterbox-format').on('change', (e) => {
            this.settings.output_format = e.target.value;
            this.onSettingsChange();
        });
    }

    //#############################//
    // Store ST interface settings //
    //#############################//

    /** Persists the current settings. */
    onSettingsChange() {
        // Save the updated settings
        saveTtsProviderSettings();
    }

    //#########################//
    // Handle Reload button    //
    //#########################//

    /** Re-checks the server, reloads the voices when it is reachable and updates the status label. */
    async onRefreshClick() {
        try {
            this.updateStatus('Processing');
            await this.checkReady();
            if (this.ready) {
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            } else {
                this.updateStatus('Offline');
            }
        } catch (error) {
            console.error('Error during refresh:', error);
            this.updateStatus('Offline');
        }
    }

    //##################//
    // Request building //
    //##################//

    /**
     * Builds the JSON body for the `/tts` endpoint from the current settings.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID; a `ref_` prefix selects clone mode
     * @param {{forPreview?: boolean}} [options] Previews never split the text and send no chunk size
     * @returns {object} Request body
     */
    #buildRequestBody(text, voiceId, { forPreview = false } = {}) {
        const isReferenceVoice = typeof voiceId === 'string' && voiceId.startsWith('ref_');
        const requestBody = {
            text,
            voice_mode: isReferenceVoice ? 'clone' : 'predefined',
            temperature: this.settings.temperature,
            exaggeration: this.settings.exaggeration,
            cfg_weight: this.settings.cfg_weight,
            // Any negative (or non-numeric) seed means "pick a random one"
            seed: this.settings.seed >= 0 ? this.settings.seed : Math.floor(Math.random() * 2147483648),
            speed_factor: this.settings.speed_factor,
            language: this.settings.language,
            split_text: forPreview ? false : this.settings.split_text,
            output_format: this.settings.output_format,
        };
        if (!forPreview) {
            requestBody.chunk_size = this.settings.chunk_size;
        }
        // Add voice-specific parameters
        if (isReferenceVoice) {
            requestBody.reference_audio_filename = voiceId.substring(4); // Remove 'ref_' prefix
        } else {
            requestBody.predefined_voice_id = voiceId || this.settings.predefined_voice;
        }
        return requestBody;
    }

    //##################//
    // Preview Voice    //
    //##################//

    /**
     * Generates a short fixed sample with the given voice and plays it.
     * @param {string} voiceId Voice ID
     * @throws {Error} When generation fails or playback cannot start
     */
    async previewTtsVoice(voiceId) {
        let audioUrl = null;
        try {
            this.updateStatus('Processing');
            const previewText = 'Hello! This is a preview of the selected voice.';
            const requestBody = this.#buildRequestBody(previewText, voiceId, { forPreview: true });

            const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(requestBody),
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }

            // Get the audio blob and play it
            const audioBlob = await response.blob();
            const url = URL.createObjectURL(audioBlob);
            audioUrl = url;
            const audio = new Audio(url);
            // Release the blob URL and reset the status whether playback finishes or breaks midway
            const finish = () => {
                URL.revokeObjectURL(url);
                this.updateStatus('Ready');
            };
            audio.addEventListener('ended', finish, { once: true });
            audio.addEventListener('error', finish, { once: true });
            await audio.play();
        } catch (error) {
            // Playback never started, so `ended` will not fire to release the URL
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl);
            }
            console.error('Error previewing voice:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //#####################//
    // Get Voice Object    //
    //#####################//

    /**
     * Resolves a voice by name or ID, loading the voice list first if needed.
     * Unknown voices do not fail: a synthetic voice object is returned instead.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Voice object
     */
    async getVoice(voiceName) {
        // Ensure voices are loaded
        if (this.voices.length === 0) {
            await this.fetchTtsVoiceObjects();
        }

        // Find the voice object by name or voice_id
        const match = this.voices.find(voice =>
            voice.name === voiceName ||
            voice.voice_id === voiceName ||
            voice.display_name === voiceName,
        );
        if (match) {
            return match;
        }

        console.warn(`Voice not found: ${voiceName}`);
        // Check if it's a reference voice that wasn't in the list
        if (typeof voiceName === 'string' && voiceName.startsWith('ref_')) {
            const filename = voiceName.substring(4);
            return {
                name: `[Clone] ${filename}`,
                voice_id: voiceName,
                preview_url: null,
                lang: 'en',
            };
        }
        // Return a default voice object
        return {
            name: voiceName || 'Default',
            voice_id: voiceName || this.settings.predefined_voice || 'S1',
            preview_url: null,
            lang: 'en',
        };
    }

    //##################//
    // Generate TTS     //
    //##################//

    /**
     * Requests speech generation for the given text.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response carrying the audio
     * @throws {Error} When the request fails
     */
    async generateTts(inputText, voiceId) {
        try {
            this.updateStatus('Processing');
            const requestBody = this.#buildRequestBody(inputText, voiceId);
            console.log('Generating TTS with params:', requestBody);

            const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify(requestBody),
            });
            if (!response.ok) {
                const errorText = await response.text();
                console.error('TTS generation error:', errorText);
                throw new Error(`HTTP ${response.status}: ${errorText}`);
            }

            this.updateStatus('Ready');
            // Return the response directly - SillyTavern expects a Response object
            return response;
        } catch (error) {
            console.error('Error in generateTts:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //######################//
    // Update Status        //
    //######################//

    /**
     * Shows a status label; the lower-cased status doubles as the element's CSS class.
     * @param {string} status Status text ('Ready', 'Offline' or 'Processing' in this class)
     */
    updateStatus(status) {
        const statusElement = document.getElementById('chatterbox-status');
        if (statusElement) {
            statusElement.textContent = status;
            statusElement.className = status.toLowerCase();
        }
    }
}

View File

@@ -0,0 +1,233 @@
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { ChutesTtsProvider };
/**
 * TTS provider for the Chutes API (Kokoro model), reached through the
 * server-side `/api/openai/chutes/generate-voice` endpoint.
 *
 * The model and voice lists are hard-coded in this class; no network request
 * is made to discover them.
 */
class ChutesTtsProvider {
    settings;
    voices = [];
    models = [];
    separator = ' . ';

    defaultSettings = {
        voiceMap: {},
        model: 'kokoro',
        speed: 1,
    };

    /** Markup for the provider settings panel (API key button, model select, speed slider). */
    get settingsHtml() {
        const html = `
<div class="flex-container alignItemsCenter">
<div class="flex1">Chutes TTS API</div>
<div id="chutes_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_chutes">
<i class="fa-solid fa-key"></i>
<span>API Key</span>
</div>
</div>
<div class="flex-container flexFlowColumn">
<div class="flex1">
<label for="chutes_tts_model">Model</label>
<select id="chutes_tts_model" class="text_pole"></select>
</div>
<div>
<label for="chutes_tts_speed">Speed <span id="chutes_tts_speed_output"></span></label>
<input type="range" id="chutes_tts_speed" value="1" min="0.25" max="3" step="0.05">
</div>
</div>`;
        return html;
    }

    constructor() {
        // Kept on the instance so dispose() can remove exactly the same function reference.
        this.handler = async (/** @type {string} */ key) => {
            if (key !== SECRET_KEYS.CHUTES) return;
            $('#chutes_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CHUTES]);
            await this.onRefreshClick();
        };
    }

    /** Unsubscribes the secret-change handler registered by loadSettings(). */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /** Reads model and speed from the UI and persists the settings. */
    onSettingsChange() {
        this.settings.model = $('#chutes_tts_model').val();
        this.settings.speed = Number($('#chutes_tts_speed').val());
        saveTtsProviderSettings();
    }

    /**
     * Adopts the given settings object, fills in any missing defaults, syncs the UI
     * and wires up the inputs.
     * @param {object} settings Stored provider settings; used (and completed) in place
     */
    async loadSettings(settings) {
        // Fill only the keys that are missing, so partially stored settings still get sane values.
        // Defaults are cloned so the stored voiceMap never aliases `defaultSettings.voiceMap`.
        for (const [key, value] of Object.entries(this.defaultSettings)) {
            settings[key] ??= structuredClone(value);
        }
        this.settings = settings;

        // Update UI
        $('#chutes_tts_model').val(this.settings.model);
        $('#chutes_tts_speed').val(this.settings.speed);
        $('#chutes_tts_speed_output').text(this.settings.speed);
        $('#chutes_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CHUTES]);

        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.on(event, this.handler);
        });

        await this.checkReady();

        $('#chutes_tts_model').on('change', () => this.onSettingsChange());
        $('#chutes_tts_speed').on('input', () => {
            const value = $('#chutes_tts_speed').val();
            $('#chutes_tts_speed_output').text(String(value));
            this.onSettingsChange();
        });
    }

    /** Rebuilds the model dropdown and the voice list. */
    async checkReady() {
        await this.updateModels();
        await this.updateVoices();
    }

    /** Re-runs checkReady(). */
    async onRefreshClick() {
        return await this.checkReady();
    }

    /** Populates the model dropdown and forces the model setting to 'kokoro'. */
    async updateModels() {
        // For Chutes TTS, we always use the Kokoro model currently.
        this.models = ['kokoro'];
        $('#chutes_tts_model').empty();
        $('#chutes_tts_model').append($('<option>').val('kokoro').text('Kokoro'));
        $('#chutes_tts_model').val('kokoro');
        this.settings.model = 'kokoro';
    }

    /** Rebuilds `this.voices` from the built-in Kokoro voice table. */
    async updateVoices() {
        // Kokoro voices list
        const kokoroVoices = [
            { id: 'af_alloy', name: 'Alloy (Female)', lang: 'en-US' },
            { id: 'af_aoede', name: 'Aoede (Female)', lang: 'en-US' },
            { id: 'af_bella', name: 'Bella (Female)', lang: 'en-US' },
            { id: 'af_heart', name: 'Heart (Female) - Default', lang: 'en-US' },
            { id: 'af_jessica', name: 'Jessica (Female)', lang: 'en-US' },
            { id: 'af_kore', name: 'Kore (Female)', lang: 'en-US' },
            { id: 'af_nicole', name: 'Nicole (Female)', lang: 'en-US' },
            { id: 'af_nova', name: 'Nova (Female)', lang: 'en-US' },
            { id: 'af_river', name: 'River (Female)', lang: 'en-US' },
            { id: 'af_sarah', name: 'Sarah (Female)', lang: 'en-US' },
            { id: 'af_sky', name: 'Sky (Female)', lang: 'en-US' },
            { id: 'am_adam', name: 'Adam (Male)', lang: 'en-US' },
            { id: 'am_echo', name: 'Echo (Male)', lang: 'en-US' },
            { id: 'am_eric', name: 'Eric (Male)', lang: 'en-US' },
            { id: 'am_fenrir', name: 'Fenrir (Male)', lang: 'en-US' },
            { id: 'am_liam', name: 'Liam (Male)', lang: 'en-US' },
            { id: 'am_michael', name: 'Michael (Male)', lang: 'en-US' },
            { id: 'am_onyx', name: 'Onyx (Male)', lang: 'en-US' },
            { id: 'am_puck', name: 'Puck (Male)', lang: 'en-US' },
            { id: 'am_santa', name: 'Santa (Male)', lang: 'en-US' },
            { id: 'bf_alice', name: 'Alice (British Female)', lang: 'en-GB' },
            { id: 'bf_emma', name: 'Emma (British Female)', lang: 'en-GB' },
            { id: 'bf_isabella', name: 'Isabella (British Female)', lang: 'en-GB' },
            { id: 'bf_lily', name: 'Lily (British Female)', lang: 'en-GB' },
            { id: 'bm_daniel', name: 'Daniel (British Male)', lang: 'en-GB' },
            { id: 'bm_fable', name: 'Fable (British Male)', lang: 'en-GB' },
            { id: 'bm_george', name: 'George (British Male)', lang: 'en-GB' },
            { id: 'bm_lewis', name: 'Lewis (British Male)', lang: 'en-GB' },
            { id: 'ef_dora', name: 'Dora (European Female)', lang: 'es-ES' },
            { id: 'em_alex', name: 'Alex (European Male)', lang: 'es-ES' },
            { id: 'em_santa', name: 'Santa (European Male)', lang: 'es-ES' },
            { id: 'ff_siwis', name: 'Siwis (French Female)', lang: 'fr-FR' },
            { id: 'hf_alpha', name: 'Alpha (Hindi Female)', lang: 'hi-IN' },
            { id: 'hf_beta', name: 'Beta (Hindi Female)', lang: 'hi-IN' },
            { id: 'hm_omega', name: 'Omega (Hindi Male)', lang: 'hi-IN' },
            { id: 'hm_psi', name: 'Psi (Hindi Male)', lang: 'hi-IN' },
            { id: 'if_sara', name: 'Sara (Italian Female)', lang: 'it-IT' },
            { id: 'im_nicola', name: 'Nicola (Italian Male)', lang: 'it-IT' },
            { id: 'jf_alpha', name: 'Alpha (Japanese Female)', lang: 'ja-JP' },
            { id: 'jf_gongitsune', name: 'Gongitsune (Japanese Female)', lang: 'ja-JP' },
            { id: 'jf_nezumi', name: 'Nezumi (Japanese Female)', lang: 'ja-JP' },
            { id: 'jf_tebukuro', name: 'Tebukuro (Japanese Female)', lang: 'ja-JP' },
            { id: 'jm_kumo', name: 'Kumo (Japanese Male)', lang: 'ja-JP' },
            { id: 'pf_dora', name: 'Dora (Portuguese Female)', lang: 'pt-PT' },
            { id: 'pm_alex', name: 'Alex (Portuguese Male)', lang: 'pt-PT' },
            { id: 'pm_santa', name: 'Santa (Portuguese Male)', lang: 'pt-PT' },
            { id: 'zf_xiaobei', name: 'Xiaobei (Chinese Female)', lang: 'zh-CN' },
            { id: 'zf_xiaoni', name: 'Xiaoni (Chinese Female)', lang: 'zh-CN' },
            { id: 'zf_xiaoxiao', name: 'Xiaoxiao (Chinese Female)', lang: 'zh-CN' },
            { id: 'zf_xiaoyi', name: 'Xiaoyi (Chinese Female)', lang: 'zh-CN' },
            { id: 'zm_yunjian', name: 'Yunjian (Chinese Male)', lang: 'zh-CN' },
            { id: 'zm_yunxi', name: 'Yunxi (Chinese Male)', lang: 'zh-CN' },
            { id: 'zm_yunxia', name: 'Yunxia (Chinese Male)', lang: 'zh-CN' },
            { id: 'zm_yunyang', name: 'Yunyang (Chinese Male)', lang: 'zh-CN' },
        ];
        this.voices = kokoroVoices.map(v => ({
            name: v.name,
            voice_id: v.id,
            lang: v.lang,
        }));
    }

    /**
     * Resolves a voice by display name or voice ID.
     * @param {string} voiceName Voice name or ID
     * @returns {Promise<object>} Matching voice, or the 'af_heart' voice when nothing matches
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            await this.updateVoices();
        }
        const voice = this.voices.find(v => v.name === voiceName || v.voice_id === voiceName);
        return voice || this.voices.find(v => v.voice_id === 'af_heart');
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * Requests speech generation from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID; 'af_heart' is used when empty
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When no API key is set or the request fails
     */
    async fetchTtsGeneration(text, voiceId) {
        const apiKey = secret_state[SECRET_KEYS.CHUTES];
        if (!apiKey) {
            throw new Error('No Chutes API key found');
        }

        const response = await fetch('/api/openai/chutes/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                input: text,
                voice: voiceId || 'af_heart',
                speed: this.settings.speed || 1,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Chutes TTS failed: ${error}`);
        }
        return response;
    }

    /**
     * Lists the available voices.
     * @returns {Promise<object[]>} Voice objects with `name`, `voice_id`, `preview_url` and `lang`
     */
    async fetchTtsVoiceObjects() {
        if (this.voices.length === 0) {
            await this.updateVoices();
        }
        return this.voices.map(voice => ({
            name: voice.name,
            voice_id: voice.voice_id,
            preview_url: false,
            lang: voice.lang,
        }));
    }

    /**
     * Generates a preview sentence in the voice's language and plays it.
     * @param {string} voiceId Voice ID
     * @throws {Error} When generation fails or playback cannot start
     */
    async previewTtsVoice(voiceId) {
        // The preview text is chosen by language, so resolve the voice to get its `lang`
        const voice = await this.getVoice(voiceId);
        const text = getPreviewString(voice?.lang);
        const response = await this.generateTts(text, voiceId);

        const blob = await response.blob();
        const url = URL.createObjectURL(blob);
        const audio = new Audio(url);
        const release = () => URL.revokeObjectURL(url);
        audio.addEventListener('ended', release, { once: true });
        audio.addEventListener('error', release, { once: true });
        try {
            await audio.play();
        } catch (error) {
            // Playback never started, so neither event above will release the URL
            release();
            throw error;
        }
    }
}

View File

@@ -0,0 +1,771 @@
/*
TODO:
- Hide voice map; it's just confusing
- Delete useless call
*/
import { doExtrasFetch, extension_settings, getApiUrl, modules } from '../../extensions.js';
import { initVoiceMap } from './index.js';
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
export { CoquiTtsProvider };
// Tag placed in front of this module's error messages (see throwIfModuleMissing)
const DEBUG_PREFIX = '<Coqui TTS module> ';
// NOTE(review): presumably marks an API call in progress; usage is outside this view, confirm
let inApiCall = false;
let coquiApiModels = {}; // Initialized only once
let coquiApiModelsFull = {}; // Initialized only once
let coquiLocalModels = []; // Initialized only once
// NOTE(review): presumably set once the local model list has been fetched; confirm against the code that writes it
let coquiLocalModelsReceived = false;
/*
coquiApiModels format [language][dataset][name]:coqui-api-model-id, example:
{
"en": {
"vctk": {
"vits": "tts_models/en/vctk/vits"
}
},
"ja": {
"kokoro": {
"tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
}
}
}
*/
// Maps language codes (plus the special 'multilingual' key) to human-readable labels
const languageLabels = {
'multilingual': 'Multilingual',
'en': 'English',
'fr': 'French',
'es': 'Spanish',
'ja': 'Japanese',
};
/**
 * Guards calls that need the Extras `coqui-tts` module.
 * @throws {Error} When `coqui-tts` is not in the loaded `modules` list; the message is
 * DEBUG_PREFIX followed by instructions for enabling the module
 */
function throwIfModuleMissing() {
    if (!modules.includes('coqui-tts')) {
        const message = 'Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.';
        // toastr.error(message, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        // Error() takes the message as its first argument only (the second is an options object),
        // so the prefix and the message must be joined into one string.
        throw new Error(`${DEBUG_PREFIX}${message}`);
    }
}
/**
 * Puts the per-model language and speaker dropdowns back on their 'none' placeholder.
 */
function resetModelSettings() {
    const settingSelectors = [
        '#coqui_api_model_settings_language',
        '#coqui_api_model_settings_speaker',
    ];
    for (const selector of settingSelectors) {
        $(selector).val('none');
    }
}
class CoquiTtsProvider {
//#############################//
// Extension UI and Settings //
//#############################//
// Active settings object; assigned in loadSettings.
settings;
// Allowed setting keys and their initial values; loadSettings rejects any key not listed here.
defaultSettings = {
// Not read or written by the visible methods of this class.
voiceMap: {},
// voice name -> 'model_id[language][speaker]' string, rebuilt from voiceMapDict by updateCustomVoices.
customVoices: {},
// Not read or written by the visible methods of this class.
voiceIds: [],
// voice name -> { model_type, model_id, model_language, model_speaker } as stored by onAddClick.
voiceMapDict: {},
};
/**
 * Markup for the Coqui settings panel: saved-voice dropdown with add/remove buttons,
 * the model origin dropdown, the coqui-api language/model/settings dropdowns with
 * install status and button, and the local model dropdown.
 * The element ids used here are the ones the handlers in this class query.
 * NOTE(review): the literal text "gpu_mode" right after the coqui_model_origin
 * <select> opening tag looks like a leftover - confirm and remove.
 * @returns {string} HTML string.
 */
get settingsHtml() {
let html = `
<div class="flex wide100p flexGap10 alignitemscenter">
<div>
<div style="flex: 50%;">
<small>To use CoquiTTS, select the origin, language, and model, then click Add Voice. The voice will then be available to add to a character. Voices are saved globally. </small><br>
<label for="coqui_voicename_select">Select Saved Voice:</label>
<select id="coqui_voicename_select">
<!-- Populated by JS -->
</select>
<div class="tts_block">
<input id="coqui_remove_voiceId_mapping" class="menu_button" type="button" value="Remove Voice" />
<input id="coqui_add_voiceId_mapping" class="menu_button" type="button" value="Add Voice" />
</div>
<label for="coqui_model_origin">Models:</label>
<select id="coqui_model_origin">gpu_mode
<option value="none">Select Origin</option>
<option value="coqui-api">Coqui API (Tested)</option>
<option value="coqui-api-full">Coqui API (Experimental)</option>
<option value="local">My Models</option>
</select>
<div id="coqui_api_model_div">
<select id="coqui_api_language">
<!-- Populated by JS and request -->
</select>
<select id="coqui_api_model_name">
<!-- Populated by JS and request -->
</select>
<div id="coqui_api_model_settings">
<select id="coqui_api_model_settings_language">
<!-- Populated by JS and request -->
</select>
<select id="coqui_api_model_settings_speaker">
<!-- Populated by JS and request -->
</select>
</div>
<span id="coqui_api_model_install_status">Model installed on extras server</span>
<input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" />
</div>
<div id="coqui_local_model_div">
<select id="coqui_local_model_name">
<!-- Populated by JS and request -->
</select>
</div>
</div>
</div>
</div>
`;
return html;
}
async loadSettings(settings) {
// Only accept keys defined in defaultSettings
this.settings = this.defaultSettings;
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
} else {
throw DEBUG_PREFIX + `Invalid setting passed to extension: ${key}`;
}
}
await initLocalModels();
this.updateCustomVoices(); // Overide any manual modification
$('#coqui_api_model_div').hide();
$('#coqui_local_model_div').hide();
$('#coqui_api_language').show();
$('#coqui_api_model_name').hide();
$('#coqui_api_model_settings').hide();
$('#coqui_api_model_install_status').hide();
$('#coqui_api_model_install_button').hide();
let that = this;
$('#coqui_model_origin').on('change', function () { that.onModelOriginChange(); });
$('#coqui_api_language').on('change', function () { that.onModelLanguageChange(); });
$('#coqui_api_model_name').on('change', function () { that.onModelNameChange(); });
$('#coqui_remove_voiceId_mapping').on('click', function () { that.onRemoveClick(); });
$('#coqui_add_voiceId_mapping').on('click', function () { that.onAddClick(); });
// Load coqui-api settings from json file
await fetch('/scripts/extensions/tts/coqui_api_models_settings.json')
.then(response => response.json())
.then(json => {
coquiApiModels = json;
console.debug(DEBUG_PREFIX,'initialized coqui-api model list to', coquiApiModels);
/*
$('#coqui_api_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModels) {
$("#coqui_api_language").append(new Option(languageLabels[language],language));
console.log(DEBUG_PREFIX,"added language",language);
}*/
});
// Load coqui-api FULL settings from json file
await fetch('/scripts/extensions/tts/coqui_api_models_settings_full.json')
.then(response => response.json())
.then(json => {
coquiApiModelsFull = json;
console.debug(DEBUG_PREFIX,'initialized coqui-api full model list to', coquiApiModelsFull);
/*
$('#coqui_api_full_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModelsFull) {
$("#coqui_api_full_language").append(new Option(languageLabels[language],language));
console.log(DEBUG_PREFIX,"added language",language);
}*/
});
}
// Perform a simple readiness check by trying to fetch voiceIds
async checkReady(){
throwIfModuleMissing();
await this.fetchTtsVoiceObjects();
}
updateCustomVoices() {
// Takes voiceMapDict and converts it to a string to save to voiceMap
this.settings.customVoices = {};
for (let voiceName in this.settings.voiceMapDict) {
const voiceId = this.settings.voiceMapDict[voiceName];
this.settings.customVoices[voiceName] = voiceId['model_id'];
if (voiceId['model_language'] != null)
this.settings.customVoices[voiceName] += '[' + voiceId['model_language'] + ']';
if (voiceId['model_speaker'] != null)
this.settings.customVoices[voiceName] += '[' + voiceId['model_speaker'] + ']';
}
// Update UI select list with voices
$('#coqui_voicename_select').empty();
$('#coqui_voicename_select')
.find('option')
.remove()
.end()
.append('<option value="none">Select Voice</option>')
.val('none');
for (const voiceName in this.settings.voiceMapDict) {
$('#coqui_voicename_select').append(new Option(voiceName, voiceName));
}
this.onSettingsChange();
}
onSettingsChange() {
console.debug(DEBUG_PREFIX, 'Settings changes', this.settings);
extension_settings.tts.Coqui = this.settings;
}
async onRefreshClick() {
this.checkReady();
}
async onAddClick() {
if (inApiCall) {
return; //TODO: block dropdown
}
// Ask user for voiceId name to save voice
const voiceName = await callGenericPopup('Name of Coqui voice to add to voice select dropdown:', POPUP_TYPE.INPUT);
const model_origin = $('#coqui_model_origin').val();
const model_language = $('#coqui_api_language').val();
const model_name = $('#coqui_api_model_name').val();
let model_setting_language = $('#coqui_api_model_settings_language').val();
let model_setting_speaker = $('#coqui_api_model_settings_speaker').val();
if (!voiceName) {
toastr.error('Voice name empty, please enter one.', DEBUG_PREFIX + ' voice mapping voice name', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return;
}
if (model_origin == 'none') {
toastr.error('Origin not selected, please select one.', DEBUG_PREFIX + ' voice mapping origin', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return;
}
if (model_origin == 'local') {
const model_id = $('#coqui_local_model_name').val();
if (model_name == 'none') {
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return;
}
this.settings.voiceMapDict[voiceName] = { model_type: 'local', model_id: 'local/' + model_id };
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
this.updateCustomVoices(); // Overide any manual modification
return;
}
if (model_language == 'none') {
toastr.error('Language not selected, please select one.', DEBUG_PREFIX + ' voice mapping language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return;
}
if (model_name == 'none') {
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return;
}
if (model_setting_language == 'none')
model_setting_language = null;
if (model_setting_speaker == 'none')
model_setting_speaker = null;
const tokens = $('#coqui_api_model_name').val().split('/');
const model_dataset = tokens[0];
const model_label = tokens[1];
const model_id = 'tts_models/' + model_language + '/' + model_dataset + '/' + model_label;
let modelDict = coquiApiModels;
if (model_origin == 'coqui-api-full')
modelDict = coquiApiModelsFull;
if (model_setting_language == null & 'languages' in modelDict[model_language][model_dataset][model_label]) {
toastr.error('Model language not selected, please select one.', DEBUG_PREFIX + ' voice mapping model language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return;
}
if (model_setting_speaker == null & 'speakers' in modelDict[model_language][model_dataset][model_label]) {
toastr.error('Model speaker not selected, please select one.', DEBUG_PREFIX + ' voice mapping model speaker', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return;
}
console.debug(DEBUG_PREFIX, 'Current custom voices: ', this.settings.customVoices);
this.settings.voiceMapDict[voiceName] = { model_type: 'coqui-api', model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
this.updateCustomVoices();
initVoiceMap(); // Update TTS extension voiceMap
let successMsg = voiceName + ':' + model_id;
if (model_setting_language != null)
successMsg += '[' + model_setting_language + ']';
if (model_setting_speaker != null)
successMsg += '[' + model_setting_speaker + ']';
toastr.info(successMsg, DEBUG_PREFIX + ' voice map updated', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return;
}
async getVoice(voiceName) {
let match = await this.fetchTtsVoiceObjects();
match = match.filter(
voice => voice.name == voiceName,
)[0];
if (!match) {
throw `TTS Voice name ${voiceName} not found in CoquiTTS Provider voice list`;
}
return match;
}
async onRemoveClick() {
const voiceName = $('#coqui_voicename_select').val();
if (voiceName === 'none') {
toastr.error('Voice not selected, please select one.', DEBUG_PREFIX + ' voice mapping voiceId', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return;
}
// Todo erase from voicemap
delete (this.settings.voiceMapDict[voiceName]);
this.updateCustomVoices();
initVoiceMap(); // Update TTS extension voiceMap
}
async onModelOriginChange() {
throwIfModuleMissing();
resetModelSettings();
const model_origin = $('#coqui_model_origin').val();
if (model_origin == 'none') {
$('#coqui_local_model_div').hide();
$('#coqui_api_model_div').hide();
}
// show coqui model selected list (SAFE)
if (model_origin == 'coqui-api') {
$('#coqui_local_model_div').hide();
$('#coqui_api_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModels) {
let languageLabel = language;
if (language in languageLabels)
languageLabel = languageLabels[language];
$('#coqui_api_language').append(new Option(languageLabel,language));
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
}
$('#coqui_api_model_div').show();
}
// show coqui model full list (UNSAFE)
if (model_origin == 'coqui-api-full') {
$('#coqui_local_model_div').hide();
$('#coqui_api_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModelsFull) {
let languageLabel = language;
if (language in languageLabels)
languageLabel = languageLabels[language];
$('#coqui_api_language').append(new Option(languageLabel,language));
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
}
$('#coqui_api_model_div').show();
}
// show local model list
if (model_origin == 'local') {
$('#coqui_api_model_div').hide();
$('#coqui_local_model_div').show();
}
}
async onModelLanguageChange() {
throwIfModuleMissing();
resetModelSettings();
$('#coqui_api_model_settings').hide();
const model_origin = $('#coqui_model_origin').val();
const model_language = $('#coqui_api_language').val();
console.debug(model_language);
if (model_language == 'none') {
$('#coqui_api_model_name').hide();
return;
}
$('#coqui_api_model_name').show();
$('#coqui_api_model_name')
.find('option')
.remove()
.end()
.append('<option value="none">Select model</option>')
.val('none');
let modelDict = coquiApiModels;
if (model_origin == 'coqui-api-full')
modelDict = coquiApiModelsFull;
for(let model_dataset in modelDict[model_language])
for(let model_name in modelDict[model_language][model_dataset]) {
const model_id = model_dataset + '/' + model_name;
const model_label = model_name + ' (' + model_dataset + ' dataset)';
$('#coqui_api_model_name').append(new Option(model_label, model_id));
}
}
async onModelNameChange() {
throwIfModuleMissing();
resetModelSettings();
$('#coqui_api_model_settings').hide();
const model_origin = $('#coqui_model_origin').val();
// No model selected
if ($('#coqui_api_model_name').val() == 'none') {
$('#coqui_api_model_install_button').off('click');
$('#coqui_api_model_install_button').hide();
return;
}
// Get languages and speakers options
const model_language = $('#coqui_api_language').val();
const tokens = $('#coqui_api_model_name').val().split('/');
const model_dataset = tokens[0];
const model_name = tokens[1];
let modelDict = coquiApiModels;
if (model_origin == 'coqui-api-full')
modelDict = coquiApiModelsFull;
const model_settings = modelDict[model_language][model_dataset][model_name];
if ('languages' in model_settings) {
$('#coqui_api_model_settings').show();
$('#coqui_api_model_settings_language').show();
$('#coqui_api_model_settings_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select language</option>')
.val('none');
for (let i = 0; i < model_settings['languages'].length; i++) {
const language_label = JSON.stringify(model_settings['languages'][i]).replaceAll('"', '');
$('#coqui_api_model_settings_language').append(new Option(language_label, i));
}
}
else {
$('#coqui_api_model_settings_language').hide();
}
if ('speakers' in model_settings) {
$('#coqui_api_model_settings').show();
$('#coqui_api_model_settings_speaker').show();
$('#coqui_api_model_settings_speaker')
.find('option')
.remove()
.end()
.append('<option value="none">Select speaker</option>')
.val('none');
for (let i = 0; i < model_settings['speakers'].length; i++) {
const speaker_label = JSON.stringify(model_settings['speakers'][i]).replaceAll('"', '');
$('#coqui_api_model_settings_speaker').append(new Option(speaker_label, i));
}
}
else {
$('#coqui_api_model_settings_speaker').hide();
}
$('#coqui_api_model_install_status').text('Requesting model to extras server...');
$('#coqui_api_model_install_status').show();
// Check if already installed and propose to do it otherwise
const model_id = modelDict[model_language][model_dataset][model_name]['id'];
console.debug(DEBUG_PREFIX,'Check if model is already installed',model_id);
let result = await CoquiTtsProvider.checkmodel_state(model_id);
result = await result.json();
const model_state = result['model_state'];
console.debug(DEBUG_PREFIX, ' Model state:', model_state);
if (model_state == 'installed') {
$('#coqui_api_model_install_status').text('Model already installed on extras server');
$('#coqui_api_model_install_button').hide();
}
else {
let action = 'download';
if (model_state == 'corrupted') {
action = 'repare';
//toastr.error("Click install button to reinstall the model "+$("#coqui_api_model_name").find(":selected").text(), DEBUG_PREFIX+" corrupted model install", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
$('#coqui_api_model_install_status').text('Model found but incomplete try install again (maybe still downloading)'); // (remove and download again)
}
else {
toastr.info('Click download button to install the model ' + $('#coqui_api_model_name').find(':selected').text(), DEBUG_PREFIX + ' model not installed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
$('#coqui_api_model_install_status').text('Model not found on extras server');
}
const onModelNameChange_pointer = this.onModelNameChange;
$('#coqui_api_model_install_button').off('click').on('click', async function () {
try {
$('#coqui_api_model_install_status').text('Downloading model...');
$('#coqui_api_model_install_button').hide();
//toastr.info("For model "+model_id, DEBUG_PREFIX+" Started "+action, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
let apiResult = await CoquiTtsProvider.installModel(model_id, action);
apiResult = await apiResult.json();
console.debug(DEBUG_PREFIX, 'Response:', apiResult);
if (apiResult['status'] == 'done') {
$('#coqui_api_model_install_status').text('Model installed and ready to use!');
$('#coqui_api_model_install_button').hide();
onModelNameChange_pointer();
}
if (apiResult['status'] == 'downloading') {
toastr.error('Check extras console for progress', DEBUG_PREFIX + ' already downloading', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
$('#coqui_api_model_install_status').text('Already downloading a model, check extras console!');
$('#coqui_api_model_install_button').show();
}
} catch (error) {
console.error(error);
toastr.error(error, DEBUG_PREFIX + ' error with model download', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
onModelNameChange_pointer();
}
// will refresh model status
});
$('#coqui_api_model_install_button').show();
return;
}
}
//#############################//
// API Calls //
//#############################//
/*
Check model installation state, return one of ["installed", "corrupted", "absent"]
*/
static async checkmodel_state(model_id) {
throwIfModuleMissing();
const url = new URL(getApiUrl());
url.pathname = '/api/text-to-speech/coqui/coqui-api/check-model-state';
const apiResult = await doExtrasFetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
},
body: JSON.stringify({
'model_id': model_id,
}),
});
if (!apiResult.ok) {
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Check model state request failed');
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
}
return apiResult;
}
static async installModel(model_id, action) {
throwIfModuleMissing();
const url = new URL(getApiUrl());
url.pathname = '/api/text-to-speech/coqui/coqui-api/install-model';
const apiResult = await doExtrasFetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
},
body: JSON.stringify({
'model_id': model_id,
'action': action,
}),
});
if (!apiResult.ok) {
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Install model ' + model_id + ' request failed');
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
}
return apiResult;
}
/*
Retrieve user custom models
*/
static async getLocalModelList() {
throwIfModuleMissing();
const url = new URL(getApiUrl());
url.pathname = '/api/text-to-speech/coqui/local/get-models';
const apiResult = await doExtrasFetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
},
body: JSON.stringify({
'model_id': 'model_id',
'action': 'action',
}),
});
if (!apiResult.ok) {
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Get local model list request failed');
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
}
return apiResult;
}
// Expect voiceId format to be like:
// tts_models/multilingual/multi-dataset/your_tts[2][1]
// tts_models/en/ljspeech/glow-tts
// ts_models/ja/kokoro/tacotron2-DDC
async generateTts(text, voiceId) {
throwIfModuleMissing();
voiceId = this.settings.customVoices[voiceId];
const url = new URL(getApiUrl());
url.pathname = '/api/text-to-speech/coqui/generate-tts';
let language = 'none';
let speaker = 'none';
const tokens = voiceId.replaceAll(']', '').replaceAll('"', '').split('[');
const model_id = tokens[0];
console.debug(DEBUG_PREFIX, 'Preparing TTS request for', tokens);
// First option
if (tokens.length > 1) {
const option1 = tokens[1];
if (model_id.includes('multilingual'))
language = option1;
else
speaker = option1;
}
// Second option
if (tokens.length > 2)
speaker = tokens[2];
const apiResult = await doExtrasFetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
},
body: JSON.stringify({
'text': text,
'model_id': model_id,
'language_id': parseInt(language),
'speaker_id': parseInt(speaker),
}),
});
if (!apiResult.ok) {
toastr.error(apiResult.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
}
return apiResult;
}
// Dirty hack to say not implemented
async fetchTtsVoiceObjects() {
const voiceIds = Object
.keys(this.settings.voiceMapDict)
.map(voice => ({ name: voice, voice_id: voice, preview_url: false }));
return voiceIds;
}
// Do nothing
previewTtsVoice(id) {
return;
}
async fetchTtsFromHistory(history_item_id) {
return Promise.resolve(history_item_id);
}
}
/**
 * Loads the local model list from the extras server and fills the local model dropdown.
 * Does nothing when the coqui-tts module is not loaded or the list was already received.
 * @returns {Promise<void>}
 */
async function initLocalModels() {
    const moduleLoaded = modules.includes('coqui-tts');
    if (!moduleLoaded || coquiLocalModelsReceived) {
        return;
    }

    // Initialize the local model list once.
    const response = await CoquiTtsProvider.getLocalModelList();
    const payload = await response.json();
    coquiLocalModels = payload['models_list'];

    const $localSelect = $('#coqui_local_model_name');
    $localSelect.show();
    $localSelect
        .find('option')
        .remove()
        .end()
        .append('<option value="none">Select model</option>')
        .val('none');

    for (const modelName of coquiLocalModels) {
        $localSelect.append(new Option(modelName, modelName));
    }

    coquiLocalModelsReceived = true;
}

View File

@@ -0,0 +1,190 @@
{
"multilingual": {
"multi-dataset": {
"your_tts": {
"id": "tts_models/multilingual/multi-dataset/your_tts",
"languages": [
"en",
"fr-fr",
"pt-br"
],
"speakers": [
"female-en-5",
"female-en-5\n",
"female-pt-4\n",
"male-en-2",
"male-en-2\n",
"male-pt-3\n"
]
}
}
},
"en": {
"ljspeech": {
"tacotron2-DDC": {
"id": "tts_models/en/ljspeech/tacotron2-DDC"
},
"glow-tts": {
"id": "tts_models/en/ljspeech/glow-tts"
},
"speedy-speech": {
"id": "tts_models/en/ljspeech/speedy-speech"
},
"vits": {
"id": "tts_models/en/ljspeech/vits"
}
},
"vctk": {
"vits": {
"id": "tts_models/en/vctk/vits",
"speakers": [
"ED\n",
"p225",
"p226",
"p227",
"p228",
"p229",
"p230",
"p231",
"p232",
"p233",
"p234",
"p236",
"p237",
"p238",
"p239",
"p240",
"p241",
"p243",
"p244",
"p245",
"p246",
"p247",
"p248",
"p249",
"p250",
"p251",
"p252",
"p253",
"p254",
"p255",
"p256",
"p257",
"p258",
"p259",
"p260",
"p261",
"p262",
"p263",
"p264",
"p265",
"p266",
"p267",
"p268",
"p269",
"p270",
"p271",
"p272",
"p273",
"p274",
"p275",
"p276",
"p277",
"p278",
"p279",
"p280",
"p281",
"p282",
"p283",
"p284",
"p285",
"p286",
"p287",
"p288",
"p292",
"p293",
"p294",
"p295",
"p297",
"p298",
"p299",
"p300",
"p301",
"p302",
"p303",
"p304",
"p305",
"p306",
"p307",
"p308",
"p310",
"p311",
"p312",
"p313",
"p314",
"p316",
"p317",
"p318",
"p323",
"p326",
"p329",
"p330",
"p333",
"p334",
"p335",
"p336",
"p339",
"p340",
"p341",
"p343",
"p345",
"p347",
"p351",
"p360",
"p361",
"p362",
"p363",
"p364",
"p374",
"p376"
]
}
},
"jenny": {
"jenny": {
"id": "tts_models/en/jenny/jenny"
}
}
},
"es": {
"mai": {
"tacotron2-DDC": {
"id": "tts_models/es/mai/tacotron2-DDC"
}
},
"css10": {
"vits": {
"id": "tts_models/es/css10/vits"
}
}
},
"fr": {
"mai": {
"tacotron2-DDC": {
"id": "tts_models/fr/mai/tacotron2-DDC"
}
},
"css10": {
"vits": {
"id": "tts_models/fr/css10/vits"
}
}
},
"ja": {
"kokoro": {
"tacotron2-DDC": {
"id": "tts_models/ja/kokoro/tacotron2-DDC"
}
}
}
}

View File

@@ -0,0 +1,870 @@
{
"multilingual": {
"multi-dataset": {
"your_tts": {
"id": "tts_models/multilingual/multi-dataset/your_tts",
"languages": [
"en",
"fr-fr",
"pt-br"
],
"speakers": [
"female-en-5",
"female-en-5\n",
"female-pt-4\n",
"male-en-2",
"male-en-2\n",
"male-pt-3\n"
]
},
"bark": {
"id": "tts_models/multilingual/multi-dataset/bark"
}
}
},
"bg": {
"cv": {
"vits": {
"id": "tts_models/bg/cv/vits"
}
}
},
"cs": {
"cv": {
"vits": {
"id": "tts_models/cs/cv/vits"
}
}
},
"da": {
"cv": {
"vits": {
"id": "tts_models/da/cv/vits"
}
}
},
"et": {
"cv": {
"vits": {
"id": "tts_models/et/cv/vits"
}
}
},
"ga": {
"cv": {
"vits": {
"id": "tts_models/ga/cv/vits"
}
}
},
"en": {
"ek1": {
"tacotron2": {
"id": "tts_models/en/ek1/tacotron2"
}
},
"ljspeech": {
"tacotron2-DDC": {
"id": "tts_models/en/ljspeech/tacotron2-DDC"
},
"tacotron2-DDC_ph": {
"id": "tts_models/en/ljspeech/tacotron2-DDC_ph"
},
"glow-tts": {
"id": "tts_models/en/ljspeech/glow-tts"
},
"speedy-speech": {
"id": "tts_models/en/ljspeech/speedy-speech"
},
"tacotron2-DCA": {
"id": "tts_models/en/ljspeech/tacotron2-DCA"
},
"vits": {
"id": "tts_models/en/ljspeech/vits"
},
"vits--neon": {
"id": "tts_models/en/ljspeech/vits--neon"
},
"fast_pitch": {
"id": "tts_models/en/ljspeech/fast_pitch"
},
"overflow": {
"id": "tts_models/en/ljspeech/overflow"
},
"neural_hmm": {
"id": "tts_models/en/ljspeech/neural_hmm"
}
},
"vctk": {
"vits": {
"id": "tts_models/en/vctk/vits",
"speakers": [
"ED\n",
"p225",
"p226",
"p227",
"p228",
"p229",
"p230",
"p231",
"p232",
"p233",
"p234",
"p236",
"p237",
"p238",
"p239",
"p240",
"p241",
"p243",
"p244",
"p245",
"p246",
"p247",
"p248",
"p249",
"p250",
"p251",
"p252",
"p253",
"p254",
"p255",
"p256",
"p257",
"p258",
"p259",
"p260",
"p261",
"p262",
"p263",
"p264",
"p265",
"p266",
"p267",
"p268",
"p269",
"p270",
"p271",
"p272",
"p273",
"p274",
"p275",
"p276",
"p277",
"p278",
"p279",
"p280",
"p281",
"p282",
"p283",
"p284",
"p285",
"p286",
"p287",
"p288",
"p292",
"p293",
"p294",
"p295",
"p297",
"p298",
"p299",
"p300",
"p301",
"p302",
"p303",
"p304",
"p305",
"p306",
"p307",
"p308",
"p310",
"p311",
"p312",
"p313",
"p314",
"p316",
"p317",
"p318",
"p323",
"p326",
"p329",
"p330",
"p333",
"p334",
"p335",
"p336",
"p339",
"p340",
"p341",
"p343",
"p345",
"p347",
"p351",
"p360",
"p361",
"p362",
"p363",
"p364",
"p374",
"p376"
]
},
"fast_pitch": {
"id": "tts_models/en/vctk/fast_pitch",
"speakers": [
"VCTK_p225",
"VCTK_p226",
"VCTK_p227",
"VCTK_p228",
"VCTK_p229",
"VCTK_p230",
"VCTK_p231",
"VCTK_p232",
"VCTK_p233",
"VCTK_p234",
"VCTK_p236",
"VCTK_p237",
"VCTK_p238",
"VCTK_p239",
"VCTK_p240",
"VCTK_p241",
"VCTK_p243",
"VCTK_p244",
"VCTK_p245",
"VCTK_p246",
"VCTK_p247",
"VCTK_p248",
"VCTK_p249",
"VCTK_p250",
"VCTK_p251",
"VCTK_p252",
"VCTK_p253",
"VCTK_p254",
"VCTK_p255",
"VCTK_p256",
"VCTK_p257",
"VCTK_p258",
"VCTK_p259",
"VCTK_p260",
"VCTK_p261",
"VCTK_p262",
"VCTK_p263",
"VCTK_p264",
"VCTK_p265",
"VCTK_p266",
"VCTK_p267",
"VCTK_p268",
"VCTK_p269",
"VCTK_p270",
"VCTK_p271",
"VCTK_p272",
"VCTK_p273",
"VCTK_p274",
"VCTK_p275",
"VCTK_p276",
"VCTK_p277",
"VCTK_p278",
"VCTK_p279",
"VCTK_p280",
"VCTK_p281",
"VCTK_p282",
"VCTK_p283",
"VCTK_p284",
"VCTK_p285",
"VCTK_p286",
"VCTK_p287",
"VCTK_p288",
"VCTK_p292",
"VCTK_p293",
"VCTK_p294",
"VCTK_p295",
"VCTK_p297",
"VCTK_p298",
"VCTK_p299",
"VCTK_p300",
"VCTK_p301",
"VCTK_p302",
"VCTK_p303",
"VCTK_p304",
"VCTK_p305",
"VCTK_p306",
"VCTK_p307",
"VCTK_p308",
"VCTK_p310",
"VCTK_p311",
"VCTK_p312",
"VCTK_p313",
"VCTK_p314",
"VCTK_p316",
"VCTK_p317",
"VCTK_p318",
"VCTK_p323",
"VCTK_p326",
"VCTK_p329",
"VCTK_p330",
"VCTK_p333",
"VCTK_p334",
"VCTK_p335",
"VCTK_p336",
"VCTK_p339",
"VCTK_p340",
"VCTK_p341",
"VCTK_p343",
"VCTK_p345",
"VCTK_p347",
"VCTK_p351",
"VCTK_p360",
"VCTK_p361",
"VCTK_p362",
"VCTK_p363",
"VCTK_p364",
"VCTK_p374",
"VCTK_p376"
]
}
},
"sam": {
"tacotron-DDC": {
"id": "tts_models/en/sam/tacotron-DDC"
}
},
"blizzard2013": {
"capacitron-t2-c50": {
"id": "tts_models/en/blizzard2013/capacitron-t2-c50"
},
"capacitron-t2-c150_v2": {
"id": "tts_models/en/blizzard2013/capacitron-t2-c150_v2"
}
},
"multi-dataset": {
"tortoise-v2": {
"id": "tts_models/en/multi-dataset/tortoise-v2"
}
},
"jenny": {
"jenny": {
"id": "tts_models/en/jenny/jenny"
}
}
},
"es": {
"mai": {
"tacotron2-DDC": {
"id": "tts_models/es/mai/tacotron2-DDC"
}
},
"css10": {
"vits": {
"id": "tts_models/es/css10/vits"
}
}
},
"fr": {
"mai": {
"tacotron2-DDC": {
"id": "tts_models/fr/mai/tacotron2-DDC"
}
},
"css10": {
"vits": {
"id": "tts_models/fr/css10/vits"
}
}
},
"uk": {
"mai": {
"glow-tts": {
"id": "tts_models/uk/mai/glow-tts"
},
"vits": {
"id": "tts_models/uk/mai/vits"
}
}
},
"zh-CN": {
"baker": {
"tacotron2-DDC-GST": {
"id": "tts_models/zh-CN/baker/tacotron2-DDC-GST"
}
}
},
"nl": {
"mai": {
"tacotron2-DDC": {
"id": "tts_models/nl/mai/tacotron2-DDC"
}
},
"css10": {
"vits": {
"id": "tts_models/nl/css10/vits"
}
}
},
"de": {
"thorsten": {
"tacotron2-DCA": {
"id": "tts_models/de/thorsten/tacotron2-DCA"
},
"vits": {
"id": "tts_models/de/thorsten/vits"
},
"tacotron2-DDC": {
"id": "tts_models/de/thorsten/tacotron2-DDC"
}
},
"css10": {
"vits-neon": {
"id": "tts_models/de/css10/vits-neon"
}
}
},
"ja": {
"kokoro": {
"tacotron2-DDC": {
"id": "tts_models/ja/kokoro/tacotron2-DDC"
}
}
},
"tr": {
"common-voice": {
"glow-tts": {
"id": "tts_models/tr/common-voice/glow-tts"
}
}
},
"it": {
"mai_female": {
"glow-tts": {
"id": "tts_models/it/mai_female/glow-tts"
},
"vits": {
"id": "tts_models/it/mai_female/vits"
}
},
"mai_male": {
"glow-tts": {
"id": "tts_models/it/mai_male/glow-tts"
},
"vits": {
"id": "tts_models/it/mai_male/vits"
}
}
},
"ewe": {
"openbible": {
"vits": {
"id": "tts_models/ewe/openbible/vits"
}
}
},
"hau": {
"openbible": {
"vits": {
"id": "tts_models/hau/openbible/vits"
}
}
},
"lin": {
"openbible": {
"vits": {
"id": "tts_models/lin/openbible/vits"
}
}
},
"tw_akuapem": {
"openbible": {
"vits": {
"id": "tts_models/tw_akuapem/openbible/vits"
}
}
},
"tw_asante": {
"openbible": {
"vits": {
"id": "tts_models/tw_asante/openbible/vits"
}
}
},
"yor": {
"openbible": {
"vits": {
"id": "tts_models/yor/openbible/vits"
}
}
},
"hu": {
"css10": {
"vits": {
"id": "tts_models/hu/css10/vits"
}
}
},
"el": {
"cv": {
"vits": {
"id": "tts_models/el/cv/vits"
}
}
},
"fi": {
"css10": {
"vits": {
"id": "tts_models/fi/css10/vits"
}
}
},
"hr": {
"cv": {
"vits": {
"id": "tts_models/hr/cv/vits"
}
}
},
"lt": {
"cv": {
"vits": {
"id": "tts_models/lt/cv/vits"
}
}
},
"lv": {
"cv": {
"vits": {
"id": "tts_models/lv/cv/vits"
}
}
},
"mt": {
"cv": {
"vits": {
"id": "tts_models/mt/cv/vits"
}
}
},
"pl": {
"mai_female": {
"vits": {
"id": "tts_models/pl/mai_female/vits"
}
}
},
"pt": {
"cv": {
"vits": {
"id": "tts_models/pt/cv/vits"
}
}
},
"ro": {
"cv": {
"vits": {
"id": "tts_models/ro/cv/vits"
}
}
},
"sk": {
"cv": {
"vits": {
"id": "tts_models/sk/cv/vits"
}
}
},
"sl": {
"cv": {
"vits": {
"id": "tts_models/sl/cv/vits"
}
}
},
"sv": {
"cv": {
"vits": {
"id": "tts_models/sv/cv/vits"
}
}
},
"ca": {
"custom": {
"vits": {
"id": "tts_models/ca/custom/vits",
"speakers": [
"00236e350cc84b94a6684f182acf96e68963d7fa1164d4fa56da20f46f210b2dd3ecf189e97fb3c94113a54c12dc20550508f5b7b9b37e1873898d58a308feb5",
"00459",
"00762",
"00983a845f95493fb27125b114c635f3b40060efaee167d32d8a3dd040c877713446c7bd3e6944641227bdb4165ecb8d684ec2ef66c817e65e77c52cc50e62ed",
"01591",
"02452",
"02689",
"02992",
"02f7d61edf5063ca42953b1068539f1572985aa9448555cfd8d7667121eeedc72c912d95cf33abf61a1f9620f2a01be4251a53aa5440d15849003fb31210d830",
"03115",
"03386",
"03655",
"03944",
"04247",
"04484",
"04787",
"04910",
"05147",
"056d7638d714a7dc1efe1c47d390d0659fbfdfc7df5249e8bfe10ba346cc76d5cda93fc8ecbeadffd4924c4f9cfb6b32c1739c8af1e2d58d7cec88b2cf18795f",
"05739",
"06008",
"06042",
"06279",
"06311",
"06582",
"06705",
"06942",
"06c6d2e093624103c268e2cba37466147fd564bff1312a78d1c5be9ba168af4cf4819c7a91d5321d7aa9bd20ad6c702ca2cb005496dd20c45d293200b2b8a7b9",
"07140",
"07245",
"07803",
"08001",
"08106",
"085503e68b0772f1b3aa4de86a57bb26e3750660e7929a14a653c729787a110cc8b3704f8ea09842f72be46b6ffbb35bdb3732308b31dceefc3b33e5ad3f7975",
"08664",
"08935",
"08967",
"09204",
"09598",
"09901",
"0befb1084ad00d656f45a87ad83f074c61e3b3767cf6f5463fd5bc199ab7fd4733c5f02e3a100359e953977cc2a2689bd4824ef6e3178a7108cb45a0204fb3cb",
"0c6bf67821762116d753c9b48ebed8a2ccfa0a956d5dbf19feb0ac0bc2096154ca288ae7c5e324a3092db395cc24c64c6a4e4fb0e01429b7343cafc7ac1b2e13",
"0d0a943d348b4f0948da443c4d020b2e690731955ce8c318c0fb72663cfec3cd3458488ff9ff9cee6d221c85771b8eb83cc087dda37d4109bbb1614039e5f565",
"0da83aed14276e120e2581be32891bb088a22c272feb6f03b4bac1b827cccdbc8fee277a885f58e98931819e0d6171526c5fa7b2e788f68a2852e4d5314f613b",
"0ff19536d6147f61b24d50c0c993a7a687df4d253c2052e4fa30b1624c87e60075649d888f51ce71318fb8789cb378879091aba020256d66ac19f024833c3e63",
"125d9d1721de26a9b89d0e4f4d386e07458d287ebb2f338879e8886847abe6b3209f79e2bc335bcfc437350184df5a7d9e1a08ffb5239674edfd1cf95a9d1e24",
"1378866a4d2b6965c03eed8e48e03fffd089638acdf1fa82ed20a9856406e083f0c0e1f5043c4d3bf67dbc383f7cd28b602eff1d8bd8bf8c1a0191dc98540322",
"14bc32c10eb26503a4e799c3a762bfe5949d7a232074e854ecfad8139acaa4257c563a502e06a263f2d5fa8337114a9741d4a634a0f914adae74e5f9a80f145f",
"151fcb1168f41a51c49e20b426605109e1d9dd50be2926aa9878040fe325eba2f2c470357a735e9e24b7412e78e2550a8fbd0ea77fed80e4d8f50a21f2064948",
"1610e29603954ee12f408eaeb83a5c99781b5efe5f64fdf2e3c5e0ba9756b5b11bfeefda40a787842f7d5da653135043e3f43e7f786499cb51ad6181ef8acc9d",
"1887c37f4187a4c3213ba4b58d0ef15f903a3720ee94d5ce59c33db193f60db416a4d9607f213ae44ca3eafc7217fb9871e2ee1837ad4cc0f34794e5c543f9f5",
"1add23d44d2d913f0ea6e061fe292b0563653fa15b4ecc9cce0c2b83dc5a743d6d3dd4625efa112f751852c348a667a9456ef3486ae7e8c5954dedc69f998ea2",
"1b7fc0c4e437188bdf1b03ed21d45b780b525fd0dc3900b9759d0755e34bc25e31d64e69c5bd547ed0eda67d104fc0d658b8ec78277810830167c53ef8ced24b",
"1b8354b1fe9255578225b3d2255d5e781eed7d13ab61e84bc08be5d6465ec468c533563137ca756fcb7d3759af0ac2c0b4d00873782c7bf47ea72fd9be2f9e8c",
"1be6c773da6334cc73e23312689bc8a5915529c905e1d5289dbfe00332a7dcb9ae97efe209635e2e5040783777409155926d231a5a5f76357494671512d2b1a4",
"1c7af1cc1357fd63bd9ffe915745e20c34588438e1e0d85fdc8c9de4b3bd41d3d61b318b6a69862c6d64dd41f15ef3d994a6bb6c9a9dac69c891308b09ab16a5",
"1c7f19a7fa0b166c700bac583b6858ce7adbe19566d66e530953273aae59776757aebaeb30c20a58d74bc50ce1345516af5bbd36168f443fed809bf54c02f63c",
"1c80e9d982aa0c12db8498e8275b2281e638e8e5c684a752e19f8f9842979b70a3624785d0c30e0e57112950cae5e892bf554c295c74cf8c82c8ec33c732d8c6",
"2256cc5ee6c617347af9a1abd97dfe80f55e6691eb0a042321d46a1bd7ce0baf1c7a4c8ea3fe4184f8638b1c3d9e83b6aa193bd6f9b49d5358523f1fe324cd92",
"238532dddf77923ce93cf2e9ed809d088094106a1aad327e8a7b229ce24a339771e59478f7d5162efc1da6f347b44cbb2273ac9154aa3a76c7a8fc458470cc2b",
"241ca4fdf2124f550657446301fb8dfc8bdef46d3888ce39bf9d8622c2bbec7e06b198f5e33fadbf30e477fdb37435cea36d10341af1a7d3a80d0ad1caa94bf9",
"2421aa51a089ecfe45250bf284d5690a9994a9eb03f2ba4f43d2ad73fe78783ae5f3d3088d772e01fd1d747b2ecd6bb1bfae5feb10a72130d3952ba7304d5c53",
"24d967d0e8b84beb3652417724be81ab83c7834afaa7b7d3d7d9591b1a2a7bb75f9b25be548a200570ebd6cc34e91306b675af510ef91cd34a77060b65b9faaa",
"25911630ab15956e81427d3e990cf37f79490e305914a15ca7dd7b95dd4d4feb15fd94549cc005376801ce68d637eab6e19ee36017dba5c01bd0f206e5e8dc3b",
"26099adbc4db8fcf000e2c7d1da3399662281f9af03831808d29c602431af4fc13f21b38c5c42c5ac4f77ece48448eef99f735d92cdaed857d19da7dd2b888ad",
"28e2fe1944a593c44c3de0dc52971f040f0b8901fced2057025bdbafa1fe3b042be19618044ae085d7364e3cb38601e9ce4030329f15af7a0898f9d4c2c5014f",
"2b59e9f830e5ea00c500b63eff4e72553e0c2608f5741d35c226e733400412014d7697bd6efb67bc61b19fd61e40f9ed70fde2589fe0b5498915eebc1c8b5d93",
"2bc2a177bf56dcc98e05501e7bc6eebd3d1662114764299a4f9e6b060a48095b8ec95d20a5814f71343d65ee3cd2e7f42a80faa51a148005242cc5073e605ba4",
"2ce84c6ea6aae52c449b6d34cbc095b2f2c3e6fa20d0e48b2f7d223724ae01375e92a8ad106b029f0562ee735de36d9bca6cd167257c3f68796bd8b1a0ab600f",
"2d84f39c2cca33dd28fee650caa022c9a06407462342fae8dc256af7904cdd114af5b4cc883181407b8dcf2dc4a93d45c62c83a317d84e876cf710a521f20d80",
"2e6ccdf9f0a7bf0df6fd0572bbb53f25378fc5333b352bc885b3a0d01e5dd672156cf697c127cc998ac546d122c547c421970a6f23dccb60bf2c841146fa6576",
"2f92b4704080216aa80b1b39cfa223feeb9ed7c909f5b77be1b6e45fdc8827463bc4c4cd98263f02b57e653ecc2ccc7192aedd92990113433077e7ed44eb1e0a",
"2fb95c3b786fa65215534207266e034b294317b2327ee0928be3436258e42db8f4479e86e6006979ec4438dbaa9daa05be21ddc66717d30dc43e36ead349965e",
"30b1f81c579755895581259d79a8a5a3ca45b908b0bd14ad1c6418f39aa1e2f47cb4749c69b5440cdb92e3bafb772e19e7bc2b16d196b061addd173a1309e491",
"31535cb2ece4710d08fdbeefb6f8f75ed093fee4cf8573bd601d960f8c6156f0fd0a85712761691e86e31160b993ee0eacb10c4c8aed000cc394cf7c7d207a7e",
"31e6f3a011661320b2e59b6f8be43f6db2243e9feabc2b9787c1413788e13eb0e5810bed983bf7ff66e46417d183a91ed50b3b9be9d89e4f51aada72293b9881",
"32550810ba55b9a67a25d308f0ede521f12cbf6076472ff5bd60a8f5e951c481b784e2f04194fb96116c4f001d84b3993b2c580879671de46333d5f212ff2ca5",
"336f82b4645b80c99137018e69bb6f8138a9c8dc05a510e36922503120648625674e1414cd90d0cf46f28cbd5993ae0eaedc9994b72e8eb5242737ddefc0bfb2",
"35b962b08846ed7d8a4cc47582a4e607f5ff4136042ce0b1adb55d1e8d58e2dc1abc5807b3601a7f7be1ada5939e1771e128fc916c1b5d39ed3619e251707952",
"3637902e0d19f0080313c14d2c9dcde800ec6b71d493459c2f3b2cebf186f028ea289dd59ba1fd4705e53891216f7f4c36dbcb8938aeeaf142317b441b20a837",
"3723bd65a05afc7411c2bfca904742062b7b0c081ba126e68c65d28eaa6122f69196f4959fc1795fe03f8e49ec7364863911f9b659684a06b3a126c6f1729551",
"373d86f9fa3a127372dd913b7571ed318bfea42173b2b7daebde93c742f3224fb7fe5306085e836d20bfee4201bfe070b4c6b36510f5c9f379f6a3b610f36cd0",
"379d321bff71ebcd34792e8f4552d341f30a006b4765f8c6de4fa98d3ca416def88adbcb0253f5849f51793b3d7ca7e53700ec70b5a97e84ccd1f35a2a1fb6e5",
"37c12c700c95dc0028b3b82c4cf1fb922d68680c35fe84585bf22674e71c4dc53bd9233ce8b71bd31e9c9b0e000d01d195a3572b9055a73fbec891b1ade250cf",
"3a4a32c7cff18f1896e7cbff2c19b4e6f91a95c1e7aab616722600ceb36a86b07fb0e1e0c70cc285dfb6192b53cb67826698b7f3f652549e27a969bde0177fa7",
"404ecea5ae8e5f4ec3d2c48494cf7f1d559268542d8f1f7928da2fcde55c9fa3f491ba632f555ec69e8c9e819072df450add7e5886cf5527f446b11544af7d05",
"41e5e21b3a3b0c8df01ba5b3c3e6224cc4082f41ca87679344b0273e2216cc272e19426c160f5a9580915c057a3e4000788be6cc7a6f5f346cad5068c7884ce7",
"464d9ac63f7958200bc09a141171355bf4f3631d66dc4bbfabd497619a8f055c034c0752987944b2102e02d4b435bcd3ce0527962871112049e1d26865b776e9",
"4869d94d4936ab700c5e5bc7b666177b53220082f5f221774b5625d7275cd4f117482dcd1498674b7f885fa41d86f99b8d00b6a6f641829780946651f561fc22",
"496b66c9cb705a46cdfef9eeaf29c9d738a4b70b601270985a7df5a06f9e1d6c56be0982995c8cc06902d0ee89bae201c37a91f568331ffe28ad2d150e183fed",
"49a7654071536ed5882b8b6e6d2e3558ef796ecd8aab8ceaa24ad8bc9f3420b528ef1413696584c11facd6d5bccd37fe8e274b8c3d139dc251ffb11c3a503aaa",
"4b6c7e4e9bde35c471cbf5e2e93b2eb8bbba52b710acecf99910af08b3b35365f24d883ddfdd9825918c31477a5f3fc48f075080c4e97e80fecd6e1936bc92eb",
"4bce212aca40bd1834bf741e47954526a8817ecbff8fedda854dbfc2d033a2567bd34b84fa02c3d07855f3dcf413590ae75ad6edf261d66bffb84d77803a7b76",
"4cedaa8d96436fd0d2ebdb61d616790a3cb3737d0a93d2ae41d588137c0d3339999d991b7b3c452704be1f5f512ce5a08c0971898fad0ad77f18fd623411cd7f",
"4d7e2548403c7e04d809030aa25015c9706e773517e1f72b81bdda22213aeb8f542cc62156bc5ef1c1622e99227fedbcc9c1b3e5e147b854e3b629f8f78bd158",
"4de9f262eee7ee7d24ef8933af4610a1c5b97ff055c4fd0f97868e338a017308d460f4b003b74bd2aba7789153593f3b986b814fd93f2e4dfa5b55594fb17c55",
"4e5e58a6ec7d9cac969f99b817f981ab7f8d2cbd9ab9dd0a37e45c70a8a8ca3b8e1c43b2013082062ffc1f4f3b268ea78ebb88d613d026a6312f40a6867a1d0b",
"4ec8f1e81d7abd9d2dcb3dbd4be86b615f643386f3b1098c37a02a103fe6b36239c05bff6746ce568ca81765b285c1c271af4fb1fd99120341cae2851b776bbb",
"4f57d1abde3364d91128e682ba724e6d3bc2ed6b112d2cd679739e478ea6bd671c527edff64c6a7b5c1173f68e02a410f09c2256356fde7d517908310c118382",
"503dbbe83f0154e9bba4bc685bf1c1fbdd27293d0e4f837947910e4d320bc4d5bed1ade67a45b541013189a2c133f6f9f6cbc3566fad220c0635f286feec74c5",
"51795e8ea8faa28e88f02559f6bdd47d9a0735589d47dd0f2e057b8b01fd3667fd9fd29e2613f200174af1d4b2d3d0860704cebebf2b6e79f1724d6782d7a270",
"52cfac480c0cbc60068305d983adbf98814d2cfddb8be0ccfeb7c7f95bdaf31a5f70da944cc2453e6a5fbb9bb4092e36b662e838762855fb016f55e6e3d957c1",
"537e815df93312978a9ba479ff2dffc9975c875950a203e8a1b7ffe4cb06625964f59dde1a06b87921a2a91702cc6bab04e159aec7cb2e2fc576cbe25838df2b",
"547dd49c2cbe113b60c9df4a8e8b83a532f0da054cea8f1d23db66cc2638f7b5edfee820b4764646be10dbcd05caa5d71483477718a73ce8dfc752204807d9e5",
"54f344faa37da0c9ab1ab563735c532ab81fcd1c8431cd8eca4ab7a8774f194e1ffe922547ea42bd1fad36e7493761992eca4821138ded1a9580e9fa38685291",
"56071bfe30e977f201fa4d6808f8d7c2f3e6788ba68d12e2eb18386ac2507bb2bbe3c14bab90370066bbf6e2af42afcf1e45b362dba958d38fbc69cdea3874ea",
"57e5f7cc5fac058f7c772eb41f8d49bd0fe3070c41eef445b1c073abf9b1cec451aa22764490b7da4c5bcacf4ee453c3153158cb1569f2f9447807cb14dc1126",
"5a9a6481f1365def2919871790a95fdccbae145640f3b4b5e11d1d1370ed35c5a4c31e402b3b438892a6bfd9dfbbe2fc97056d2cc24f2ac412b3a5e1adb7003a",
"5ba168675a3f2ea8d6d51896c5db84ee59ca65359b1b97e6d79543a6c918fe427f8b6cec79037c452eb086debe1d57049c25481d61a873f0503703266bf0cb84",
"5da56ed896575439b7bbca20981f0b50618958d94f08b8f47d13774dca3990d4c571be1f4aa2786bf8fcbd1a594336be49cc26d972d5fb0c0682ab4d5b59d19e",
"5ebf04dfec6c9b10a6fe7fd03725901973565a13530c20d02b4332670cce9beab185dd0b0f61f4a87f9c3dacc307d06e062d640f6550d9443a4c06a114e5bca5",
"620b0d4c3be90f5f77f0cf9f976e5d7f067689884dc857f2b26a6edb40ef4fd2826213b5028900b168e853d036f1741600b236e04d8dcae5fa26cd2b8975ac04",
"6323ec0401b28c1b06afa76760b478535101ae48c6c9367491087143287d9ff76b9c00f39dc838cdb20d65eab16622dc85143f5845791bf85705cb4f20975bbe",
"633e7303eae41420e558e186308510783f5c234e9c639c0e6f5b6d37fca6bab766c5d475b2f330910bac93cef6982124e73a1b6bab1a2e99a2e5d797f8547c6d",
"6688b60c24d068e19487c0b88a8b0a256854d8090ebebfa9a462fe49a77b8e9f303aa02042069cb0d6f227932cb48863758d0b57a18d53125ad39953bac543aa",
"6745c47d0bd557c3dbce201697e8a2fbaff9ca52744d6007a636a237b82d1167795a0c0e2e5eb71b7460ed16e3fdcdac1dba1b7a2910d5168416e236c93ccb76",
"6892c6ba9f66d0d7aa0445139081dc82a76d9ef8c7bb049a8eaf090f76c06cb4f1db05739038d7e04167569bb6d0fea55fc15343f7c77cd5a3e2d4c5ed068290",
"689a213fd2d66b9d3634c9165b316e49ec53ac96131be42226d462ef1bc3ba38651e94698fd6e6f5c6d6c834d2b9a6732be54a8d6273c1025511d795326ffd3e",
"696e8808717101399ab7ff16382db411adfadcd60c6a525539b0f8f88d84b448662fbca212b175379ff78ce7b2e64aa4b4e96d1820ade8eb2f742295f744db7c",
"6bdec6b6f7e6b5a187feb6537101d90cca1043e34d53e347f2f0b14e701585361fc4a4cd81577b6d4588844fc8bdba8af66155d9eb6c2eefd461e23d0b2b87e4",
"6e5948f904b3048511677d23d3cc9bd678739b234170302e1556c1bd1db8cee4243bf5e012a1320b4c50f6276e05cc5f620c461f640ac7413c23524f63f4aac3",
"7115c00371f891d0094a716083b978948431509a16d5a9598e78ec12712db46d46f1674312cd31339e2d6118cca5f7a3f82ec25dce861a059ee31d832cd6dcda",
"71b67ba5ec75978632136441a25426dbd48d4c0a55c1a5fc91f0f952b6bac06ab0d9709f0a7bb5a05393499135b76e4d722c7065fb636a227ba58c7fb86438eb",
"72a3d5bde83f60653937232cf4d29218ff5988533855fdbc804d9bea7e94eb14a8afbe36a8f8ee576a3ed2345632d4ad36df52efdd9adbdb60da6f890074c6b6",
"73d3685f3e78183724e3362f6c4288d522b54a8d2722197dc5ff5006974c1529dc562c1cbb05f023da4922cfc04340eb83b887c5343041febed1ddc44b22f9ed",
"74a679bf6c4a1b5856a25780496812416383f0567afcbd9b411ae9a0abab47d466741bd925b03decd7da586c6ea9589c8f40208ac2a22fa4413d4ea6e1a6f0dd",
"7638395f7d47fbf631633e2b899044e82e7ce0e07305114921cb0696551966b09993766782aaad70fa40a0f7362be31940381653c659fb73d3e1fd1fa45c257d",
"76383f56d9979837d4b3348f9f28877dd1ba58c1bd0ea839bdabf021428c2edfba46ff25558004c5183a73575eb126d4e0746a40e22ab15154d5d6f238a48ca5",
"77cd12af0a3d1d8cb64dd577bd2d50ac057d816694e8bc04089a6adb90e53ad6cbe9fe6aaf52596450e0c8178d8f9b88a545b27adcbc89bddf4d7c4bc4dd31fd",
"7834da277192e9434b0c039272ee6b3f1b225d2f975aa175fee762fb0d5f16b1edba2e0dbf11d8aa2bab5984482f703f88f0e9d1786a7687710ea2688f307ccd",
"79a830901c1bb0e27663dbe14d13df91d887daf0eabb6d3eee7f09768212afcd9cdde458d13042a9d2aa099f390c79ab94f2c1ca47fe0321f6c18973e437cdd6",
"7b7593f44cc6f9f7b21495bca6f3d564f73f36b97ee15d51a783da8141463834022996c55e494800d21304079aefa8a5fe64350c9273e0d36453b097b2dcc5f4",
"7c7d917d97412c24b76af336086469a43013d1d6b27298aa82c4e99b3f3b6c5a82014428a6a14b080a834382d9b0f178e405fb10170bcd340957955087698e19",
"7d19dccf48114d3ec00c45fe80581300faca042157d6c9458ec439c300d8c7b1190aa70eecf19f8b1d5af8c7291f3da08fc635a7fd6acc7c5b203d1e226589f5",
"7d8d6fa22ff724d823b82499686732b7fbf32f7c1f35dd5733ec3b65fa9625cf2d49bba86e6e0132252bea64074f35ff96a77bfa44441aed3fc1765b13cbc526",
"7e36be2204fe367a3798e1b2ff988779890591e5c997b1f6025ec8ee1fef3eb19e81b74bb8657874f5a990d5062d6c849621ce363c4a9c2c5a63c0966be6140c",
"7ff908cc2a18ec5a80e74fb4a2f12b406f0b7456ac797d35091d618c7ee991baa88edf62200817aa27732b03d9109cbdc6603092822b2e13a575953045b1cd0a",
"8154716e77acd0f5e912887facffc7b2c9889891e863a39fcfed1e5637e47328a4a3bf40bbac1e740629d3013304ada88cf24dbf3735a7aa2d4b855f813c8fd1",
"8162d651b6211f06f655a69cd7fdd383d6b4287e9ba132b9898ef9ac8687349e777626333d23bed93f9264aae965efb14ed650cb64fd0ad90494aff903eaef11",
"8348c81a253096a9def0b472a8499fc03ef8c6c6d3cc9b4a018f142501ebd04c2479008b88895e033eb83978e7d71e52a91a2e324ca869ed8f2724dfdcef269e",
"84b101db8d076398c1d624a8b38b22fbddbfa8fcc43ade44619f5a9b6e70daf1c963d6dc09ea039cca94ec56ccfd04a1689e806c970c0bd32cc9e56b73c7bd7b",
"853fb95e0f017c203e08312e3ccf45c0419928e08313b1bb0444aa4ff089550546e67fdd3434a22cae3f67603437051e49be5c4d8fc5583b1aa6a1ae36f0a911",
"85c9e13ccfc0d67de10281b04257d8ac0c256d2f9415e54148fb59954c0d43f66d3cbea43ea6389f8407a8bda8b1b1becd30e41dfbb3dd9bebbe69816d096fa9",
"85ea0b349a8df04283c62efb571d2947e7264b566883e300501086733b08efa42ced215bc47951c8198626f86ca8c0df730cdc35f4d99ffff958599884b68e51",
"88673d4f24d039e89c15d9ede6b653e41e42ca8bd7a8cb7e92a4f235e9b56cbad6200f8dc313c644e9a7d8d1dbc2b7e988da93bc0765499701bca6bc86d8fe3d",
"88ec4ff5a1b0ffdabfe62d068286c851ee64c428883e56f32af14b59756d5846be9d46e5a777c4c22f2dec9596a1a44ed3aa75f1fb0231923cbd4ab59f1f9c47",
"892bf89bd3a008a7d982de0d278349e654c713efacf965e88e46a12398375cd8502711378e378c39c33b2f995f47799760c6e6e05948b93c0d2b9fd427854ffc",
"894bd433b4b06514195a604961c871649e108d210a41d5cbebe76f78cb6270b7708d1c59d6cb88807f882bb154be1c9058a753e2b6a95c3f4ac9e27a02036f12",
"896256329fbeb5b8116349c31d8a39a7d36d5f970d48558e1db5417d611e240e4dbf473f6e49137f7aa6116394b7deabb0bbec4a014896cdc9484ee91458117d",
"897c3401b4a35d8fad5966bf8c4dce6d94837c76e46e8131a8bd70527f6e1d8c9f59053d0a56425d7dee71939280ac3c38df14e976f613cb906d7187d6141297",
"89e6f6a865ab743936a9b29d53b67bf4b68660ccbe834d4a11fa9011edb535e3b7b4d7a238c84971d4cb5f06ef7398bfecc4f2b786200fee67d7307f242da565",
"8b707d4f8f32c80709d880fb257873915033c7d5bce9589a80ca9437618262c55dbe8eec2e8c82469bc335a84a8f16f89afcc53b8329dced5407a513927efc4c",
"8e98d00c5d110856943461cd85305b0a817abb457c2afc8e89edb32e502d0060081c8e667d9fceb63a2f8efbcb6e193e9b0231afcc05ecb2303d7f742f304396",
"90bb7c91281bb6625a0700c1ee2f3cee488cb9c1864ccf2e24699c5d957b1b7b686574d11acb37572fdf18a15f272fd44009b6cfce9b6cdf9025dd5002869d30",
"911c26cf828319df5123a9cf38641704961a6b894aa6ee2b0d13409996a93d89f4868b91e0eb1efea907a70a14cf3a3bd8935033aeb03bd8555f2dea857a48bd",
"92862e616dce7469bafc507ab8fbb47bb6f5ca8b96b05e9fbf39a259d1d4c4cac97b0472f713db2e5fbff0d3e587e7b34bedff80cc2a70c446becf9b488370d9",
"92a15e2cbd0c89fbce36b05e3b282255097bb5492fc11f0d2b0a08c4311621a41ec35df201de51523b62189a3b44bb3eb1cbdf64e80f4a543d0d9f9a99f9bd3f",
"97679def7032179662646816abc12f74fc693fb02c43675a2d5407e58be6dacd1eb483d1bf46f66c5103de3a649211c29e1127dca473e13b02dcd5e7df719cc0",
"97e29f9edfe712b059203de5af236569e2c41fae8cddfc7b486204d6e30c411ec605c757fa5a1a151646092bd5d71de18a5f2d8b6fb74b9a28a7c7226a4f641b",
"9b5f9ebc961424b8a6b7def59a86ad6fa6e45fc9ad5fb251c15d4d09202e6d3f63bb37b80faa4fdfe3997182079988d78556a9ebf7db535951a1e3cba0c0f6c9",
"9b847b5006ea1b47dc0ec366d09aec4a67aec747c55af554c094994fe8c8625b09cfd5322958c816bea74f725abb3d1403f2e9336007db3b257949401b1fef03",
"9cdf4ab91c8ef6148dfd724f2a2c644cc00df44f5eea5035e760ac59ec79078ffaf3d97a9c5a9747c04895a3dc666339f82cd17e40095b9fd055df3ff07da6d6",
"9fb127fbe4659174b52ef61778a705cc5a96c8f136445bd28c10ac79398ab9ea291852b627e285e828fe37aa23d05b13cf202f3f0cb4c272aba94dd1806802c9",
"9fe6ba948da2f4e4aa0e1b0d3e1aec1f093335f8097d7dd3d6b5217cd539f5c41735ef7a615d8210f2e6b777b7198f151264ade172be7dbbf5d442bf91843e8f",
"a1afb2eae49546bf59e6f9a1968287add54dd6e336ec795037090a435f736b6d8ba2076e05e27034979a8caaeafce05fc6d9d5541f4e5a4321e64106dabd1549",
"a2b06b54679145e65ce10a8356285efcadbacd41be817d2e0858ddba59e638775b79f76cb9e4ac5859627b67ebf227c55b51cc48e6d0d7ef41c9845d96ded68a",
"a2b503bc78bd0b68fcdc3e3b68e3c68cf3da8d2d48d91f09313c7cdc11b43dd4d4de3a8a2c4b526809adf9879427c4818db72cffdbc2f0015a9fa5ade83bd400",
"a359c15185b6d2a402dacfb7b3dc2e3ce5fd80a1add892b2dcf8e23bebe57f16680eebf7a851c3a870d3ba9932c4e42bad937c4676931d849c62f021ba812860",
"a35dea43a67cbd18b705cf2b28114652686eb409c1ae1e56c04256fd902ba9ab52c7343bb8b162522bf3442da42431246644432c70f819ba8617a723abcce836",
"a4b1eb406ff2c349437a5634148365fd0eecad5a264036e3af171d0f6769a7129590a0a3e09592038baa8bc1292af2bbdbfb74e3b1a685844e263532a87baef6",
"a4b8fa949865e0aa45147a27f0a034a26e34745d624dcf0603dd25fd1ce279eaf2d073a853d67e6432447d5e06708d71a9cddac0d2918876d2d3498af3ae0892",
"a6bc3c6beffd4335228c3b4857365215f0c4bc5197a5b0eca95334af33dba19ebf8d513f6c75359d7cb678b051d96579d73ebdaa5b6906e3b6eab35005bee13e",
"aabfdbdc21150ac70f9bb1a34f4d7de570a72ba7e1afd8c08d64c85e00c12e6ca1f2ffe60dbd16a871987bd7aa47182baf57e7f68daaf0bac7fc3b907c8ef4c0",
"af506d21ee140905c125e61c19d04599354fe84fe211502c9c766951387f6ff79e80db0658392af173f37ef7c92d7815ac9214e8ba4c6ade3e7a7ef014e5cc08",
"b04a1d5062f2921f39074e4f5c00675269195834a0a9c0bcce10b1427bf8a6499bdd7d8c6717f220aa4ec9f590bb04b290673018528a60dd819ce9798b0a33b4",
"b0a3c5148905a3e7e18c773684026e4ccd8811c3c62f6fcfc23135686a8db9c2caa6de7b14775e29b7cdeb360ae25ea626381c7689ade892c3fb72f82e2daa89",
"b1a0cbb91459433ff6de32b189783a734c2ada4c04d7dd164de449ce79c749d382aff10aa9ed7b4449af3390da51585123ef88719ecf7cfea9c24223023a23dd",
"b47a96b489f4dd851c364dee278699905f1ed933ba3a98a6660160463a8decef830bb91ac0a1b4f9b742df2dfbdc9625ec27133a69f6cf3cb81ed298183764e7",
"b52e493e5049e86223385546f3407f5924fd75311a0a11af38423b7bb7c02c3f085fd1d9188515c7b43c59fbf168c23126456dba98dc9c0d29b7a3edee159015",
"b5419f6ea89dc32431a7671df1ebf934647bba5b27db54235fb1e47d691b70c3160bf8019653d5faad616b169adfea5d8e7077e9820d9294144354133d45ee16",
"b570d19edbda421e0975056b5fdf4cefbc3825b840aacaa337567ec1aa151a81633eb645a86c8c1c22b23e7f916c60c20cb115de29b670511fb9413611e8cc3b",
"baff09432cffceac6ecd395a8ed5c947fdafe6c30c1c0f3b83c4ddfaa2ca9d57b21876153ac2b82067d7d37bc6789e2f68558f1f26fbcd53fd6a500124f80655",
"bc0b544f1c13cc1d0fe15b0eab96e89e6d4dfc8919de1fb757ef97a7d5de9efff5e520def5a8471b75480fd49d410d222ed9332089bd527946c74070e8ad1934",
"bc3886ba087d3fd637a4fa85adf33170e23b369c0c6eca422ddb26c73c04ae467e2b95ed73bead19013001af65bf2cf0d686a6e702b458a77068184c8b17dfb5",
"bd609b6955a6a35a5580a6e19e173b02fa6d4ed880b6cba8fb5d2fd91309dc753326a824a47ee6148b3d6a01b9b49ce7c1122b1e30b6ea181bd257bbc38c2940",
"bet",
"bf64f21ff129fae4bf3ff795c39df0a4a6dc40ece1d71747a913dd84af2e4cac4e1b84213e23cb1397b3299f26b1b6302a3cdbd41da8baea2505febd6e1803ce",
"bfe8d96ce71f9cce7bd16b5282041c66773405f1a11f4f0c8d3b6e81646f262bdac0cb3ee8f54e13175ba9ed7da38407e8a9aeff20972271f0c62c0b19f8b644",
"c088e98f02d33581ac0d79c37a101e4273e0750a5691cffd96a09c38742617dae948cbc4affbff4ece1d611e44ea5539f0597eef33ef39f7f0e3ec2a5edf75eb",
"c1bafe50eb70a1b65188fac549c6bbe7f641b672fbe9fd08cb64ed1f176efbedeca88f5c295d508e2dbf9b495fe0040bbbfbc4776af0d6cad6576a997db3e4cc",
"c1e166044d7731207ce8b838011eae84814857a8ddb63b8a393d2497bdcd7e96d045aa229a7978533646cf9f9ea99a619943599d47a1558073690601fb486ad5",
"c21ee36416076c1929dd93af7e936e371d4fe263662a2deb8fd6b0e5cd5b8cd86437b4afb2faa8813bd7b8689c7f56a63729a1e666684d8303f469faad669e54",
"c3f1018eb1f7b5e5c0210deab309d06d3e8e9e15ec7dd41d2dbcf863c39e36955b2034fe44af5a4983285b8fc6c0d92b092f95383f8989c1d75a40a4bcdd3d83",
"c4d740361d5f6bdcf408abc029d8adceb35f06c332c46fc290d187d96562992a8d6caa562eaa21643c346d44c9e706cd991ba986e53cfe37b41a0e048d14d6e0",
"c5d4c712e06053bc35bc6cef173daaaae7fd47db5ac812b95a2f0f08374432ffeaa2b49a0f10cb60f38405d2459489df0e43fb73b48bdb6caadcb4405915c33e",
"c777d3358a0aff067b64f254ac462fa223a1650af20ce2af341de610eebbb55a128a1dc43c91da7a1844848b5920b7dd5c5e0a1e8651d6442a2418709dad8c87",
"c96c4e97012d25add2fe69513a5b1f941fc36c837737780c443203c72182b808a129982ebd64aaffb8eda4ba3c8787fd98ca55fd33f060f63917567446417574",
"c9774fae6c0a30b456a21005abf026799f370a12fbcbc098e81bac2456955320ec6e712f1d6f9d59a50d615f81c6284785292180364598987a7990ae83c0f0c9",
"cb557116fa7b3b6da35024b539795d9e255c111c06edbf0e77ba728dd352353182c96918c649fb9327bbb4fea1bb25affcade9b5069676b191611062941356e7",
"cc3b30ba0f733abfe64667838f620c4f542db4665fa68e4d945b75ac0d2c435e6529e6541c4ac8ca18dec753b10e3a5c4614cfbc658dc951ab6cab357e6ef363",
"ccd85fb40538f948396a4c2bf381ea591927a7cde9330ecab883cad5bd59db56f0c983362f9d0a8e88a67d3f2bae2182bc8ea94b4e3adc721c782ca5c801e2af",
"cd1226e73c8275de15f2edb3744a413277fc76a4ebb7842fb743215c14b405b96c4e64bc8324feafe58937da218a1b0aeb9451d5781672ced1ad68c31eb54ba1",
"cdc5df38351edbdf7afdb3aaf0b4f53253cedbf3f43d662548a432f86389505fd6f2f64f51f951355f4fcfc5718a98dd782e1472246556c87f0bbaacebb38cb8",
"ce31dc5dfa61834e3ab67925ff5f24baf04b4aee6e35cd8ffa524f87b2e2e094999f85c68cc7a1c0e9b19016d050c1755406d02f7116ef85afa355c65a9a5855",
"cefa12e7ac99a5d11df487ab6521837b11165246d1c3cdb2108770532cb1429c2dcba5262a4dbd9a37686bb76ad1c48ddecf473d807c2e552534b24bb78ee30d",
"cf5b890eb74b4ac647d011a989a92a413c23c0db580c87057fc5afba2d83dd861f2a8640fb952381d090328d6278dbe56713d516020ce95cfb6d4fecf63b89e7",
"cf8c583b1282449a97b72e317e56d5a4d1432e5420148a21ba8fd8bb2a172c7832379f30cd6582bd6674b548deb8517c8915c5c4b423bd3e73903f71b8862380",
"d0cd44fcdae652efb0dd428cd1b8f1911e6eb2ca3469a1f2d6f9faf97a9d05e30f28387dfb81bfb4c97eba64187a0c047c85bf06998ccaec58781f3982626bb6",
"d15bfc3278de168872744ebec8fc7a07678bd04b7557e89749eeedc7087fe0a36cb8b094e978e979d67feba46c4a2741f0fab18010796b5ff436836a5fc67e88",
"d3d64ab67746fcb7b4a37d6b6b80c9d4b11afd9e15d81a60b3fde53e4f99267a63b50cfb2184c7c84c9f0dd4345c0d929160a7df52698a82603c112e0bf8ab8e",
"d647b73602a3a0c1b06f282a612c29eefc6a7e372bc8af212a41f481843c23a975b41ca402f06ecb7dc660d4dd22a814f7659b48da7dfd28c02a319032394da1",
"d98d182c89b465adb0fdd1cc5c2bcb22b81fcc4eb941977b667de22927ccc9a7876033008118957d803c83afb95595986bcc076e77483dd55dca91ce253ba010",
"dafd89491990553f5e22021f96344b3bc92be6a419c919ba78860876f226e51e668dbabcb11cf9500f3bd05582b387907ea007b5e8f37c78fb71ac819b9bc20e",
"db6932752693a1b2e7ef9af4adbf6fc8a299f21965ff9ff52b141563a471600df9308a89562af7b664b7fe14da134b4f44beafcf910f8794652e16dc475796b5",
"db8eecd1ac9b20918e31f04331e46007f367c1f6365c9c4abb7af70eb1d2ea12174375fb95d1d11c46e03c81976de6d68f70693e1ea7f2096aecf06307a17d29",
"dbe9efadf636bdd82f3ac2b3710653421e7cefca01b74012824b73f7368469fc4dd7e788b047920d4b3b7e4a486c732872ca11a75a89d1323337191ac2bc899e",
"dca1aa77f919ef1000d91291ba68800340332c299e3c4c6bcabb41fd2305f36db353211d6ac691c37d16889e3c3ffc1efb7c621e8040cb77b7249e264af44768",
"dee065b956b99b10db4763759d64c41791af1a7e77f1864f90a2b0847a12633dcf9bc108db7eaf73cc8d0e750f5c37383a56cd77cc2276d3960104c6bebe6346",
"df52eb2c24a6c35b977a1d0fab336ab5c21cd84f78f685d5f0bea9ebaa7c078c0ca69717455e29f17bcd9282a1af9cbbe2d3e608c62cecf868419da081e2d810",
"dfc8721858bd56b846473eb6123420a2735fc69cd77a92a1d2c623c51eab3ac664d61a890d305c6fe77ec48f2759248744e9d56689f6c22317bbaa316c848fbc",
"e249989b0c397ac03583594a3911c9e9222ccce620921170bb39b8ab6fdaf136b164f3c9fcd8b4f750fc469c9cd69f144c2ca2dd918fcb778148fbf9751a869b",
"e364856fe22a5c80cc8d13ee445473a0eb7204bad6972fc4c116ea1551b50da43a01577ef0487f2afb7aaee4b4155d61b1ff2b83dc502363929de76af0226818",
"e37d85b60af58cc03e9b36e09dee5e8308368f44f91b28455e7f645a13fe29902e7f7d594ccb600e02caf4202a05d15477d4ea5191c7b97038ea06d73ce93c33",
"e41b679ec1446821bf0a80fa7003fb90ac66b79d09c00dccf702a1b254f9ea85a68b0643ecd81d999413d5814b06b9998afd9876062067f51a63747533921d08",
"e61565e75d632748413d51997cabb00613355f0a94cf6b2f929fdfa351490d2afc9bad72c7fa67595d9d9c7adc9454e8d1b05527991a17258424b14ec4e9a1d5",
"e6a64aa839b95caeb74d810677a33b747e23907213719dd9706af7364b4cacf204b09f9b26686a70cd6d416a6b590f87103cc683685529968ea0edd75107f649",
"e751d2f83310990aedc7392b54f827afac1873e9f8861e625814a8d1d15776160864742d557796d07a612479b2886287b417273cc9f7718889216c2ec3b3b7ed",
"e7847a5814b865bc043600fee7d810b9815da389278fdfdd412114ab8f87b1536f4b63f3f7c3d3eeb097486abc152043eefdae6fd12c2f8743dac1cb668ab136",
"e82ba384934ac4780595261c43eeceb3df29a047087870f5da13c7acae782b4b97857b98852ce235428b4bc24aa4ddcdcd7297acf683421201eff1c3fbcab84e",
"e9da05b6d590dcf94addabd168c543be41a2275ddf44f6f44db1e3698f0bf7dd67f2e93b66679e0a0d42a2f39f3bc6a389f0e6b362431d0cb197fe46f9dd6606",
"ea8456e0667e1cce6273cb333b7e6982f9aa0f260c7c103e04eb0076a73fe3497070b1a8f0c45b097dc3100a30254095a1c63e9514367655e9a378344ed25d1d",
"eb415e110eaff48bdbc03b5ab719f64593f222b4a1d872b552e4fc48d338e532d1954f76e94813e44a6cd030425b4076cd7b9bf388b870a31344545d092dfa1a",
"eb5078bcb64f9595d6d8589ad60502b2870f16942fbb4cbd2483c817c7fa460faeda90b82bcf531ac96be8c1d6825953ab85ab0bd46ea477615e71e50386ffe0",
"ed5c9e654bfb28e9d4131b3805597ee9fa14fe72c6e2a6d503ec2e47faf396bbfa15ce49e6fe83bd97da1d441138545d388a329ae888c1f1ea44fc62996d787f",
"edba91511ccf8ab01de2e2cef34c47d8430f8a2f4c62cd66c42ecb62da52d396e909aef7da067eedc58e1eb58a1fc3697939371e6a36c931af5987a50509854b",
"ee216d2d13cba1a951445b061771ab0c97eb3c250003e16008debd85fa0317a508f923db79c796dc29de18c83baad5b15651f80db1cf7aee854e6da28853b742",
"eli",
"eva",
"f1812dbb566edaa2ac92121641e5ae504d647bec835a02ed5d7c7f90424d0e8fe202846a599c2f74c49ec9b86181d3d6c50ac0688baa9b4c28608d592becdfdb",
"f26a63e5171e2935e13015fbb755f04bff87fb1767ac91aa3481b9fe13b54cc75f772b41dfdc634829dd9b44c7b08798ed114046ef981d454889c41d4f6408d9",
"f2f359ea473c07070fd1e50d2fcfe3dc4f624f01678c35920b079660b2d5b9c1743259ae6129992cd3b99ec2cdda94a45e8710888488b196c6cd9c853e86e454",
"f35ce011f75fc01d153a94339aad24ae4fd5f181af55916a5ca0153cd5220ed199b98459eb88e9f4f3a4f8fbcf5c272bafdca35ddaca0827c4b480f79e7db1d6",
"f4df4a067fec667827901fb55acb16acc4650f24eeaa588af1a103e5009e9166f753c7cd313d0d3dec79abb82a13c43fd2059db5ac0307b78369ca318001c4e7",
"f56a47b89ebd2d22f869e2260b55f70d7ae0d499fc3fd4dbcb0e6e507f12513f29c004b9426e428696df0d434e4ad467f143bc620a2f661a54608de9e2c265d5",
"f61bdd3abb2d03f07e33bfb0b9fba46069468cefd9eda04e77cdc5c2f13a417716d3e60ca91c39de1a480b72112ef0e6143e927fad45410ee252cfce9034f0b1",
"f62196a11f50362b35eb1ed830b03c18bb187e4d07014a3d1b238756fe836f254afa923184170512a0c6d990032b4b1edb25dd2b74f6fc15f6ef6b51b6f82dd4",
"f8e4bf2dd4f93dd473b055ebf2dfa6081703014fddca40a0efb6bd5dcb702244a30a2d3edcd6597ea4118c20258da575a0bc69a895356519d8400a5ad3b2bf58",
"f980d152d5c14c6e7557f13fe26305ed0105dbb23177d455372e5529a5d3333e203070e87352d985a136f5ce3976a16b97070a4343fb4cb9d0760d9bcd5c7677",
"fa8641fb64db60e7299f070f6497678dee0bfdeefcc22a51ca328da34b33fdd6c31b882d97fc32cfcdeee4fcb72b05d7eae43b10b531db161b7e8dcfc2775ebf",
"fdde8cdd2fa5689aec75121e3c0778ca8c37238fd6a64706d85a4156d7735c482f1db74cefd023e94587b64a56d4a06e3b7fecf5c85978a4c777c9eaa5c633fd",
"jan",
"mar",
"ona",
"pau",
"pep",
"pol",
"teo"
]
}
}
},
"fa": {
"custom": {
"glow-tts": {
"id": "tts_models/fa/custom/glow-tts"
}
}
},
"bn": {
"custom": {
"vits-male": {
"id": "tts_models/bn/custom/vits-male"
},
"vits-female": {
"id": "tts_models/bn/custom/vits-female"
}
}
}
}

View File

@@ -0,0 +1,206 @@
import { saveTtsProviderSettings } from './index.js';
export { CosyVoiceProvider };
/**
 * TTS provider backed by a CosyVoice HTTP server.
 *
 * The provider talks to two routes on `settings.provider_endpoint`:
 * `GET /speakers` to list voices and `POST /` to synthesise speech.
 */
class CosyVoiceProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    ready = false;
    /** Voice objects as returned by the `/speakers` route. */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * This provider passes the text through unchanged.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    modelTypes = {
        CosyVoice: 'CosyVoice',
    };

    /** Defaults; also the whitelist of keys loadSettings() accepts. */
    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
    };

    /**
     * Markup for the provider's settings panel.
     * @returns {string} HTML fragment containing the endpoint input
     */
    get settingsHtml() {
        // The markup lines are kept unindented so the emitted string is unchanged.
        return `
<label for="tts_endpoint">Provider Endpoint:</label>
<input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
<span>Windows users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_For_Windows">CosyVoice_For_Windows</a>(Unofficial).</span><br/>
<span>Macos Users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_for_MacOs">CosyVoice_for_MacOs</a>(Unofficial).</span><br/>
<br/>
`;
    }

    /**
     * Used when provider settings are updated from the UI: reads the endpoint
     * input, persists the settings and re-applies them.
     */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#tts_endpoint').val();
        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Populate the provider state and UI from saved settings.
     * @param {object} settings Saved settings; keys not present in defaultSettings are ignored
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so saved values never overwrite the defaults themselves.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings. The own-property check
        // keeps inherited names such as `__proto__` or `constructor` out.
        for (const [key, value] of Object.entries(settings)) {
            if (Object.hasOwn(this.settings, key)) {
                this.settings[key] = value;
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint).on('change', this.onSettingsChange.bind(this));

        await this.checkReady();

        console.info('ITS: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch the voice list.
     * Uses Promise.allSettled, so a failing request does not reject this call.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /**
     * Re-run the readiness check (refreshes the voice list).
     * @returns {Promise<void>}
     */
    async onRefreshClick() {
        return await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by name, fetching the voice list first if it is empty.
     * @param {string} voiceName Name to match against each voice's `name`
     * @returns {Promise<object>} The matching voice object
     * @throws {Error} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        // Loose equality is kept on purpose so a numeric `name` coming from the
        // server still matches a string voiceName.
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to synthesise
     * @param {string} voiceId Voice identifier, sent to the server as `speaker`
     * @returns {Promise<Response>} Fetch response of the generation request
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from `<endpoint>/speakers` and cache it in `this.voices`.
     * @returns {Promise<any>} Parsed JSON body of the response
     * @throws {Error} `HTTP <status>: <body>` when the response is not ok
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);
        console.info(response);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON,
            // and an object would only render as "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        this.voices = responseJson;
        return responseJson;
    }

    /**
     * Hook invoked whenever a parameter changes; currently a no-op.
     * @returns {Promise<void>}
     */
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use, sent as the `speaker` field
     * @param {string|null} [lang] Currently unused
     * @param {boolean} [forceNoStreaming] When true, never request streaming even if enabled in settings
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} `HTTP <status>: <body>` when the response is not ok
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const streaming = this.settings.streaming && !forceNoStreaming;

        const params = {
            text: inputText,
            speaker: voiceId,
        };
        if (streaming) {
            params.streaming = 1;
        }

        const url = `${this.settings.provider_endpoint}/`;
        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(params),
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Interface not used by this provider; resolves with the id it was given.
     * @param {string} history_item_id History item identifier
     * @returns {Promise<string>} The same identifier
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}

View File

@@ -0,0 +1,38 @@
/* Direct .tts_block children of the MiniMax settings container: 5px gap and 5px vertical margin. */
.minimax_tts_settings>.tts_block {
gap: 5px;
margin: 5px 0;
}
/* Item row: horizontal flex box with its children pushed to opposite ends and
   vertically centred, drawn on a dark rounded background. */
.minimax-custom-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 8px;
background: #333;
margin: 5px 0;
border-radius: 4px;
}
/* Info area of an item: children are stacked vertically. */
.minimax-custom-item-info {
display: flex;
flex-direction: column;
}
/* Item name is emphasised in bold. */
.minimax-custom-item-name {
font-weight: bold;
}
/* Item details use a lighter grey and a slightly smaller font. */
.minimax-custom-item-details {
color: #aaa;
font-size: 0.9em;
}
/* Compact sizing for the remove control of an item. */
.minimax-custom-item-remove {
padding: 4px 8px;
font-size: 12px;
}
/* Empty-list element is shown in muted grey italics. */
.minimax-empty-list {
color: #888;
font-style: italic;
}

View File

@@ -0,0 +1,11 @@
/* Container: children stacked vertically with a 10px gap. */
#openai-character-instructions {
display: flex;
flex-direction: column;
gap: 10px;
}
/* Each .character-instructions group inside the container: stacked vertically with a tighter 5px gap. */
#openai-character-instructions .character-instructions {
display: flex;
flex-direction: column;
gap: 5px;
}

View File

@@ -0,0 +1,270 @@
import { getRequestHeaders } from '../../../script.js';
import { getApiUrl } from '../../extensions.js';
import { doExtrasFetch, modules } from '../../extensions.js';
import { getPreviewString } from './index.js';
import { saveTtsProviderSettings } from './index.js';
export { EdgeTtsProvider };
/** Backends that can serve Edge TTS requests. */
const EDGE_TTS_PROVIDER = {
    extras: 'extras',
    plugin: 'plugin',
};

/**
 * TTS provider for Microsoft Edge voices. Requests are routed either through
 * the Extras API or through the server plugin, depending on `settings.provider`.
 */
class EdgeTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        rate: 0,
        provider: EDGE_TTS_PROVIDER.extras,
    };

    /** Markup of the provider settings panel (provider selector and rate slider). */
    get settingsHtml() {
        let html = `Microsoft Edge TTS<br>
<label for="edge_tts_provider">Provider</label>
<select id="edge_tts_provider">
<option value="${EDGE_TTS_PROVIDER.extras}">Extras</option>
<option value="${EDGE_TTS_PROVIDER.plugin}">Plugin</option>
</select>
<label for="edge_tts_rate">Rate: <span id="edge_tts_rate_output"></span></label>
<input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />
`;
        return html;
    }

    /** Read the controls back into `this.settings`, refresh the rate label and persist. */
    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
        this.settings.provider = String($('#edge_tts_provider').val());
        saveTtsProviderSettings();
    }

    /**
     * Apply saved settings, sync the UI controls and run the readiness check.
     * @param {object} settings Saved provider settings; only keys present in `defaultSettings` are accepted
     * @throws {string} When `settings` contains an unknown key
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that applying saved values never rewrites defaultSettings.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                // Thrown as a string on purpose: that is what callers have always received.
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
        $('#edge_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#edge_tts_provider').val(this.settings.provider || EDGE_TTS_PROVIDER.extras);
        $('#edge_tts_provider').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();
        console.debug('EdgeTTS: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds
     */
    async checkReady() {
        await this.throwIfModuleMissing();
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh hook; this provider has nothing to refresh. */
    async onRefreshClick() {
        return;
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(voice => voice.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from the configured backend.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     *          Voices sorted by locale, then by short name
     * @throws {Error} When the backend is unavailable or answers with a non-2xx status
     */
    async fetchTtsVoiceObjects() {
        await this.throwIfModuleMissing();

        const url = this.getVoicesUrl();
        const response = await this.doFetch(url);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the blob URL once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the backend is unavailable or answers with a non-2xx status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        await this.throwIfModuleMissing();

        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const url = this.getGenerateUrl();
        const response = await this.doFetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                'text': inputText,
                'voice': voiceId,
                'rate': Number(this.settings.rate),
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Perform a fetch request using the configured provider.
     * @param {string} url URL string
     * @param {any} options Request options
     * @returns {Promise<Response>} Fetch response
     */
    doFetch(url, options) {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            return doExtrasFetch(url, options);
        }
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return fetch(url, options);
        }
        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS generation endpoint.
     * @returns {string} URL string
     */
    getGenerateUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/generate';
            return url.toString();
        }
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/generate';
        }
        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS voices endpoint.
     * @returns {string} URL string
     */
    getVoicesUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/list';
            return url.toString();
        }
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/list';
        }
        throw new Error('Invalid TTS Provider');
    }

    /**
     * Reject when the backend selected in the settings is not available.
     * @throws {Error} When the Extras module or the server plugin is missing
     */
    async throwIfModuleMissing() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras && !modules.includes('edge-tts')) {
            const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
            // toastr.error(message)
            throw new Error(message);
        }
        // isPluginAvailable() is async: without the await the check would test a
        // Promise object, which is always truthy, and could never fail.
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin && !(await this.isPluginAvailable())) {
            const message = 'Edge TTS Server plugin not loaded. Install it from https://github.com/SillyTavern/SillyTavern-EdgeTTS-Plugin and restart the SillyTavern server.';
            // toastr.error(message)
            throw new Error(message);
        }
    }

    /**
     * Probe the server plugin endpoint.
     * @returns {Promise<boolean>} True when the probe answers with a 2xx status; false on any failure
     */
    async isPluginAvailable() {
        try {
            const result = await fetch('/api/plugins/edge-tts/probe', {
                method: 'POST',
                headers: getRequestHeaders({ omitContentType: true }),
            });
            return result.ok;
        } catch (e) {
            // A probe that cannot be sent is treated as "plugin not available".
            return false;
        }
    }
}

View File

@@ -0,0 +1,455 @@
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getPreviewString, saveTtsProviderSettings, initVoiceMap } from './index.js';
export { ElectronHubTtsProvider };
/**
 * TTS provider backed by the Electron Hub unified API. The model list (with its
 * parameter schema and voices) is loaded from the server and drives both the
 * settings UI and the generation request body.
 */
class ElectronHubTtsProvider {
    settings;
    voices = [];
    models = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'tts-1',
        speed: 1,
        temperature: 1,
        top_p: 1,
        // GPT-4o Mini TTS
        instructions: '',
        // Dia
        speaker_transcript: '',
        cfg_filter_top_k: 25,
        cfg_scale: 3,
        // Microsoft TTS
        speech_rate: 0,
        pitch_adjustment: 0,
        emotional_style: '',
    };

    /** Markup of the provider settings panel. */
    get settingsHtml() {
        let html = `
<div>Electron Hub unified TTS API.</div>
<div class="flex-container alignItemsCenter">
<div class="flex1"></div>
<div id="electronhub_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_electronhub">
<i class="fa-solid fa-key"></i>
<span>API Key</span>
</div>
</div>
<div class="flex-container flexGap10 wrap">
<div class="flex1">
<label for="electronhub_tts_model">Model</label>
<select id="electronhub_tts_model" class="text_pole"></select>
</div>
<div>
<label for="electronhub_tts_speed">Speed <span id="electronhub_tts_speed_output"></span></label>
<input type="range" id="electronhub_tts_speed" value="1" min="0.25" max="4" step="0.05">
</div>
<div>
<label for="electronhub_tts_temperature">Temperature</label>
<input id="electronhub_tts_temperature" class="text_pole" type="number" min="0" max="2" step="0.1" value="1" />
</div>
<div id="electronhub_block_top_p" style="display:none;">
<label for="electronhub_tts_top_p">Top-p</label>
<input id="electronhub_tts_top_p" class="text_pole" type="number" min="0" max="1" step="0.01" value="1" />
</div>
</div>
<div id="electronhub_block_instructions" style="display:none;">
<label for="electronhub_tts_instructions">Instructions (GPT-4o Mini TTS):</label>
<textarea id="electronhub_tts_instructions" class="textarea_compact autoSetHeight" placeholder="e.g., 'Speak cheerfully and energetically'"></textarea>
</div>
<div id="electronhub_block_dia" style="display:none;">
<label for="electronhub_tts_speaker_transcript">Speaker transcript (Dia):</label>
<textarea id="electronhub_tts_speaker_transcript" class="textarea_compact autoSetHeight" maxlength="1000"></textarea>
<label for="electronhub_tts_cfg_scale">CFG scale (1-5):</label>
<input id="electronhub_tts_cfg_scale" type="number" min="1" max="5" step="1" />
<label for="electronhub_tts_cfg_topk">CFG filter top_k (15-50):</label>
<input id="electronhub_tts_cfg_topk" type="number" min="15" max="50" step="1" />
</div>
<div id="electronhub_block_msft" style="display:none;">
<div class="flex-container flexGap10 wrap">
<div>
<label for="electronhub_tts_speech_rate">Speech rate (-100..100)</label>
<input id="electronhub_tts_speech_rate" class="text_pole" type="number" min="-100" max="100" step="1" style="width:120px;" />
</div>
<div>
<label for="electronhub_tts_pitch_adjustment">Pitch adjustment (-100..100)</label>
<input id="electronhub_tts_pitch_adjustment" class="text_pole" type="number" min="-100" max="100" step="1" style="width:120px;" />
</div>
</div>
<div class="flex-container flexGap10">
<div class="flex1">
<label for="electronhub_tts_emotional_style">Emotional style</label>
<input id="electronhub_tts_emotional_style" class="text_pole" type="text" placeholder="cheerful, sad, angry, gentle..." />
</div>
</div>
</div>
<div id="electronhub_dynamic_params" class="flex-container flexGap10 wrap" style="display:none;"></div>`;
        return html;
    }

    constructor() {
        // Reacts to secret changes: refresh the key indicator and reload models/voices.
        this.handler = async (/** @type {string} */ key) => {
            if (key !== SECRET_KEYS.ELECTRONHUB) return;
            $('#electronhub_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELECTRONHUB]);
            await this.onRefreshClick();
        };
    }

    /** Detach the secret-change listeners registered by loadSettings(). */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /**
     * Merge saved settings over the defaults, load the model list and wire up the UI.
     * @param {object} settings Saved provider settings
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length == 0) {
            console.info('Using default Electron Hub TTS settings');
        }
        this.settings = { ...this.defaultSettings, ...settings };

        await this.loadModels();
        this.populateModelSelect();

        // [element id, settings key, DOM event that signals a change]
        const controls = [
            ['electronhub_tts_model', 'model', 'change'],
            ['electronhub_tts_speed', 'speed', 'input'],
            ['electronhub_tts_temperature', 'temperature', 'input'],
            ['electronhub_tts_top_p', 'top_p', 'input'],
            ['electronhub_tts_instructions', 'instructions', 'input'],
            ['electronhub_tts_speaker_transcript', 'speaker_transcript', 'input'],
            ['electronhub_tts_cfg_scale', 'cfg_scale', 'input'],
            ['electronhub_tts_cfg_topk', 'cfg_filter_top_k', 'input'],
            ['electronhub_tts_speech_rate', 'speech_rate', 'input'],
            ['electronhub_tts_pitch_adjustment', 'pitch_adjustment', 'input'],
            ['electronhub_tts_emotional_style', 'emotional_style', 'input'],
        ];
        const onChange = () => {
            // onSettingsChange() is async; log failures instead of leaving the promise unhandled.
            this.onSettingsChange().catch(err => console.error('Electron Hub TTS: settings update failed', err));
        };
        for (const [id, key, eventName] of controls) {
            const control = $(`#${id}`);
            control.val(this.settings[key]);
            control.on(eventName, onChange);
        }
        $('#electronhub_tts_speed_output').text(this.settings.speed);

        $('#electronhub_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELECTRONHUB]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.on(event, this.handler);
        });

        await this.checkReady();
        this.updateConditionalBlocks();
        this.renderDynamicParams();
        console.debug('Electron Hub TTS: Settings loaded');
    }

    /**
     * Read the static controls back into `this.settings`, refresh the UI and persist.
     * When the model changed, the voice list and the voice map are rebuilt as well.
     */
    async onSettingsChange() {
        const previousModel = this.settings.model;
        this.settings.model = String($('#electronhub_tts_model').find(':selected').val() || this.settings.model);
        this.settings.speed = Number($('#electronhub_tts_speed').val());
        $('#electronhub_tts_speed_output').text(this.settings.speed);
        this.settings.temperature = Number($('#electronhub_tts_temperature').val());
        this.settings.top_p = Number($('#electronhub_tts_top_p').val());
        this.settings.instructions = String($('#electronhub_tts_instructions').val() || '');
        this.settings.speaker_transcript = String($('#electronhub_tts_speaker_transcript').val() || '');
        this.settings.cfg_scale = Number($('#electronhub_tts_cfg_scale').val());
        this.settings.cfg_filter_top_k = Number($('#electronhub_tts_cfg_topk').val());
        this.settings.speech_rate = Number($('#electronhub_tts_speech_rate').val());
        this.settings.pitch_adjustment = Number($('#electronhub_tts_pitch_adjustment').val());
        this.settings.emotional_style = String($('#electronhub_tts_emotional_style').val() || '');

        this.updateConditionalBlocks();
        this.renderDynamicParams();
        saveTtsProviderSettings();

        if (previousModel !== this.settings.model) {
            this.voices = await this.fetchTtsVoiceObjects();
            await initVoiceMap();
        }
    }

    /**
     * Load the model list from the server and keep only models exposing an
     * audio/speech endpoint. On failure the list is emptied and a warning is logged.
     */
    async loadModels() {
        try {
            const response = await fetch('/api/openai/electronhub/models', {
                method: 'POST',
                headers: getRequestHeaders({ omitContentType: true }),
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }

            /** @type {Array<any>} */
            const data = await response.json();
            const allModels = Array.isArray(data) ? data : [];
            const isSpeechEndpoint = ep => typeof ep === 'string'
                && (ep === '/v1/audio/speech' || ep.endsWith('/audio/speech') || ep === 'audio/speech');
            this.models = allModels.filter(m => Array.isArray(m?.endpoints) && m.endpoints.some(isSpeechEndpoint));

            // Fall back to the first available model when the saved one is gone.
            if (this.models.length > 0 && !this.models.find(m => m.id === this.settings.model)) {
                this.settings.model = this.models[0].id;
                saveTtsProviderSettings();
            }
        } catch (err) {
            console.warn('Electron Hub models fetch failed', err);
            this.models = [];
        }
    }

    /** Rebuild the model <select>, one <optgroup> per vendor. */
    populateModelSelect() {
        const select = $('#electronhub_tts_model');
        select.empty();

        const groups = this.groupByVendor(this.models);
        for (const [vendor, models] of groups.entries()) {
            const optgroup = document.createElement('optgroup');
            optgroup.label = vendor;
            for (const m of models) {
                const opt = document.createElement('option');
                opt.value = m.id;
                opt.text = m.name || m.id;
                optgroup.appendChild(opt);
            }
            select.append(optgroup);
        }

        if (this.models.find(x => x.id === this.settings.model)) {
            select.val(this.settings.model);
        }
    }

    /**
     * Group models by vendor prefix from name before ':'
     * @param {Array<any>} array
     * @returns {Map<string, any[]>}
     */
    groupByVendor(array) {
        const groups = new Map();
        for (const entry of array) {
            const name = String(entry?.name || entry?.id || 'Other');
            const vendor = name.split(':')[0].trim() || 'Other';
            if (!groups.has(vendor)) groups.set(vendor, []);
            groups.get(vendor).push(entry);
        }
        return groups;
    }

    /**
     * Work out which optional parameter groups apply to the selected model.
     * A group applies when the model schema declares one of its parameters or
     * when the model id / vendor matches the well-known names. The settings UI
     * and the request body both use this single answer, so a field that is
     * shown to the user is also sent.
     * @returns {{model: any, instructions: boolean, dia: boolean, msft: boolean, topP: boolean}}
     */
    #getModelFeatures() {
        const model = this.models.find(m => m.id === this.settings.model);
        const rawParams = model?.parameters;
        const params = (rawParams && typeof rawParams === 'object') ? rawParams : {};
        const lowerId = String(this.settings.model || '').toLowerCase();
        const vendorName = String(model?.name || '').split(':')[0].trim().toLowerCase();

        return {
            model,
            instructions: 'instructions' in params || lowerId === 'gpt-4o-mini-tts',
            dia: 'speaker_transcript' in params || 'cfg_scale' in params || 'cfg_filter_top_k' in params || lowerId.includes('dia'),
            msft: 'speech_rate' in params || 'pitch_adjustment' in params || 'emotional_style' in params
                || vendorName === 'microsoft' || lowerId.includes('microsoft-tts'),
            topP: 'top_p' in params,
        };
    }

    /**
     * Schema entries that have no dedicated control and are therefore rendered
     * and sent dynamically.
     * @param {any} model Model object from `this.models` (may be undefined)
     * @returns {Array<[string, any]>} `[key, spec]` pairs
     */
    #getDynamicParamEntries(model) {
        const rawParams = model?.parameters;
        const params = (rawParams && typeof rawParams === 'object') ? rawParams : {};
        const exclude = new Set(['input', 'response_format', 'model', 'speed', 'temperature', 'top_p', 'instructions', 'speaker_transcript', 'cfg_scale', 'cfg_filter_top_k', 'speech_rate', 'pitch_adjustment', 'emotional_style']);
        // A model that ships its own voice list gets the regular voice selection.
        if (Array.isArray(model?.voices) && model.voices.length > 0) exclude.add('voice');
        return Object.entries(params).filter(([k]) => !exclude.has(k));
    }

    /** Show or hide the model-specific settings blocks. */
    updateConditionalBlocks() {
        const features = this.#getModelFeatures();
        $('#electronhub_block_instructions').toggle(features.instructions);
        $('#electronhub_block_dia').toggle(features.dia);
        $('#electronhub_block_msft').toggle(features.msft);
        $('#electronhub_block_top_p').toggle(features.topP);
    }

    /**
     * Build UI for additional model parameters dynamically
     */
    renderDynamicParams() {
        const container = $('#electronhub_dynamic_params');
        container.empty();

        const model = this.models.find(m => m.id === this.settings.model);
        const entries = this.#getDynamicParamEntries(model);
        container.toggle(entries.length > 0);
        if (entries.length === 0) return;

        for (const [key, rawSpec] of entries) {
            const spec = rawSpec ?? {};
            const nice = key.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
            const type = String(spec.type || 'string');
            const id = `electronhub_dyn_${key.replace(/[^a-zA-Z0-9_-]/g, '_')}`;

            // Names and ranges come from the remote model schema, so they are set
            // through .text()/.attr() and never interpolated into markup.
            const makeLabel = text => $('<label></label>').attr('for', id).text(text);
            const persist = value => { this.settings[key] = value; saveTtsProviderSettings(); };

            if (Array.isArray(spec.enum) && spec.enum.length) {
                const el = $('<select class="text_pole"></select>').attr('id', id);
                for (const opt of spec.enum) el.append(new Option(String(opt), String(opt)));
                container.append($('<div></div>').append(makeLabel(nice), el));
                const val = this.settings[key] ?? spec.default ?? spec.enum[0];
                el.val(String(val));
                el.on('change', () => persist(String(el.val() || '')));
                continue;
            }

            if (type === 'boolean') {
                const el = $('<input type="checkbox">').attr('id', id);
                const block = $('<label class="checkbox_label"></label>').attr('for', id)
                    .append(el, document.createTextNode(' '), $('<small></small>').text(nice));
                container.append(block);
                el.prop('checked', !!(this.settings[key] ?? spec.default ?? false));
                el.on('change', () => persist(!!el.is(':checked')));
                continue;
            }

            if (type === 'number' || type === 'integer') {
                const min = spec.minimum ?? undefined;
                const max = spec.maximum ?? undefined;
                const step = type === 'integer' ? 1 : (spec.step ?? 0.01);
                const range = (min != null || max != null) ? ` (${min ?? ''}..${max ?? ''})` : '';
                const el = $('<input type="number" class="text_pole">').attr({ id, step });
                if (min != null) el.attr('min', min);
                if (max != null) el.attr('max', max);
                container.append($('<div></div>').append(makeLabel(`${nice}${range}:`), el));
                const val = this.settings[key] ?? spec.default ?? '';
                if (val !== '') el.val(val);
                el.on('input', () => {
                    const raw = el.val();
                    // An emptied field is stored as '' so the parameter is omitted from requests.
                    persist((raw === '') ? '' : Number(raw));
                });
                continue;
            }

            // Free text: long-form parameters get a textarea, everything else a single-line input.
            const isLong = /instructions|transcript|style|prompt|description/i.test(key);
            const el = isLong
                ? $('<textarea class="textarea_compact autoSetHeight"></textarea>').attr('id', id)
                : $('<input type="text" class="text_pole" />').attr('id', id);
            container.append($('<div></div>').append(makeLabel(nice), el));
            el.val(String(this.settings[key] ?? spec.default ?? ''));
            el.on('input', () => persist(String(el.val() || '')));
        }
    }

    /** Readiness check: (re)build the voice list for the selected model. */
    async checkReady() {
        this.voices = await this.fetchTtsVoiceObjects();
    }

    /** Reload models and voices, refresh the dependent UI and persist. */
    async onRefreshClick() {
        await this.loadModels();
        this.populateModelSelect();
        this.voices = await this.fetchTtsVoiceObjects();
        this.updateConditionalBlocks();
        this.renderDynamicParams();
        saveTtsProviderSettings();
    }

    /**
     * Look up a voice by name.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            // Thrown as a string on purpose: that is what callers have always received.
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * Voices of the selected model, or a fixed fallback list when the model
     * declares none. Every voice is tagged with lang 'en-US'.
     * @returns {Promise<Array<{name: string, voice_id: string, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        const toVoice = name => ({ name, voice_id: name, lang: 'en-US' });
        const model = this.models.find(m => m.id === this.settings.model);
        if (model && Array.isArray(model.voices) && model.voices.length) {
            return model.voices.map(toVoice);
        }
        // Fallback to common OpenAI voices
        const fallback = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer', 'verse'];
        return fallback.map(toVoice);
    }

    /**
     * Generate and play a short preview for a voice.
     * @param {string} voiceId Voice ID
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the blob URL once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    /**
     * Request speech generation from the server.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the server answers with a non-2xx status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating Electron Hub TTS for voice_id ${voiceId}`);

        const body = {
            input: inputText,
            voice: voiceId,
            speed: this.settings.speed,
            temperature: this.settings.temperature,
            model: this.settings.model,
        };

        // Same capability check as the settings UI: whatever block is visible is sent.
        const features = this.#getModelFeatures();
        if (features.instructions) {
            if (this.settings.instructions?.trim()) body.instructions = this.settings.instructions.trim();
        }
        if (features.dia) {
            if (this.settings.speaker_transcript?.trim()) body.speaker_transcript = this.settings.speaker_transcript.trim();
            if (Number.isFinite(this.settings.cfg_scale)) body.cfg_scale = Number(this.settings.cfg_scale);
            if (Number.isFinite(this.settings.cfg_filter_top_k)) body.cfg_filter_top_k = Number(this.settings.cfg_filter_top_k);
        }
        if (features.msft) {
            if (Number.isFinite(this.settings.speech_rate)) body.speech_rate = Number(this.settings.speech_rate);
            if (Number.isFinite(this.settings.pitch_adjustment)) body.pitch_adjustment = Number(this.settings.pitch_adjustment);
            if ((this.settings.emotional_style || '').trim()) body.emotional_style = String(this.settings.emotional_style).trim();
        }
        if (Number.isFinite(this.settings.top_p)) {
            body.top_p = Number(this.settings.top_p);
        }

        // add dynamic params based on schema
        for (const [key] of this.#getDynamicParamEntries(features.model)) {
            const val = this.settings[key];
            if (val === undefined || val === '') continue;
            body[key] = val;
        }

        const response = await fetch('/api/openai/electronhub/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }
}

View File

@@ -0,0 +1,436 @@
import { saveTtsProviderSettings } from './index.js';
import { event_types, eventSource, getRequestHeaders } from '/script.js';
import { SECRET_KEYS, secret_state, writeSecret } from '/scripts/secrets.js';
import { getBase64Async } from '/scripts/utils.js';
export { ElevenLabsTtsProvider };
class ElevenLabsTtsProvider {
settings;
voices = [];
separator = ' ... ... ... ';
defaultSettings = {
stability: 0.75,
similarity_boost: 0.75,
style_exaggeration: 0.00,
speaker_boost: true,
speed: 1.0,
model: 'eleven_turbo_v2_5',
voiceMap: {},
};
get settingsHtml() {
let html = `
<div class="elevenlabs_tts_settings">
<div class="flex-container alignItemsBaseline">
<h4 for="elevenlabs_tts_key" class="flex1 margin0">
<a href="https://elevenlabs.io/app/developers/api-keys" target="_blank">ElevenLabs TTS Key</a>
</h4>
<div id="elevenlabs_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_elevenlabs">
<i class="fa-solid fa-key"></i>
<span>Click to set</span>
</div>
</div>
<label for="elevenlabs_tts_model">Model</label>
<select id="elevenlabs_tts_model" class="text_pole">
<option value="eleven_v3">Eleven v3</option>
<option value="eleven_ttv_v3">Eleven ttv v3</option>
<option value="eleven_multilingual_v2">Multilingual v2</option>
<option value="eleven_flash_v2_5">Eleven Flash v2.5</option>
<option value="eleven_turbo_v2_5">Turbo v2.5</option>
<option value="eleven_multilingual_ttv_v2">Multilingual ttv v2</option>
<option value="eleven_monolingual_v1">English v1 (Old)</option>
<option value="eleven_multilingual_v1">Multilingual v1 (Old)</option>
<option value="eleven_turbo_v2">Turbo v2 (Old)</option>
</select>
<label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
<input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.01" />
<label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
<input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.01" />
<label for="elevenlabs_tts_speed">Speed: <span id="elevenlabs_tts_speed_output"></span></label>
<input id="elevenlabs_tts_speed" type="range" value="${this.defaultSettings.speed}" min="0.7" max="1.2" step="0.01" />
<div id="elevenlabs_tts_v2_options" style="display: none;">
<label for="elevenlabs_tts_style_exaggeration">Style Exaggeration: <span id="elevenlabs_tts_style_exaggeration_output"></span></label>
<input id="elevenlabs_tts_style_exaggeration" type="range" value="${this.defaultSettings.style_exaggeration}" min="0" max="1" step="0.01" />
<label for="elevenlabs_tts_speaker_boost">Speaker Boost:</label>
<input id="elevenlabs_tts_speaker_boost" style="display: inline-grid" type="checkbox" />
</div>
<hr>
<div id="elevenlabs_tts_voice_cloning">
<span>Instant Voice Cloning</span><br>
<input id="elevenlabs_tts_voice_cloning_name" type="text" class="text_pole" placeholder="Voice Name"/>
<input id="elevenlabs_tts_voice_cloning_description" type="text" class="text_pole" placeholder="Voice Description"/>
<input id="elevenlabs_tts_voice_cloning_labels" type="text" class="text_pole" placeholder="Labels"/>
<div class="menu_button menu_button_icon" id="upload_audio_file">
<i class="fa-solid fa-file-import"></i>
<span>Upload Audio Files</span>
</div>
<input id="elevenlabs_tts_audio_files" type="file" name="audio_files" accept="audio/*" style="display: none;" multiple>
<div id="elevenlabs_tts_selected_files_list"></div>
<input id="elevenlabs_tts_clone_voice_button" class="menu_button menu_button_icon" type="button" value="Clone Voice">
</div>
<hr>
</div>
`;
return html;
}
constructor() {
this.handler = async function (/** @type {string} */ key) {
if (key !== SECRET_KEYS.ELEVENLABS) return;
$('#elevenlabs_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELEVENLABS]);
await this.fetchTtsVoiceObjects();
}.bind(this);
}
dispose() {
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
eventSource.removeListener(event, this.handler);
});
}
shouldInvolveExtendedSettings() {
// Models that support extended settings (style_exaggeration, speaker_boost)
const modelsWithExtendedSettings = [
'eleven_v3',
'eleven_ttv_v3',
'eleven_multilingual_v2',
'eleven_multilingual_ttv_v2',
];
return modelsWithExtendedSettings.includes(this.settings.model);
}
onSettingsChange() {
// Update dynamically
this.settings.stability = $('#elevenlabs_tts_stability').val();
this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val();
this.settings.style_exaggeration = $('#elevenlabs_tts_style_exaggeration').val();
this.settings.speaker_boost = $('#elevenlabs_tts_speaker_boost').is(':checked');
this.settings.speed = $('#elevenlabs_tts_speed').val();
this.settings.model = $('#elevenlabs_tts_model').find(':selected').val();
$('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
$('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
$('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
$('#elevenlabs_tts_speed_output').text(this.settings.speed + 'x');
$('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
saveTtsProviderSettings();
}
async loadSettings(settings) {
// Pupulate Provider UI given input settings
if (Object.keys(settings).length == 0) {
console.info('Using default TTS Provider settings');
}
// Only accept keys defined in defaultSettings
this.settings = this.defaultSettings;
// Migrate old settings
if (settings['multilingual'] !== undefined) {
settings.model = settings.multilingual ? 'eleven_multilingual_v1' : 'eleven_monolingual_v1';
delete settings['multilingual'];
}
if (Object.hasOwn(settings, 'apiKey')) {
if (settings.apiKey && !secret_state[SECRET_KEYS.ELEVENLABS]){
await writeSecret(SECRET_KEYS.ELEVENLABS, settings.apiKey);
}
delete settings['apiKey'];
}
$('#elevenlabs_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELEVENLABS]);
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
eventSource.on(event, this.handler);
});
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
} else {
throw `Invalid setting passed to TTS Provider: ${key}`;
}
}
$('#elevenlabs_tts_stability').val(this.settings.stability);
$('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost);
$('#elevenlabs_tts_style_exaggeration').val(this.settings.style_exaggeration);
$('#elevenlabs_tts_speaker_boost').prop('checked', this.settings.speaker_boost);
$('#elevenlabs_tts_speed').val(this.settings.speed);
$('#elevenlabs_tts_model').val(this.settings.model);
$('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_style_exaggeration').on('input', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_speaker_boost').on('change', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_speed').on('input', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this));
$('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
$('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
$('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
$('#elevenlabs_tts_speed_output').text(this.settings.speed + 'x');
$('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
try {
await this.checkReady();
console.debug('ElevenLabs: Settings loaded');
} catch {
console.debug('ElevenLabs: Settings loaded, but not ready');
}
this.setupVoiceCloningMenu();
}
// Perform a simple readiness check by trying to fetch voiceIds
async checkReady() {
await this.fetchTtsVoiceObjects();
}
async onRefreshClick() {
await this.fetchTtsVoiceObjects();
}
setupVoiceCloningMenu() {
const audioFilesInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_audio_files'));
const selectedFilesListElement = document.getElementById('elevenlabs_tts_selected_files_list');
const cloneVoiceButton = document.getElementById('elevenlabs_tts_clone_voice_button');
const uploadAudioFileButton = document.getElementById('upload_audio_file');
const voiceCloningNameInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_name'));
const voiceCloningDescriptionInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_description'));
const voiceCloningLabelsInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_labels'));
const updateCloneVoiceButtonVisibility = () => {
cloneVoiceButton.style.display = audioFilesInput.files.length > 0 ? 'inline-block' : 'none';
};
const clearSelectedFiles = () => {
audioFilesInput.value = '';
selectedFilesListElement.innerHTML = '';
updateCloneVoiceButtonVisibility();
};
uploadAudioFileButton.addEventListener('click', () => {
audioFilesInput.click();
});
audioFilesInput.addEventListener('change', () => {
selectedFilesListElement.innerHTML = '';
for (const file of audioFilesInput.files) {
const listItem = document.createElement('div');
listItem.textContent = file.name;
selectedFilesListElement.appendChild(listItem);
}
updateCloneVoiceButtonVisibility();
});
cloneVoiceButton.addEventListener('click', async () => {
const voiceName = voiceCloningNameInput.value.trim();
const voiceDescription = voiceCloningDescriptionInput.value.trim();
const voiceLabels = voiceCloningLabelsInput.value.trim();
if (!voiceName) {
toastr.error('Please provide a name for the cloned voice.');
return;
}
try {
await this.addVoice(voiceName, voiceDescription, voiceLabels);
toastr.success('Voice cloned successfully. Hit reload to see the new voice in the voice listing.');
clearSelectedFiles();
voiceCloningNameInput.value = '';
voiceCloningDescriptionInput.value = '';
voiceCloningLabelsInput.value = '';
} catch (error) {
toastr.error(`Failed to clone voice: ${error.message}`);
}
});
updateCloneVoiceButtonVisibility();
}
/**
* Get voice object by name
* @param {string} voiceName Voice name to look up
* @returns {Promise<Object>} Voice object
*/
async getVoice(voiceName) {
if (this.voices.length == 0) {
this.voices = await this.fetchTtsVoiceObjects();
}
const match = this.voices.filter(
elevenVoice => elevenVoice.name == voiceName,
)[0];
if (!match) {
throw `TTS Voice name ${voiceName} not found in ElevenLabs account`;
}
return match;
}
/**
* Generate TTS audio
* @param {string} text Text to synthesize
* @param {string} voiceId Voice ID to use for synthesis
* @returns {Promise<Response>} Response object containing audio data
*/
async generateTts(text, voiceId) {
const historyId = await this.findTtsGenerationInHistory(text, voiceId);
if (historyId) {
console.debug(`Found existing TTS generation with id ${historyId}`);
return await this.fetchTtsFromHistory(historyId);
} else {
console.debug('No existing TTS generation found, requesting new generation');
return await this.fetchTtsGeneration(text, voiceId);
}
}
/**
* Find existing TTS generation in history
* @param {string} message Message text used for TTS generation
* @param {string} voiceId Voice ID used for TTS generation
* @returns {Promise<string>} History item ID if found, empty string otherwise
*/
async findTtsGenerationInHistory(message, voiceId) {
const ttsHistory = await this.fetchTtsHistory();
for (const history of ttsHistory) {
const text = history.text;
const itemId = history.history_item_id;
if (message === text && history.voice_id == voiceId) {
console.info(`Existing TTS history item ${itemId} found: ${text} `);
return itemId;
}
}
return '';
}
async fetchTtsVoiceObjects() {
const response = await fetch('/api/speech/elevenlabs/voices', {
method: 'POST',
headers: getRequestHeaders({ omitContentType: true }),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
const responseJson = await response.json();
return responseJson.voices;
}
async fetchTtsVoiceSettings() {
const response = await fetch('/api/speech/elevenlabs/voice-settings', {
method: 'POST',
headers: getRequestHeaders({ omitContentType: true }),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
return response.json();
}
/**
* Fetch new TTS generation from ElevenLabs API
* @param {string} text Text to synthesize
* @param {string} voiceId Voice ID to use for synthesis
* @returns {Promise<Response>} Response object containing audio data
*/
async fetchTtsGeneration(text, voiceId) {
let model = this.settings.model ?? 'eleven_monolingual_v1';
console.info(`Generating new TTS for voice_id ${voiceId}, model ${model}`);
const request = {
model_id: model,
text: text,
voice_settings: {
stability: Number(this.settings.stability),
similarity_boost: Number(this.settings.similarity_boost),
speed: Number(this.settings.speed),
},
};
if (this.shouldInvolveExtendedSettings()) {
request.voice_settings.style = Number(this.settings.style_exaggeration);
request.voice_settings.use_speaker_boost = Boolean(this.settings.speaker_boost);
}
const response = await fetch('/api/speech/elevenlabs/synthesize', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
voiceId: voiceId,
request: request,
}),
});
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
return response;
}
/**
* Fetch existing TTS audio from history
* @param {string} historyItemId History item ID to fetch audio for
* @returns {Promise<Response>} Response object containing audio data
*/
async fetchTtsFromHistory(historyItemId) {
console.info(`Fetched existing TTS with history_item_id ${historyItemId}`);
const response = await fetch('/api/speech/elevenlabs/history-audio', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
historyItemId: historyItemId,
}),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
return response;
}
/**
* Fetch TTS generation history
* @returns {Promise<Array>} Array of TTS history items
*/
async fetchTtsHistory() {
const response = await fetch('/api/speech/elevenlabs/history', {
method: 'POST',
headers: getRequestHeaders({ omitContentType: true }),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
const responseJson = await response.json();
return responseJson.history;
}
/**
* Add a new voice via ElevenLabs API
* @param {string} name Voice name
* @param {string} description Voice description
* @param {string} labels Voice labels
* @returns {Promise<Object>} Newly created voice object
*/
async addVoice(name, description, labels) {
const audioFilesInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_audio_files'));
if (!(audioFilesInput instanceof HTMLInputElement) || audioFilesInput.files.length === 0) {
throw new Error('No audio files selected for voice cloning.');
}
const data = {
name: name,
description: description,
labels: labels,
files: [],
};
for (const file of audioFilesInput.files) {
const base64Data = await getBase64Async(file);
data.files.push(base64Data);
}
const response = await fetch('/api/speech/elevenlabs/voices/add', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify(data),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}. See server console for details.`);
}
return await response.json();
}
}

View File

@@ -0,0 +1,195 @@
import { getRequestHeaders } from '../../../script.js';
import { oai_settings } from '../../openai.js';
import { isValidUrl } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
/**
 * TTS provider that uses the server's native Google TTS endpoints
 * (`/api/google/list-native-voices` and `/api/google/generate-native-tts`).
 */
export class GoogleNativeTtsProvider {
    /** Effective settings; assigned by loadSettings(). */
    settings;
    /** Voice objects cached by fetchTtsVoiceObjects(). */
    voices = [];
    separator = ' . ';
    /** Audio element used by previewTtsVoice(). */
    audioElement = document.createElement('audio');
    defaultSettings = {
        voiceMap: {},
        model: 'gemini-2.5-flash-preview-tts',
        apiType: 'makersuite',
    };

    /** HTML markup of the provider settings panel. */
    get settingsHtml() {
        return `
<small>Hint: Save an API key in the Google AI Studio/Vertex AI connection settings</small>
<div id="google-native-tts-settings">
<div>
<label for="google-tts-api-type">API Type:</label>
<select id="google-tts-api-type">
<option value="makersuite">Google AI Studio (MakerSuite)</option>
<option value="vertexai" disabled>Google Vertex AI (unsupported)</option>
</select>
</div>
<div>
<label for="google-tts-model">Model:</label>
<select id="google-tts-model">
<option value="gemini-2.5-flash-preview-tts">Gemini 2.5 Flash Preview TTS</option>
<option value="gemini-2.5-pro-preview-tts">Gemini 2.5 Pro Preview TTS</option>
</select>
</div>
</div>`;
    }

    /**
     * Merge the stored settings over the defaults, reflect them in the UI, register the
     * change handlers and run a readiness check (a failed check is only logged).
     * @param {object} settings Stored provider settings (may be empty)
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default Google TTS Provider settings');
        }

        this.settings = { ...this.defaultSettings, ...settings };

        $('#google-tts-api-type').val(this.settings.apiType);
        $('#google-tts-model').val(this.settings.model);
        $('#google-tts-api-type, #google-tts-model').on('change', () => this.onSettingsChange());

        try {
            await this.checkReady();
            console.debug('Google TTS: Settings loaded');
        } catch (err) {
            console.warn('Google TTS: Settings loaded, but not ready.', err.message);
        }
    }

    /** Read the settings back from the UI, drop the voice cache and persist. */
    onSettingsChange() {
        this.settings.apiType = $('#google-tts-api-type').val();
        this.settings.model = $('#google-tts-model').val();
        this.voices = []; // Reset voices cache so it re-fetches
        saveTtsProviderSettings();
    }

    /** Readiness probe: succeeds when the voice list can be fetched. */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh handler: re-runs the readiness probe, which also reloads the voice cache. */
    async onRefreshClick() {
        await this.checkReady();
    }

    /**
     * Find a voice by name or voice ID, loading the voice list when the cache is empty.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Matching voice object
     * @throws {string} A plain string message when no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName || voice.voice_id === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Response containing the audio data
     */
    async generateTts(text, voiceId) {
        return await this.fetchNativeTtsGeneration(text, voiceId);
    }

    /**
     * Fetch the voice list from the server and store it in `this.voices`.
     * @returns {Promise<Array>} The fetched voice objects
     * @throws {Error} On a non-OK status or when the response has no `voices` array
     */
    async fetchTtsVoiceObjects() {
        try {
            const response = await fetch('/api/google/list-native-voices', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({}),
            });

            if (!response.ok) {
                let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
                try {
                    const errorJson = await response.json();
                    if (errorJson.error) {
                        errorMessage = errorJson.error;
                    }
                } catch (parseError) {
                    // Response isn't valid JSON, use the HTTP error message
                    console.debug('Error response is not JSON:', parseError.message);
                }
                throw new Error(errorMessage);
            }

            const responseJson = await response.json();
            if (!responseJson.voices || !Array.isArray(responseJson.voices)) {
                throw new Error('Invalid response format: voices array not found');
            }

            this.voices = responseJson.voices;
            console.info(`Google TTS: Loaded ${this.voices.length} voices`);
            return this.voices;
        } catch (error) {
            console.error('Failed to fetch Google TTS voices:', error);
            throw error;
        }
    }

    /**
     * Play a short preview for the given voice. Failures are reported with a toast
     * instead of being thrown to the caller.
     * @param {string} id Voice name or voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        try {
            const voice = await this.getVoice(id);
            const text = getPreviewString(voice.lang || 'en-US');
            // fetchNativeTtsGeneration() throws on any non-OK status, so no extra check is needed here.
            const response = await this.fetchNativeTtsGeneration(text, id);
            const audioBlob = await response.blob();
            const url = URL.createObjectURL(audioBlob);

            // Register the cleanup before playback starts so it cannot be missed.
            this.audioElement.onended = () => URL.revokeObjectURL(url);
            this.audioElement.src = url;
            try {
                // play() returns a promise; awaiting it routes playback failures into the catch below.
                await this.audioElement.play();
            } catch (playError) {
                // Playback never started, so 'ended' will not fire: release the blob URL now.
                URL.revokeObjectURL(url);
                throw playError;
            }
        } catch (error) {
            // An interrupted play() request (e.g. a newer preview took over) is not a user-facing failure.
            if (error?.name === 'AbortError') {
                console.debug('TTS Preview interrupted:', error);
                return;
            }
            console.error('TTS Preview Error:', error);
            // getVoice() throws plain strings, which have no `.message`.
            const reason = error instanceof Error ? error.message : String(error);
            toastr.error(`Could not generate preview: ${reason}`);
        }
    }

    /**
     * Request a TTS generation from the server. Reverse-proxy details are forwarded only
     * when `oai_settings.reverse_proxy` is a valid URL.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Response containing the audio data
     * @throws {Error} On a non-OK status, using the server's `error` field when available
     */
    async fetchNativeTtsGeneration(text, voiceId) {
        console.info(`Generating native Google TTS for voice_id ${voiceId}`);

        const useReverseProxy = oai_settings.reverse_proxy && isValidUrl(oai_settings.reverse_proxy);

        const response = await fetch('/api/google/generate-native-tts', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                model: this.settings.model,
                api: this.settings.apiType,
                reverse_proxy: useReverseProxy ? oai_settings.reverse_proxy : '',
                proxy_password: useReverseProxy ? oai_settings.proxy_password : '',
                vertexai_auth_mode: oai_settings.vertexai_auth_mode,
                vertexai_region: oai_settings.vertexai_region,
                vertexai_express_project_id: oai_settings.vertexai_express_project_id,
            }),
        });

        if (!response.ok) {
            let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
            try {
                const errorJson = await response.json();
                if (errorJson.error) {
                    errorMessage = errorJson.error;
                }
            } catch {
                // Not a JSON response, do nothing and keep the original http error
            }
            throw new Error(errorMessage);
        }

        return response;
    }
}

View File

@@ -0,0 +1,140 @@
import { getRequestHeaders } from '../../../script.js';
import { splitRecursive } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { GoogleTranslateTtsProvider };
/**
 * TTS provider that uses the server's Google Translate voice endpoints
 * (`/api/google/list-voices` and `/api/google/generate-voice`).
 */
class GoogleTranslateTtsProvider {
    /** Effective settings; assigned by loadSettings(). */
    settings;
    /** Voice objects cached by getVoice(). */
    voices = [];
    separator = ' . ';
    /** Audio element used by previewTtsVoice(). */
    audioElement = document.createElement('audio');
    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /** This provider has no settings UI. */
    get settingsHtml() {
        return '';
    }

    /** Drop the voice cache and persist the settings. */
    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Apply stored settings on top of a copy of the defaults and run a readiness check
     * (a failed check is only logged).
     * @param {object} settings Stored provider settings (may be empty)
     * @throws {string} A plain string message when `settings` contains an unknown key
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that loading settings never mutates defaultSettings.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        try {
            await this.checkReady();
            console.debug('Google Translate TTS: Settings loaded');
        } catch {
            console.debug('Google Translate TTS: Settings loaded, but not ready');
        }
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh handler: re-runs the readiness probe. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a voice by name or voice ID, loading the voice list when the cache is empty.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Matching voice object
     * @throws {string} A plain string message when no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(
            voice => voice.name == voiceName || voice.voice_id == voiceName,
        );
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Response containing the audio data
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice map from the server and convert it into voice objects sorted by
     * display name. Each map key is used both as `voice_id` and as `lang`.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     * @throws {Error} On a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/google/list-voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({}),
        });
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const voiceMap = await response.json();
        return Object.entries(voiceMap)
            .sort((a, b) => a[1].localeCompare(b[1]))
            .map(([id, name]) => ({ name, voice_id: id, preview_url: false, lang: id }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration() throws on any non-OK status, so the response is usable as-is.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Request a TTS generation from the server. The text is passed through
     * splitRecursive(text, 200) before being sent.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Response containing the audio data
     * @throws {Error} On a non-OK status (a toast is shown first)
     */
    async fetchTtsGeneration(text, voiceId) {
        const response = await fetch('/api/google/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: splitRecursive(text, 200),
                voice: voiceId,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}

View File

@@ -0,0 +1,223 @@
import { saveTtsProviderSettings } from './index.js';
export { GptSovitsV2Provider };
/**
 * TTS provider for a GPT-SoVITS-V2 server reachable at `settings.provider_endpoint`.
 */
class GptSovitsV2Provider {
    //########//
    // Config //
    //########//

    /** Effective settings; assigned by loadSettings(). */
    settings;
    ready = false;
    /** Voice objects cached by fetchTtsVoiceObjects(). */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
        text_lang: 'zh',
        prompt_lang: 'zh',
    };

    /** HTML markup of the provider settings panel, pre-filled with the default values. */
    get settingsHtml() {
        let html = `
<label for="tts_endpoint">Provider Endpoint:</label>
<input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
<span>Use <a target="_blank" href="https://github.com/v3ucn/GPT-SoVITS-V2">GPT-SoVITS-V2</a>(Unofficial).</span><br/>
<label for="text_lang">Text Lang(Inference text language):</label>
<input id="text_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.text_lang}"/>
<label for="prompt_lang">Prompt Lang(Reference audio text language):</label>
<input id="prompt_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.prompt_lang}"/>
<br/>
`;
        return html;
    }

    /** Read the settings back from the UI and persist them. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#tts_endpoint').val();
        this.settings.text_lang = $('#text_lang').val();
        this.settings.prompt_lang = $('#prompt_lang').val();
        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Apply stored settings on top of a copy of the defaults, reflect them in the UI,
     * register the change handlers and run the readiness check.
     * Keys that are not part of defaultSettings are ignored.
     * @param {object} settings Stored provider settings (may be empty)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that loading settings never mutates defaultSettings.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint).on('change', this.onSettingsChange.bind(this));
        $('#text_lang').val(this.settings.text_lang).on('change', this.onSettingsChange.bind(this));
        $('#prompt_lang').val(this.settings.prompt_lang).on('change', this.onSettingsChange.bind(this));

        await this.checkReady();

        console.info('ITS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Promise.allSettled is used, so failures of either call are not propagated.
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** Refresh handler: re-runs the readiness check. */
    async onRefreshClick() {
        return await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a voice by name, loading the voice list when the cache is empty.
     * @param {string} voiceName Voice name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} A plain string message when no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Response containing the audio data
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `<provider_endpoint>/speakers` and cache it.
     * @returns {Promise<Array>} Parsed JSON body of the response
     * @throws {Error} On a non-OK status; the message carries the response body text
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);
        console.info(response);
        if (!response.ok) {
            // Read the body as text: it may not be JSON, and an object would print as "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        this.voices = responseJson;
        return responseJson;
    }

    // Each time a parameter is changed, we change the configuration
    // (currently a no-op placeholder).
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * The prompt text is the voice ID with every `[...]` group removed, and the reference
     * audio path is derived from the voice ID.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @param {*} [lang] Currently unused
     * @param {boolean} [forceNoStreaming] Currently unused
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} On a non-OK status (a toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const promptText = voiceId.replace(/\[.*?\]/gu, '');

        const params = {
            text: inputText,
            prompt_text: promptText,
            ref_audio_path: './参考音频/' + voiceId + '.wav',
            text_lang: this.settings.text_lang,
            prompt_lang: this.settings.prompt_lang,
            text_split_method: 'cut5',
            batch_size: 1,
            media_type: 'ogg',
            streaming_mode: 'true',
        };

        const url = `${this.settings.provider_endpoint}/`;

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(params), // Convert parameter objects to JSON strings
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

View File

@@ -0,0 +1,267 @@
import { saveTtsProviderSettings } from './index.js';
export { GSVITtsProvider };
/**
 * TTS provider for a GSVI (GPT-SoVITS-Inference) server reachable at
 * `settings.provider_endpoint`. Voices are the keys of the server's character list.
 */
class GSVITtsProvider {
    //########//
    // Config //
    //########//

    /** Effective settings; assigned by loadSettings(). */
    settings;
    ready = false;
    separator = '. ';

    /** Raw character list as returned by the server. */
    characterList = {};
    /** Character names (keys of characterList). */
    voices = [];

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Replace every <br> with a newline (String.replace with a string pattern only hits the first one).
        return text.replaceAll('<br>', '\n');
    }

    languageLabels = {
        'Multilingual': '多语种混合',
        'Chinese': '中文',
        'English': '英文',
        'Japanese': '日文',
        'Chinese-English': '中英混合',
        'Japanese-English': '日英混合',
    };

    defaultSettings = {
        provider_endpoint: 'http://127.0.0.1:5000',
        language: '多语种混合',
        cha_name: '',
        character_emotion: 'default',
        speed: 1,
        top_k: 6,
        top_p: 0.85,
        temperature: 0.75,
        batch_size: 10,
        stream: false,
        stream_chunk_size: 100,
    };

    /**
     * Fetch the character list from `<provider_endpoint>/character_list` and refresh
     * both `characterList` and `voices`.
     * @throws {Error} On a non-OK status
     */
    async fetchCharacterList() {
        const response = await fetch(this.settings.provider_endpoint + '/character_list');
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const characterList = await response.json();
        this.characterList = characterList;
        this.voices = Object.keys(characterList);
    }

    /** HTML markup of the provider settings panel, pre-filled with the default values. */
    get settingsHtml() {
        let html = `
<label for="gsvi_api_language">Text Language</label>
<select id="gsvi_api_language">`;

        for (const [label, value] of Object.entries(this.languageLabels)) {
            // Pre-select the currently configured language, if settings are already loaded.
            const selected = value == this.settings?.language ? ' selected="selected"' : '';
            html += `<option value="${value}"${selected}>${label}</option>`;
        }

        html += `
</select>
<label>GSVI Settings:</label><br/>
<label for="gsvi_tts_endpoint">Provider Endpoint:</label>
<input id="gsvi_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<label for="gsvi_speed">Speed: <span id="gsvi_tts_speed_output">${this.defaultSettings.speed}</span></label>
<input id="gsvi_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2" step="0.01" />
<label for="gsvi_top_k">Top K: <span id="gsvi_top_k_output">${this.defaultSettings.top_k}</span></label>
<input id="gsvi_top_k" type="range" value="${this.defaultSettings.top_k}" min="0" max="100" step="1" />
<label for="gsvi_top_p">Top P: <span id="gsvi_top_p_output">${this.defaultSettings.top_p}</span></label>
<input id="gsvi_top_p" type="range" value="${this.defaultSettings.top_p}" min="0" max="1" step="0.01" />
<label for="gsvi_temperature">Temperature: <span id="gsvi_tts_temperature_output">${this.defaultSettings.temperature}</span></label>
<input id="gsvi_temperature" type="range" value="${this.defaultSettings.temperature}" min="0.01" max="1" step="0.01" />
<label for="gsvi_batch_size">Batch Size: <span id="gsvi_batch_size_output">${this.defaultSettings.batch_size}</span></label>
<input id="gsvi_batch_size" type="range" value="${this.defaultSettings.batch_size}" min="1" max="35" step="1" />
<label for="gsvi_tts_streaming" class="checkbox_label">
<input id="gsvi_tts_streaming" type="checkbox" ${this.defaultSettings.stream ? 'checked' : ''}/>
<span>Streaming</span>
</label>
<label for="gsvi_stream_chunk_size">Stream Chunk Size: <span id="gsvi_stream_chunk_size_output">${this.defaultSettings.stream_chunk_size}</span></label>
<input id="gsvi_stream_chunk_size" type="range" value="${this.defaultSettings.stream_chunk_size}" min="100" max="400" step="1" />
<p>
For more information, visit the
<a href="https://github.com/X-T-E-R/GPT-SoVITS-Inference" target="_blank">GSVI project page</a>.
</p>
`;

        return html;
    }

    /** Read all settings back from the UI, update the value labels and persist. */
    onSettingsChange() {
        // Update provider settings based on input fields
        this.settings.provider_endpoint = $('#gsvi_tts_endpoint').val();
        this.settings.language = $('#gsvi_api_language').val();

        // Update the rest of TTS settings based on input fields
        this.settings.speed = parseFloat($('#gsvi_speed').val());
        this.settings.temperature = parseFloat($('#gsvi_temperature').val());
        this.settings.top_k = parseInt($('#gsvi_top_k').val(), 10);
        this.settings.top_p = parseFloat($('#gsvi_top_p').val());
        this.settings.batch_size = parseInt($('#gsvi_batch_size').val(), 10);
        this.settings.stream = $('#gsvi_tts_streaming').is(':checked');
        this.settings.stream_chunk_size = parseInt($('#gsvi_stream_chunk_size').val(), 10);

        // Update UI to reflect changes
        $('#gsvi_tts_speed_output').text(this.settings.speed);
        $('#gsvi_tts_temperature_output').text(this.settings.temperature);
        $('#gsvi_top_k_output').text(this.settings.top_k);
        $('#gsvi_top_p_output').text(this.settings.top_p);
        $('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
        $('#gsvi_batch_size_output').text(this.settings.batch_size);

        // Persist settings changes
        saveTtsProviderSettings();
    }

    /**
     * Merge the stored settings over the defaults, reflect them in the UI, register the
     * change handlers and run the readiness check.
     * @param {object} settings Stored provider settings (may be empty)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Stored values override the defaults; keys are not filtered here.
        this.settings = { ...this.defaultSettings, ...settings };

        // Set initial values from the settings
        $('#gsvi_tts_endpoint').val(this.settings.provider_endpoint);
        $('#gsvi_api_language').val(this.settings.language);
        $('#gsvi_speed').val(this.settings.speed);
        $('#gsvi_temperature').val(this.settings.temperature);
        $('#gsvi_top_k').val(this.settings.top_k);
        $('#gsvi_top_p').val(this.settings.top_p);
        $('#gsvi_batch_size').val(this.settings.batch_size);
        $('#gsvi_tts_streaming').prop('checked', this.settings.stream);
        $('#gsvi_stream_chunk_size').val(this.settings.stream_chunk_size);

        // Update UI to reflect initial settings
        $('#gsvi_tts_speed_output').text(this.settings.speed);
        $('#gsvi_tts_temperature_output').text(this.settings.temperature);
        $('#gsvi_top_k_output').text(this.settings.top_k);
        $('#gsvi_top_p_output').text(this.settings.top_p);
        $('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
        // Same label that onSettingsChange() keeps up to date.
        $('#gsvi_batch_size_output').text(this.settings.batch_size);

        // Register input/change event listeners to update settings on user interaction
        $('#gsvi_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_api_language').on('change', () => { this.onSettingsChange(); });
        $('#gsvi_speed').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_temperature').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_top_k').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_top_p').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_batch_size').on('input', () => { this.onSettingsChange(); });
        $('#gsvi_tts_streaming').on('change', () => { this.onSettingsChange(); });
        $('#gsvi_stream_chunk_size').on('input', () => { this.onSettingsChange(); });

        await this.checkReady();
        console.debug('GSVI: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch the character list.
    // Promise.allSettled is used, so a failed fetch is not propagated.
    async checkReady() {
        await Promise.allSettled([this.fetchCharacterList()]);
    }

    /**
     * Refresh handler: re-fetches the character list. fetchTtsVoiceObjects() and getVoice()
     * only fetch while the cache is empty, so this is the way to pick up server-side changes.
     */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Build a voice object for a known character name, loading the character list first
     * when the cache is empty.
     * @param {string} voiceName Character name
     * @returns {Promise<{name: string, voice_id: string, preview_url: boolean, lang: string}>}
     * @throws {string} A plain string message when the name is not in the character list
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            // Must be awaited, otherwise the membership check below runs against the empty cache.
            await this.fetchCharacterList();
        }
        if (!this.voices.includes(voiceName)) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return { name: voiceName, voice_id: voiceName, preview_url: false, lang: 'zh-CN' };
    }

    /**
     * Generate TTS audio.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Character name to use
     * @returns {Promise<string>} URL of the generation request (see fetchTtsGeneration)
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * List the available voices, loading the character list when the cache is empty.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (this.voices.length == 0) {
            await this.fetchCharacterList();
        }
        return this.voices.map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'zh-CN' }));
    }

    /**
     * Build the `/tts` request URL for the given text and character. No request is sent
     * here; the URL string itself is returned.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Character name to use
     * @returns {Promise<string>} Request URL including the query string
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const params = new URLSearchParams();
        params.append('text', inputText);
        params.append('cha_name', voiceId);
        params.append('text_language', this.settings.language);
        params.append('batch_size', this.settings.batch_size.toString());
        params.append('speed', this.settings.speed.toString());
        params.append('top_k', this.settings.top_k.toString());
        params.append('top_p', this.settings.top_p.toString());
        params.append('temperature', this.settings.temperature.toString());
        params.append('stream', this.settings.stream.toString());

        return `${this.settings.provider_endpoint}/tts?${params.toString()}`;
    }

    // Interface not used by GSVI TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,113 @@
// kokoro-worker.js
/**
 * Kokoro TTS engine instance; null until initializeTts() succeeds.
 * @type {import('./lib/kokoro.web.js').KokoroTTS}
 */
let tts = null;
/**
 * True once the engine has been initialized successfully.
 * @type {boolean}
 */
let ready = false;
/**
 * Voice names reported by the engine.
 * @type {string[]}
 */
let voices = [];

/**
 * Entry point for messages from the main thread. Supported actions:
 *  - 'initialize'  -> replies with 'initialized' (success flag plus voices, or an error message)
 *  - 'generateTts' -> replies with 'generatedTts' (blob URL or error message, echoing requestId)
 *  - 'checkReady'  -> replies with 'readyStatus'
 * Any other action is ignored.
 */
self.onmessage = async function (event) {
    const { action, data } = event.data;

    if (action === 'initialize') {
        try {
            const success = await initializeTts(data);
            self.postMessage({ action: 'initialized', success, voices });
        } catch (error) {
            self.postMessage({ action: 'initialized', success: false, error: error.message });
        }
        return;
    }

    if (action === 'generateTts') {
        try {
            const audioBlob = await generateTts(data.text, data.voice, data.speakingRate);
            const blobUrl = URL.createObjectURL(audioBlob);
            self.postMessage({
                action: 'generatedTts',
                success: true,
                blobUrl,
                requestId: data.requestId,
            });
        } catch (error) {
            self.postMessage({
                action: 'generatedTts',
                success: false,
                error: error.message,
                requestId: data.requestId,
            });
        }
        return;
    }

    if (action === 'checkReady') {
        self.postMessage({ action: 'readyStatus', ready });
    }
};
/**
 * Load the Kokoro library, create the TTS engine and record its voices.
 * Sets `ready` to true on success; on any failure the error is logged, `ready` is set to
 * false and the error is rethrown.
 * @param {{modelId: string, dtype: string, device: string}} settings Engine settings
 * @returns {Promise<boolean>} Always true when it resolves
 */
async function initializeTts(settings) {
    try {
        const kokoro = await import('./lib/kokoro.web.js');

        const engineOptions = {
            dtype: settings.dtype,
            device: settings.device,
        };
        console.log('Worker: Initializing Kokoro TTS with settings:', {
            modelId: settings.modelId,
            ...engineOptions,
        });

        // Create TTS instance
        tts = await kokoro.KokoroTTS.from_pretrained(settings.modelId, engineOptions);

        // Get available voices
        voices = Object.keys(tts.voices);

        // Check if generate method exists
        const canGenerate = typeof tts.generate === 'function';
        if (!canGenerate) {
            throw new Error('TTS instance does not have generate method');
        }

        console.log('Worker: TTS initialized successfully');
        ready = true;
        return true;
    } catch (error) {
        console.error('Worker: Kokoro TTS initialization failed:', error);
        ready = false;
        throw error;
    }
}
// Generate TTS audio
/**
 * Synthesizes speech for one piece of text with the initialized engine.
 * @param {string} text Text to speak; must contain non-whitespace characters
 * @param {string} voiceId Voice passed to the engine as `voice`
 * @param {number} [speakingRate] Passed to the engine as `speed`; falsy values fall back to 1.0
 * @returns {Promise<*>} Whatever the generated audio's toBlob() returns
 * @throws {Error} When the engine is not initialized, the text is blank, or generation fails
 */
async function generateTts(text, voiceId, speakingRate) {
    const engineAvailable = ready && tts;
    if (!engineAvailable) {
        throw new Error('TTS engine not initialized');
    }
    const isBlank = text.trim().length === 0;
    if (isBlank) {
        throw new Error('Empty text');
    }
    const options = {
        voice: voiceId,
        speed: speakingRate || 1.0,
    };
    let blob;
    try {
        const rendered = await tts.generate(text, options);
        blob = rendered.toBlob();
    } catch (failure) {
        // Log inside the worker, then let the message handler report the failure to the main thread
        console.error('Worker: TTS generation failed:', failure);
        throw failure;
    }
    return blob;
}

View File

@@ -0,0 +1,352 @@
import { debounce_timeout } from '../../constants.js';
import { debounceAsync, splitRecursive } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
/**
 * TTS provider that runs the Kokoro model in a dedicated Web Worker (kokoro-worker.js).
 * Requests are posted to the worker and tracked in `pendingRequests` until the matching
 * reply arrives in handleWorkerMessage().
 */
export class KokoroTtsProvider {
    constructor() {
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        this.ready = false;
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        this.worker = null;
        this.separator = ' ... ... ... ';
        // Callbacks of in-flight worker requests, keyed by request ID
        // (the initialization handshake uses the key 'initialization')
        this.pendingRequests = new Map();
        this.nextRequestId = 1;
        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }
    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // TILDE!
        text = text.replace(/~/g, '.');
        return text;
    }
    /**
     * Copies the known keys of the saved settings over the defaults, pushes the values into
     * the settings UI and attaches the change handlers.
     * @param {object} settings Saved provider settings (any subset of the known keys)
     */
    async loadSettings(settings) {
        if (settings.modelId !== undefined) this.settings.modelId = settings.modelId;
        if (settings.dtype !== undefined) this.settings.dtype = settings.dtype;
        if (settings.device !== undefined) this.settings.device = settings.device;
        if (settings.voiceMap !== undefined) this.settings.voiceMap = settings.voiceMap;
        if (settings.defaultVoice !== undefined) this.settings.defaultVoice = settings.defaultVoice;
        if (settings.speakingRate !== undefined) this.settings.speakingRate = settings.speakingRate;
        $('#kokoro_model_id').val(this.settings.modelId).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_dtype').val(this.settings.dtype).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_device').val(this.settings.device).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }
    /**
     * Rejects every in-flight worker request and empties the pending-request table.
     * Used when the worker that would have answered them is terminated.
     * @param {Error} error Reason handed to each pending request
     */
    rejectPendingRequests(error) {
        const abandoned = [...this.pendingRequests.values()];
        this.pendingRequests.clear();
        for (const request of abandoned) {
            request.reject(error);
        }
    }
    /**
     * (Re)creates the worker and asks it to load the model described by the current settings.
     * Any previous worker is terminated and its unanswered requests are rejected.
     * @returns {Promise<boolean>} Resolves to true when the worker reports a successful
     * initialization; rejects when it fails, times out, or is superseded by a newer worker
     */
    initializeWorker() {
        return new Promise((resolve, reject) => {
            /** @type {Worker} */
            let worker = null;
            // Only the most recently created worker may change provider state; callbacks that
            // belong to a replaced worker must not overwrite `ready` or the status text.
            const isCurrent = () => this.worker === worker;
            try {
                // Terminate the existing worker if it exists
                if (this.worker) {
                    this.worker.terminate();
                    // The terminated worker will never answer, so release everything waiting on it
                    // (this also clears the timeout of an initialization that was still running)
                    this.rejectPendingRequests(new Error('TTS worker was restarted'));
                    $('#kokoro_status_text').text('Initializing...').removeAttr('style');
                }
                // Create a new worker
                worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
                this.worker = worker;
                // Set up message handling
                worker.onmessage = this.handleWorkerMessage.bind(this);
                // Fail a running initialization right away when the worker reports an uncaught error,
                // instead of waiting for the timeout below
                worker.onerror = (event) => {
                    if (!isCurrent()) {
                        return;
                    }
                    const pendingInit = this.pendingRequests.get('initialization');
                    if (pendingInit) {
                        this.pendingRequests.delete('initialization');
                        pendingInit.reject(new Error(event.message || 'Worker failed with an unknown error'));
                    }
                };
                // Create a promise that will resolve when initialization completes
                const initPromise = new Promise((initResolve, initReject) => {
                    const entry = {
                        resolve: (result) => {
                            clearTimeout(timeoutId);
                            initResolve(result);
                        },
                        reject: (error) => {
                            clearTimeout(timeoutId);
                            initReject(error);
                        },
                    };
                    const timeoutId = setTimeout(() => {
                        // Remove the entry only if it still belongs to this initialization
                        if (this.pendingRequests.get('initialization') === entry) {
                            this.pendingRequests.delete('initialization');
                        }
                        initReject(new Error('Worker initialization timed out'));
                    }, 600000); // 600 second timeout
                    this.pendingRequests.set('initialization', entry);
                });
                // Initialize the worker with the current settings
                worker.postMessage({
                    action: 'initialize',
                    data: {
                        modelId: this.settings.modelId,
                        dtype: this.settings.dtype,
                        device: this.settings.device,
                    },
                });
                // Resolve the outer promise when initialization completes
                initPromise.then(success => {
                    if (isCurrent()) {
                        this.ready = success;
                        this.updateStatusDisplay();
                    }
                    resolve(success);
                }).catch(error => {
                    console.error('Worker initialization failed:', error);
                    if (isCurrent()) {
                        this.ready = false;
                        this.updateStatusDisplay();
                    }
                    reject(error);
                });
            } catch (error) {
                console.error('Failed to create worker:', error);
                this.ready = false;
                this.updateStatusDisplay();
                reject(error);
            }
        });
    }
    /**
     * Routes a reply from the worker to the pending request it belongs to.
     * @param {MessageEvent} event Message posted by kokoro-worker.js
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;
        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (initRequest) {
                    this.pendingRequests.delete('initialization');
                    if (success) {
                        initRequest.resolve(true);
                    } else {
                        initRequest.reject(new Error(error || 'Initialization failed'));
                    }
                }
            } break;
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (request) {
                    this.pendingRequests.delete(requestId);
                    if (success) {
                        fetch(blobUrl).then(response => response.blob()).then(audioBlob => {
                            request.resolve(new Response(audioBlob, {
                                headers: {
                                    'Content-Type': 'audio/wav',
                                },
                            }));
                        }).catch(error => {
                            request.reject(new Error('Failed to fetch TTS audio blob: ' + error));
                        }).finally(() => {
                            // Clean up the blob URL whether or not the fetch succeeded
                            URL.revokeObjectURL(blobUrl);
                        });
                    } else {
                        request.reject(new Error(error || 'TTS generation failed'));
                    }
                }
            } break;
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }
    /** Shows "Ready" (green) or "Failed" (red) in the status line, based on `this.ready`. */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }
    /**
     * Starts the worker when there is none; otherwise asks the existing worker for its status.
     * @returns {Promise<boolean>} Initialization result, or the last known ready flag (the
     * worker's answer to the status request arrives later and updates `this.ready`)
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }
        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }
    /** Restarts the worker with the current settings. */
    async onRefreshClick() {
        return await this.initializeWorker();
    }
    /** HTML of the provider settings panel, pre-filled from the current settings. */
    get settingsHtml() {
        return `
<div class="kokoro_tts_settings">
<label for="kokoro_model_id">Model ID:</label>
<input id="kokoro_model_id" type="text" class="text_pole" value="${this.settings.modelId}" />
<label for="kokoro_dtype">Data Type:</label>
<select id="kokoro_dtype" class="text_pole">
<option value="q8" ${this.settings.dtype === 'q8' ? 'selected' : ''}>q8 (Recommended)</option>
<option value="fp32" ${this.settings.dtype === 'fp32' ? 'selected' : ''}>fp32 (High Precision)</option>
<option value="fp16" ${this.settings.dtype === 'fp16' ? 'selected' : ''}>fp16</option>
<option value="q4" ${this.settings.dtype === 'q4' ? 'selected' : ''}>q4 (Low Memory)</option>
<option value="q4f16" ${this.settings.dtype === 'q4f16' ? 'selected' : ''}>q4f16</option>
</select>
<label for="kokoro_device">Device:</label>
<select id="kokoro_device" class="text_pole">
<option value="wasm" ${this.settings.device === 'wasm' ? 'selected' : ''}>WebAssembly (CPU)</option>
<option value="webgpu" ${this.settings.device === 'webgpu' ? 'selected' : ''}>WebGPU (GPU Acceleration)</option>
</select>
<label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${this.settings.speakingRate}x</span></label>
<input id="kokoro_speaking_rate" type="range" value="${this.settings.speakingRate}" min="0.5" max="2.0" step="0.1" />
<hr>
<div>
Status: <span id="kokoro_status_text">Initializing...</span>
</div>
</div>
`;
    }
    /**
     * Reads the settings UI back into `this.settings` and saves them. The worker is only
     * restarted when a value it is initialized with (model, dtype, device) changed, or when no
     * worker exists yet; the speaking rate is sent with every request and needs no restart.
     */
    async onSettingsChange() {
        const { modelId: previousModelId, dtype: previousDtype, device: previousDevice } = this.settings;
        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());
        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
        const engineSettingsChanged = this.settings.modelId !== previousModelId
            || this.settings.dtype !== previousDtype
            || this.settings.device !== previousDevice;
        if (engineSettingsChanged || !this.worker) {
            // Reinitialize TTS engine with debounce; nobody awaits this call, so report failures here
            Promise.resolve(this.initTtsDebounced()).catch(error => {
                console.error('Kokoro TTS reinitialization failed:', error);
            });
        }
        saveTtsProviderSettings();
    }
    /**
     * Lists the built-in voices as voice objects.
     * @returns {Promise<{name: string, voice_id: string, preview_url: null, lang: string}[]>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }
        return this.voices.map(voice => ({
            name: voice,
            voice_id: voice,
            preview_url: null,
            lang: voice.startsWith('b') ? 'en-GB' : 'en-US',
        }));
    }
    /**
     * Generates the preview sentence with the given voice and plays it chunk by chunk.
     * @param {string} voiceId Voice to preview
     * @throws {Error} When generation or playback fails
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }
        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        for await (const response of this.generateTts(previewText, voiceId)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise((resolve, reject) => {
                    const audioElement = new Audio();
                    // Attach the handlers before starting playback, and settle on failure too so
                    // a broken clip cannot leave the preview waiting forever
                    audioElement.onended = () => resolve();
                    audioElement.onerror = () => reject(new Error('Failed to play TTS preview audio'));
                    audioElement.src = url;
                    Promise.resolve(audioElement.play()).catch(reject);
                });
            } finally {
                URL.revokeObjectURL(url);
            }
        }
    }
    /**
     * @param {string} voiceId Voice ID
     * @returns {string} The ID itself; voices have no separate display name
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }
    /**
     * Builds the voice object for a voice name, falling back to the default voice when the
     * name is not one of the known voices.
     * @param {string} voiceName Requested voice name
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}} Voice object
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }
    /**
     * Generate TTS audio for the given text using the specified voice.
     * The text is split into chunks of at most 400 characters; one response is yielded per chunk.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    async* generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            await this.initializeWorker();
        }
        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }
        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }
        const voice = this.getVoice(voiceId);
        const chunkSize = 400;
        const chunks = splitRecursive(text, chunkSize, ['\n\n', '\n', '.', '?', '!', ',', ' ', '']);
        for (const chunk of chunks) {
            // The worker rejects blank text, which would abort the whole generation
            if (chunk.trim().length === 0) {
                continue;
            }
            // The worker may have been disposed while the previous chunk was being consumed
            if (!this.worker) {
                throw new Error('TTS worker is not available');
            }
            // A fresh ID per chunk keeps every reply tied to exactly one request
            const requestId = this.nextRequestId++;
            yield await new Promise((resolve, reject) => {
                // Store the promise callbacks
                this.pendingRequests.set(requestId, { resolve, reject });
                // Send the request to the worker
                this.worker.postMessage({
                    action: 'generateTts',
                    data: {
                        text: chunk,
                        voice: voice.voice_id,
                        speakingRate: this.settings.speakingRate || 1.0,
                        requestId,
                    },
                });
            });
        }
    }
    /** Terminates the worker and rejects every request that was still waiting for it. */
    dispose() {
        // Clean up the worker when the provider is disposed
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }
        this.ready = false;
        this.rejectPendingRequests(new Error('TTS provider was disposed'));
    }
}

View File

@@ -0,0 +1,8 @@
# kokoro-js
* Author: hexgrad
* NPM: <https://www.npmjs.com/package/kokoro-js>
* Version: 1.2.0
* License: Apache-2.0
Last updated: 2025-03-10

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,73 @@
/**
 * Audio worklet processor that plays raw 16-bit PCM pushed to it through its message port.
 *
 * Messages handled on the port:
 * - { pcmData }  bytes of 16-bit samples; an odd trailing byte is kept until the next message
 * - { volume }   gain applied during playback (negative values are clamped to 0)
 *
 * Samples are queued in a ring buffer in which `readIndex === writeIndex` means "empty",
 * so one slot always stays unused.
 */
class PCMProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        this.buffer = new Float32Array(24000 * 30); // Pre-allocate buffer for ~30 seconds at 24kHz
        this.writeIndex = 0;
        this.readIndex = 0;
        this.pendingBytes = new Uint8Array(0); // Buffer for incomplete samples
        this.volume = 1.0; // Default volume (1.0 = 100%, 0.5 = 50%, etc.)
        this.port.onmessage = (event) => {
            if (event.data.pcmData) {
                this.enqueuePcm(event.data.pcmData);
            } else if (event.data.volume !== undefined) {
                // Set volume (0.0 to 1.0, can go higher for amplification)
                this.volume = Math.max(0, event.data.volume);
            }
        };
    }

    /**
     * Number of samples that are queued and not yet played.
     * @returns {number}
     */
    queuedSamples() {
        return (this.writeIndex - this.readIndex + this.buffer.length) % this.buffer.length;
    }

    /**
     * Grows the ring buffer (doubling its size as often as needed) so that `extraSamples` more
     * samples fit without the write position catching up with the read position.
     * Queued samples are kept in order and moved to the start of the new buffer.
     * @param {number} extraSamples Number of samples about to be written
     */
    ensureCapacity(extraSamples) {
        const queued = this.queuedSamples();
        // +1 for the slot that must stay free to tell a full buffer from an empty one
        const required = queued + extraSamples + 1;
        if (required <= this.buffer.length) {
            return;
        }
        let capacity = this.buffer.length;
        while (capacity < required) {
            capacity *= 2;
        }
        const grown = new Float32Array(capacity);
        if (this.readIndex <= this.writeIndex) {
            grown.set(this.buffer.subarray(this.readIndex, this.writeIndex));
        } else {
            // Queued data wraps around the end of the old buffer: copy the tail, then the head
            const tail = this.buffer.subarray(this.readIndex);
            grown.set(tail);
            grown.set(this.buffer.subarray(0, this.writeIndex), tail.length);
        }
        this.buffer = grown;
        this.readIndex = 0;
        this.writeIndex = queued;
    }

    /**
     * Appends incoming PCM bytes to the ring buffer as floats in [-1, 1).
     * @param {ArrayBuffer|ArrayLike<number>} pcmData Bytes of 16-bit samples
     */
    enqueuePcm(pcmData) {
        // Combine any pending bytes with new data
        const newData = new Uint8Array(pcmData);
        const combined = new Uint8Array(this.pendingBytes.length + newData.length);
        combined.set(this.pendingBytes);
        combined.set(newData, this.pendingBytes.length);
        // Calculate how many complete 16-bit samples we have
        const completeSamples = Math.floor(combined.length / 2);
        const bytesToProcess = completeSamples * 2;
        if (completeSamples > 0) {
            // Make room first: writing past a full buffer would make it look empty and
            // silently discard everything that was queued
            this.ensureCapacity(completeSamples);
            const int16Array = new Int16Array(combined.buffer, 0, completeSamples);
            for (let i = 0; i < completeSamples; i++) {
                this.buffer[this.writeIndex] = int16Array[i] / 32768.0; // Convert 16-bit to float
                this.writeIndex = (this.writeIndex + 1) % this.buffer.length;
            }
        }
        // Store any remaining incomplete bytes (empty when the byte count was even)
        this.pendingBytes = combined.slice(bytesToProcess);
    }

    /**
     * Copies queued samples, scaled by the current volume, into the first channel of the
     * first output until the output block is full or the queue is empty.
     * @returns {boolean} Always true
     */
    process(inputs, outputs, parameters) {
        const output = outputs[0];
        if (output.length > 0 && this.readIndex !== this.writeIndex) {
            const channelData = output[0];
            for (let i = 0; i < channelData.length && this.readIndex !== this.writeIndex; i++) {
                channelData[i] = this.buffer[this.readIndex] * this.volume;
                this.readIndex = (this.readIndex + 1) % this.buffer.length;
            }
        }
        return true;
    }
}
registerProcessor('pcm-processor', PCMProcessor);

View File

@@ -0,0 +1,15 @@
{
"display_name": "TTS",
"loading_order": 10,
"requires": [],
"optional": [
"silero-tts",
"edge-tts",
"coqui-tts"
],
"js": "index.js",
"css": "style.css",
"author": "Ouoertheo#7264",
"version": "1.0.0",
"homePage": "None"
}

View File

@@ -0,0 +1,963 @@
import { getPreviewString, initVoiceMap, saveTtsProviderSettings } from './index.js';
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getBase64Async } from '../../utils.js';
export { MiniMaxTtsProvider };
class MiniMaxTtsProvider {
//########//
// Config //
//########//
// Effective settings; populated by loadSettings()
settings;
voices = [];
separator = ' . ';
audioElement = document.createElement('audio');
// Default values for every accepted setting. speed, volume and pitch are given as
// { default, min, max, step }: the settings panel uses these for its range sliders and
// loadSettings() reduces each of them to its `default` value.
defaultSettings = {
apiHost: 'https://api.minimax.io',
model: 'speech-02-hd',
voiceMap: {},
speed: { default: 1.0, min: 0.5, max: 2.0, step: 0.1 },
volume: { default: 1.0, min: 0.0, max: 10.0, step: 0.1 },
pitch: { default: 0, min: -12, max: 12, step: 1 },
audioSampleRate: 32000,
bitrate: 128000,
format: 'mp3',
customModels: [],
customVoices: [],
customVoiceId: '',
};
// MiniMax API doesn't provide a method to list user's cloned voices
// so users need to manually input their custom cloned voice IDs
// (built-in voices; getAllVoices() combines them with the user's custom voices)
static defaultVoices = [
{ name: 'Unrestrained Young Man', voice_id: 'Chinese (Mandarin)_Unrestrained_Young_Man', lang: 'zh-CN', preview_url: null },
];
// default models (by MiniMax doc); getAllModels() combines them with the user's custom models
static defaultModels = [
{ id: 'speech-02-hd', name: 'Speech-02-HD (High Quality)' },
{ id: 'speech-02-turbo', name: 'Speech-02-Turbo (Fast)' },
{ id: 'speech-01', name: 'Speech-01 (Legacy)' },
{ id: 'speech-01-240228', name: 'Speech-01-240228 (Legacy)' },
];
availableModels = [];
availableVoices = [];
/**
 * HTML of the MiniMax settings panel: API key / group ID buttons, host, model and format
 * selectors, speed/volume/pitch sliders (ranges taken from defaultSettings) and the
 * custom voice / custom model management sections.
 * @returns {string} Markup for the settings panel
 */
get settingsHtml() {
return `
<div class="minimax_tts_settings">
<div class="tts_block justifyCenter">
<div id="api_key_minimax" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_minimax">
<i class="fa-solid fa-key"></i>
<span>Click to set API Key</span>
</div>
<div id="minimax_group_id" class="menu_button menu_button_icon manage-api-keys" data-key="minimax_group_id">
<i class="fa-solid fa-key"></i>
<span>Click to set Group ID</span>
</div>
</div>
<div class="tts_block">
<label for="minimax_tts_api_host">API Host</label>
<select id="minimax_tts_api_host" class="text_pole">
<option value="https://api.minimax.io">Official (api.minimax.io)</option>
<option value="https://api.minimaxi.chat">Global (api.minimaxi.chat)</option>
<option value="https://api.minimax.chat">Mainland China (api.minimax.chat)</option>
</select>
</div>
<div class="tts_block">
<label for="minimax_tts_model">Model</label>
<select id="minimax_tts_model" class="text_pole">
<option value="speech-02-hd">Speech-02-HD (High Quality)</option>
<option value="speech-02-turbo">Speech-02-Turbo (Fast)</option>
<option value="speech-01">Speech-01 (Legacy)</option>
<option value="speech-01-240228">Speech-01-240228 (Legacy)</option>
</select>
</div>
<div class="tts_block">
<input id="minimax_connect" class="menu_button" type="button" value="Connect" />
<input id="minimax_refresh" class="menu_button" type="button" value="Refresh" />
</div>
<div class="tts_block">
<label for="minimax_tts_speed">Speed: <span id="minimax_tts_speed_output"></span></label>
<input id="minimax_tts_speed" type="range" value="${this.defaultSettings.speed.default}" min="${this.defaultSettings.speed.min}" max="${this.defaultSettings.speed.max}" step="${this.defaultSettings.speed.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_volume">Volume: <span id="minimax_tts_volume_output"></span></label>
<input id="minimax_tts_volume" type="range" value="${this.defaultSettings.volume.default}" min="${this.defaultSettings.volume.min}" max="${this.defaultSettings.volume.max}" step="${this.defaultSettings.volume.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_pitch">Pitch: <span id="minimax_tts_pitch_output"></span></label>
<input id="minimax_tts_pitch" type="range" value="${this.defaultSettings.pitch.default}" min="${this.defaultSettings.pitch.min}" max="${this.defaultSettings.pitch.max}" step="${this.defaultSettings.pitch.step}" />
</div>
<div class="tts_block">
<label for="minimax_tts_format">Audio Format</label>
<select id="minimax_tts_format" class="text_pole">
<option value="mp3">MP3</option>
<option value="wav">WAV</option>
<option value="flac">FLAC</option>
</select>
</div>
<hr>
<div class="tts_block">
<label for="minimax_tts_custom_voice_id">Custom Voice ID (for 'customVoice' option)</label>
<input id="minimax_tts_custom_voice_id" type="text" class="text_pole" placeholder="Enter custom voice ID from MiniMax platform"/>
</div>
<hr>
<div id="minimax_custom_voice_cloning" class="tts_block flexFlowColumn">
<h4>Custom Voice Management</h4>
<div class="tts_block wide100p">
<input id="minimax_custom_voice_name" type="text" class="text_pole" placeholder="Voice Name"/>
</div>
<div class="tts_block wide100p">
<input id="minimax_custom_voice_id" type="text" class="text_pole" placeholder="Voice ID (from MiniMax platform)"/>
</div>
<div class="tts_block wide100p">
<select id="minimax_custom_voice_lang" class="text_pole">
<option value="auto">Auto Detect</option>
<option value="Chinese">Chinese (中文)</option>
<option value="Chinese,Yue">Chinese, Yue (粤语)</option>
<option value="English">English</option>
<option value="Arabic">Arabic (العربية)</option>
<option value="Russian">Russian (Русский)</option>
<option value="Spanish">Spanish (Español)</option>
<option value="French">French (Français)</option>
<option value="Portuguese">Portuguese (Português)</option>
<option value="German">German (Deutsch)</option>
<option value="Turkish">Turkish (Türkçe)</option>
<option value="Dutch">Dutch (Nederlands)</option>
<option value="Ukrainian">Ukrainian (Українська)</option>
<option value="Vietnamese">Vietnamese (Tiếng Việt)</option>
<option value="Indonesian">Indonesian (Bahasa Indonesia)</option>
<option value="Japanese">Japanese (日本語)</option>
<option value="Italian">Italian (Italiano)</option>
<option value="Korean">Korean (한국어)</option>
<option value="Thai">Thai (ไทย)</option>
<option value="Polish">Polish (Polski)</option>
<option value="Romanian">Romanian (Română)</option>
<option value="Greek">Greek (Ελληνικά)</option>
<option value="Czech">Czech (Čeština)</option>
<option value="Finnish">Finnish (Suomi)</option>
<option value="Hindi">Hindi (हिन्दी)</option>
</select>
</div>
<div class="tts_block">
<input id="minimax_add_custom_voice" class="menu_button" type="button" value="Add Custom Voice">
</div>
<div id="minimax_custom_voices_list" style="margin-top: 10px;"></div>
</div>
<hr>
<div id="minimax_custom_model_management" class="tts_block flexFlowColumn">
<h4>Custom Model Management</h4>
<div class="tts_block wide100p">
<input id="minimax_custom_model_id" type="text" class="text_pole" placeholder="Model ID"/>
</div>
<div class="tts_block wide100p">
<input id="minimax_custom_model_name" type="text" class="text_pole" placeholder="Model Name"/>
</div>
<div class="tts_block">
<input id="minimax_add_custom_model" class="menu_button" type="button" value="Add Custom Model">
</div>
<div id="minimax_custom_models_list" style="margin-top: 10px;"></div>
</div>
</div>
`;
}
constructor() {
this.handler = async function (/** @type {string} */ key) {
if (![SECRET_KEYS.MINIMAX, SECRET_KEYS.MINIMAX_GROUP_ID].includes(key)) return;
$('#api_key_minimax').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX]);
$('#minimax_group_id').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]);
await this.onRefreshClick();
}.bind(this);
}
dispose() {
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
eventSource.removeListener(event, this.handler);
});
}
onSettingsChange() {
this.settings.apiHost = $('#minimax_tts_api_host').val();
this.settings.speed = parseFloat($('#minimax_tts_speed').val().toString());
this.settings.volume = parseFloat($('#minimax_tts_volume').val().toString());
this.settings.pitch = parseInt($('#minimax_tts_pitch').val().toString());
this.settings.model = $('#minimax_tts_model').find(':selected').val();
this.settings.format = $('#minimax_tts_format').find(':selected').val();
this.settings.customVoiceId = $('#minimax_tts_custom_voice_id').val();
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch);
saveTtsProviderSettings();
}
addCustomModel() {
const modelId = $('#minimax_custom_model_id').val().toString().trim();
const modelName = $('#minimax_custom_model_name').val().toString().trim();
if (!modelId || !modelName) {
toastr.error('Please enter model ID and name');
return;
}
// Check if already exists in custom models
if (this.settings.customModels.find(m => m.id === modelId)) {
toastr.error('Model ID already exists in custom models');
return;
}
// Check if conflicts with default models
if (MiniMaxTtsProvider.defaultModels.find(m => m.id === modelId)) {
toastr.error('Model ID conflicts with default model. Please use a different model ID.');
return;
}
// Check if conflicts with default model names
if (MiniMaxTtsProvider.defaultModels.find(m => m.name === modelName)) {
toastr.error('Model name conflicts with default model. Please use a different model name.');
return;
}
this.settings.customModels.push({ id: modelId, name: modelName });
$('#minimax_custom_model_id').val('');
$('#minimax_custom_model_name').val('');
this.updateCustomModelsDisplay();
this.updateModelSelect(this.getAllModels());
saveTtsProviderSettings();
toastr.success('Model added successfully');
}
removeCustomModel(modelId) {
this.settings.customModels = this.settings.customModels.filter(m => m.id !== modelId);
this.updateCustomModelsDisplay();
this.updateModelSelect(this.getAllModels());
saveTtsProviderSettings();
toastr.success('Model removed successfully');
}
addCustomVoice() {
const voiceName = $('#minimax_custom_voice_name').val().toString().trim();
const voiceId = $('#minimax_custom_voice_id').val().toString().trim();
const voiceLang = $('#minimax_custom_voice_lang').val().toString().trim();
if (!voiceName || !voiceId) {
toastr.error('Please enter voice name and ID');
return;
}
// Check if already exists in custom voices
if (this.settings.customVoices.find(v => v.voice_id === voiceId)) {
toastr.error('Voice ID already exists in custom voices');
return;
}
// Check if conflicts with default voices
if (MiniMaxTtsProvider.defaultVoices.find(v => v.voice_id === voiceId)) {
toastr.error('Voice ID conflicts with default voice. Please use a different voice ID.');
return;
}
// Check if conflicts with default voice names
if (MiniMaxTtsProvider.defaultVoices.find(v => v.name === voiceName)) {
toastr.error('Voice name conflicts with default voice. Please use a different voice name.');
return;
}
// Convert display name to standard language code before saving
const standardLangCode = this.convertDisplayNameToLanguageCode(voiceLang);
this.settings.customVoices.push({
name: voiceName,
voice_id: voiceId,
lang: standardLangCode,
preview_url: null,
});
$('#minimax_custom_voice_name').val('');
$('#minimax_custom_voice_id').val('');
$('#minimax_custom_voice_lang').val('auto');
this.updateCustomVoicesDisplay();
initVoiceMap(); // Update TTS extension voiceMap
saveTtsProviderSettings();
toastr.success('Voice added successfully');
}
// Remove custom voice
removeCustomVoice(voiceId) {
this.settings.customVoices = this.settings.customVoices.filter(v => v.voice_id !== voiceId);
this.updateCustomVoicesDisplay();
initVoiceMap(); // Update TTS extension voiceMap
saveTtsProviderSettings();
toastr.success('Voice removed successfully');
}
// Helper function to escape HTML
escapeHtml(text) {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
// Update custom models display
updateCustomModelsDisplay() {
const container = $('#minimax_custom_models_list');
container.empty();
if (this.settings.customModels.length === 0) {
container.append('<div class="minimax-empty-list">No custom models added</div>');
return;
}
this.settings.customModels.forEach(model => {
const modelDiv = $('<div></div>').addClass('minimax-custom-item');
const modelInfo = $('<div></div>').addClass('minimax-custom-item-info');
const modelName = $('<div></div>').addClass('minimax-custom-item-name').text(model.name);
const modelId = $('<div></div>').addClass('minimax-custom-item-details').text(`(${model.id})`);
modelInfo.append(modelName).append(modelId);
const removeBtn = $('<button></button>')
.addClass('menu_button minimax-custom-item-remove')
.text('Remove')
.on('click', () => {
try {
this.removeCustomModel(model.id);
} catch (error) {
console.error('MiniMax TTS: Error removing custom model:', error);
toastr.error(`Failed to remove custom model: ${error.message}`);
}
});
modelDiv.append(modelInfo).append(removeBtn);
container.append(modelDiv);
});
}
// Update custom voices display
updateCustomVoicesDisplay() {
const container = $('#minimax_custom_voices_list');
container.empty();
if (this.settings.customVoices.length === 0) {
container.append('<div class="minimax-empty-list">No custom voices added</div>');
return;
}
this.settings.customVoices.forEach(voice => {
const voiceDiv = $('<div></div>').addClass('minimax-custom-item');
const voiceInfo = $('<div></div>').addClass('minimax-custom-item-info');
const voiceName = $('<div></div>').addClass('minimax-custom-item-name').text(voice.name);
const voiceDetails = $('<div></div>').addClass('minimax-custom-item-details').text(`(${voice.voice_id}) - ${voice.lang}`);
voiceInfo.append(voiceName).append(voiceDetails);
const removeBtn = $('<button></button>')
.addClass('menu_button minimax-custom-item-remove')
.text('Remove')
.on('click', () => {
try {
this.removeCustomVoice(voice.voice_id);
} catch (error) {
console.error('MiniMax TTS: Error removing custom voice:', error);
toastr.error(`Failed to remove custom voice: ${error.message}`);
}
});
voiceDiv.append(voiceInfo).append(removeBtn);
container.append(voiceDiv);
});
}
// Get all models (default + custom)
getAllModels() {
return [...MiniMaxTtsProvider.defaultModels, ...this.settings.customModels];
}
// Get all voices (default + custom)
getAllVoices() {
return [...MiniMaxTtsProvider.defaultVoices, ...this.settings.customVoices];
}
/**
* Convert display names to standard language codes
* @param {string} displayName Language display name
* @returns {string} Standard language code
*/
convertDisplayNameToLanguageCode(displayName) {
const displayNameToCode = {
'Chinese': 'zh-CN',
'Chinese,Yue': 'zh-TW',
'English': 'en-US',
'Japanese': 'ja-JP',
'Korean': 'ko-KR',
'French': 'fr-FR',
'German': 'de-DE',
'Spanish': 'es-ES',
'Portuguese': 'pt-BR',
'Italian': 'it-IT',
'Arabic': 'ar-SA',
'Russian': 'ru-RU',
'Turkish': 'tr-TR',
'Dutch': 'nl-NL',
'Ukrainian': 'uk-UA',
'Vietnamese': 'vi-VN',
'Indonesian': 'id-ID',
'Thai': 'th-TH',
'Polish': 'pl-PL',
'Romanian': 'ro-RO',
'Greek': 'el-GR',
'Czech': 'cs-CZ',
'Finnish': 'fi-FI',
'Hindi': 'hi-IN',
};
return displayNameToCode[displayName] || displayName;
}
updateModelSelect(models) {
const modelSelect = $('#minimax_tts_model');
const currentValue = modelSelect.val();
// Clear existing options
modelSelect.empty();
// Add all models
models.forEach(model => {
const option = $('<option></option>');
option.val(model.id);
option.text(model.name);
modelSelect.append(option);
});
// Restore previous selection if it still exists
if (currentValue && models.find(m => m.id === currentValue)) {
modelSelect.val(currentValue);
}
}
async loadSettings(settings) {
// Populate Provider UI given input settings
if (Object.keys(settings).length === 0) {
console.info('Using default MiniMax TTS Provider settings');
}
// Only accept keys defined in defaultSettings
this.settings = { ...this.defaultSettings };
// Flatten the settings fields with default/min/max definitions so the actual values are used
this.settings = Object.fromEntries(
Object.entries(this.defaultSettings).map(([key, value]) => {
if (value && typeof value === 'object' && 'default' in value) {
return [key, value.default];
}
return [key, value];
}),
);
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
} else {
console.warn(`Invalid setting passed to MiniMax TTS Provider: ${key}`);
}
}
// Ensure custom configuration arrays exist
if (!this.settings.customModels) this.settings.customModels = [];
if (!this.settings.customVoices) this.settings.customVoices = [];
// # Migrate settings
// Pitch value changed from float to int. If it's a float, let's try to extrapolate it to the new range
if (!Number.isInteger(this.settings.pitch)) {
const oldPitch = parseFloat(this.settings.pitch);
if (!isNaN(oldPitch)) {
// map old [0.5..1.0] to [-12..0], and [1.0..2.0] to [0..12] (old default was 1.0, new default is 0)
const newPitch = (oldPitch < 1.0) ? (oldPitch - 1.0) * 24 : (oldPitch - 1.0) * 12;
this.settings.pitch = Math.max(-12, Math.min(12, Math.round(newPitch)));
console.info(`MiniMax TTS: Migrated pitch from ${oldPitch} to ${this.settings.pitch}`);
} else {
this.settings.pitch = 0;
console.info(`MiniMax TTS: Migration reset pitch to default ${this.settings.pitch}`);
}
}
$('#minimax_tts_api_host').val(this.settings.apiHost || 'https://api.minimax.io');
$('#minimax_tts_model').val(this.settings.model);
$('#minimax_tts_speed').val(this.settings.speed);
$('#minimax_tts_volume').val(this.settings.volume);
$('#minimax_tts_pitch').val(this.settings.pitch);
$('#minimax_tts_format').val(this.settings.format);
$('#minimax_tts_custom_voice_id').val(this.settings.customVoiceId);
$('#minimax_connect').on('click', () => {
try {
this.onConnectClick();
} catch (error) {
console.error('MiniMax TTS: Error in connect click handler:', error);
toastr.error(`Connection failed: ${error.message}`);
}
});
$('#minimax_refresh').on('click', () => {
try {
this.onRefreshClick();
} catch (error) {
console.error('MiniMax TTS: Error in refresh click handler:', error);
toastr.error(`Refresh failed: ${error.message}`);
}
});
$('#minimax_tts_api_host').on('change', this.onSettingsChange.bind(this));
$('#minimax_tts_speed').on('input', this.onSettingsChange.bind(this));
$('#minimax_tts_volume').on('input', this.onSettingsChange.bind(this));
$('#minimax_tts_pitch').on('input', this.onSettingsChange.bind(this));
$('#minimax_tts_model').on('change', this.onSettingsChange.bind(this));
$('#minimax_tts_format').on('change', this.onSettingsChange.bind(this));
$('#minimax_tts_custom_voice_id').on('input', this.onSettingsChange.bind(this));
// Custom model and voice event listeners
$('#minimax_add_custom_model').on('click', () => {
try {
this.addCustomModel();
} catch (error) {
console.error('MiniMax TTS: Error adding custom model:', error);
toastr.error(`Failed to add custom model: ${error.message}`);
}
});
$('#minimax_add_custom_voice').on('click', () => {
try {
this.addCustomVoice();
} catch (error) {
console.error('MiniMax TTS: Error adding custom voice:', error);
toastr.error(`Failed to add custom voice: ${error.message}`);
}
});
// Keyboard event listeners
const ENTER_KEY = 13;
$('#minimax_custom_model_id, #minimax_custom_model_name').on('keypress', (e) => {
if (e.which === ENTER_KEY) {
try {
this.addCustomModel();
} catch (error) {
console.error('MiniMax TTS: Error adding custom model via keyboard:', error);
toastr.error(`Failed to add custom model: ${error.message}`);
}
}
});
$('#minimax_custom_voice_name, #minimax_custom_voice_id').on('keypress', (e) => {
if (e.which === ENTER_KEY) {
try {
this.addCustomVoice();
} catch (error) {
console.error('MiniMax TTS: Error adding custom voice via keyboard:', error);
toastr.error(`Failed to add custom voice: ${error.message}`);
}
}
});
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
$('#minimax_tts_pitch_output').text(this.settings.pitch);
// Initialize custom configuration display
this.updateCustomModelsDisplay();
this.updateCustomVoicesDisplay();
// Update model selector to include custom models
this.updateModelSelect(this.getAllModels());
// Initialize voice map for character voice assignment
try {
await initVoiceMap();
} catch (error) {
console.debug('MiniMax: Voice map initialization failed, but continuing');
}
$('#api_key_minimax').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX]);
$('#minimax_group_id').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]);
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
eventSource.on(event, this.handler);
});
// Only check ready status when API credentials are available
if (secret_state[SECRET_KEYS.MINIMAX] && secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
try {
await this.checkReady();
console.debug('MiniMax TTS: Settings loaded and ready');
} catch (error) {
console.debug('MiniMax TTS: Settings loaded, but not ready:', error);
}
} else {
console.debug('MiniMax TTS: Settings loaded, waiting for API credentials');
}
}
// Perform a simple readiness check
async checkReady() {
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
const error = new Error('API Key and Group ID are required');
console.error('MiniMax TTS checkReady error:', error.message);
throw error;
}
// Try to fetch available models and voices, but don't block connection on failure
try {
await this.updateModelsAndVoices();
} catch (error) {
console.warn('MiniMax TTS: Failed to fetch models/voices during ready check, will use all available:', error);
// Even if API call fails, set all available values to ensure basic functionality
this.availableModels = this.getAllModels();
this.availableVoices = this.getAllVoices();
}
// Ensure at least voices are available
if (!this.availableVoices || this.availableVoices.length === 0) {
this.availableVoices = this.getAllVoices();
}
}
async onRefreshClick() {
try {
await this.updateModelsAndVoices();
await initVoiceMap(); // Update voice map after refresh
toastr.success('MiniMax TTS: Models and voices refreshed successfully');
} catch (error) {
toastr.error(`MiniMax TTS: Failed to refresh - ${error.message}`);
}
}
async onConnectClick() {
try {
await this.checkReady();
await initVoiceMap(); // Update voice map after connection
toastr.success('MiniMax TTS: Connected successfully');
saveTtsProviderSettings();
} catch (error) {
toastr.error(`MiniMax TTS: ${error.message}`);
}
}
async getVoice(voiceName) {
if (!voiceName) {
const error = new Error('TTS Voice name not provided');
console.error('MiniMax TTS getVoice error:', error.message);
throw error;
}
// If no available voices, try to fetch them
if (!this.availableVoices || this.availableVoices.length === 0) {
this.availableVoices = await this.fetchTtsVoiceObjects();
}
// Ensure at least voices are available
if (!this.availableVoices || this.availableVoices.length === 0) {
this.availableVoices = this.getAllVoices();
}
const voice = this.availableVoices.find(voice =>
voice.voice_id === voiceName || voice.name === voiceName,
);
if (!voice) {
const error = new Error(`TTS Voice not found: ${voiceName}`);
console.error('MiniMax TTS getVoice error:', error.message);
throw error;
}
return voice;
}
async generateTts(text, voiceId) {
// If voiceId is 'customVoice', use the custom voice ID from settings
if (voiceId === 'customVoice') {
const customVoiceId = this.settings.customVoiceId;
if (!customVoiceId || customVoiceId.trim() === '') {
const error = new Error('Please enter custom voice ID in settings first');
console.error('MiniMax TTS generateTts error:', error.message);
throw error;
}
voiceId = customVoiceId.trim();
}
// Get the voice object to determine language
let language = null;
try {
const voice = await this.getVoice(voiceId);
if (voice && voice.lang) {
language = this.mapLanguageToMiniMaxFormat(voice.lang);
console.debug(`MiniMax TTS: Using voice language ${voice.lang}, API language: ${language}`);
}
} catch (error) {
console.debug('MiniMax TTS: Could not determine voice language, using default');
}
return await this.fetchTtsGeneration(text, voiceId, language);
}
async fetchTtsVoiceObjects() {
try {
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
console.warn('MiniMax TTS: API Key and Group ID required for fetching voices');
console.warn('Using all available voices (default + custom). Please check your API credentials');
return this.getAllVoices();
}
// MiniMax API doesn't provide a voices listing endpoint
// Using all available voices (default + custom)
console.info('MiniMax TTS: Using all available voices (default + custom)');
return this.getAllVoices();
} catch (error) {
console.error('Error fetching MiniMax voices:', error);
console.warn('Using all available voices (default + custom). Please check your API credentials');
return this.getAllVoices();
}
}
async fetchTtsModels() {
// MiniMax API doesn't provide a models listing endpoint
// Using all available models (default + custom)
console.info('MiniMax TTS: Using all available models (default + custom)');
this.availableModels = this.getAllModels();
return this.getAllModels();
}
async updateModelsAndVoices() {
try {
// Get models list
this.availableModels = await this.fetchTtsModels();
console.info(`MiniMax TTS: Loaded ${this.availableModels.length} models`);
// Get voices list (now fetched from API)
this.availableVoices = await this.fetchTtsVoiceObjects();
console.info(`MiniMax TTS: Loaded ${this.availableVoices.length} voices`);
// Update model dropdown
this.updateModelSelect(this.availableModels);
return {
models: this.availableModels,
voices: this.availableVoices,
};
} catch (error) {
console.error('MiniMax TTS: Failed to update models and voices:', error);
// Set all available values to ensure basic functionality
this.availableModels = this.getAllModels();
this.availableVoices = this.getAllVoices();
throw error;
}
}
// Get correct MIME type
getAudioMimeType(format) {
const mimeTypes = {
'mp3': 'audio/mpeg',
'wav': 'audio/wav',
'pcm': 'audio/pcm',
'flac': 'audio/flac',
'aac': 'audio/aac',
};
return mimeTypes[format] || 'audio/mpeg';
}
async fetchTtsGeneration(inputText, voiceId, language = null) {
console.info(`Generating new MiniMax TTS for voice_id ${voiceId}`);
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
const error = new Error('API Key and Group ID are required');
console.error('MiniMax TTS fetchTtsGeneration error:', error.message);
throw error;
}
/** @param {number} number @param {number} lower @param {number} upper @returns {number} */
const clamp = (number, lower, upper) => Math.min(Math.max(number, lower), upper);
const requestBody = {
text: inputText,
voiceId: voiceId,
apiHost: this.settings.apiHost,
model: this.settings.model || this.defaultSettings.model,
speed: clamp(Number(this.settings.speed) || this.defaultSettings.speed.default, this.defaultSettings.speed.min, this.defaultSettings.speed.max),
volume: clamp(Number(this.settings.volume) || this.defaultSettings.volume.default, this.defaultSettings.volume.min, this.defaultSettings.volume.max),
pitch: clamp(Math.round(Number(this.settings.pitch)) || this.defaultSettings.pitch.default, this.defaultSettings.pitch.min, this.defaultSettings.pitch.max),
audioSampleRate: Number(this.settings.audioSampleRate) || this.defaultSettings.audioSampleRate,
bitrate: Number(this.settings.bitrate) || this.defaultSettings.bitrate,
format: this.settings.format || this.defaultSettings.format,
language: language,
};
console.debug('MiniMax TTS Request:', {
body: { ...requestBody, voiceId: '[REDACTED]' },
});
try {
const response = await fetch('/api/minimax/generate-voice', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify(requestBody),
});
if (!response.ok) {
let errorMessage = `HTTP ${response.status}`;
try {
// Try to parse JSON error response from backend
const errorData = await response.json();
console.error('MiniMax TTS backend error:', errorData);
errorMessage = errorData.error || errorMessage;
} catch (jsonError) {
// If not JSON, try to read text
try {
const errorText = await response.text();
console.error('MiniMax TTS backend error (Text):', errorText);
errorMessage = errorText || errorMessage;
} catch (textError) {
console.error('MiniMax TTS: Failed to read error response:', textError);
}
}
toastr.error(`${errorMessage}`, 'MiniMax TTS Generation Failed');
const error = new Error(errorMessage);
console.error('MiniMax TTS fetchTtsGeneration error:', error.message);
throw error;
}
// Backend handles all the complex processing and returns audio data directly
console.debug('MiniMax TTS: Audio response received from backend');
return response;
} catch (error) {
console.error('Error in MiniMax TTS generation:', error);
throw error;
}
}
/**
* Map language codes to MiniMax API supported language format
* @param {string} lang Language code or display name
* @returns {string} MiniMax API language format
*/
mapLanguageToMiniMaxFormat(lang) {
// Convert display name to language code if needed
const languageCode = this.convertDisplayNameToLanguageCode(lang);
// Then map language codes to MiniMax API format
const languageMap = {
'zh-CN': 'zh_CN',
'zh-TW': 'zh_TW',
'en-US': 'en_US',
'en-GB': 'en_GB',
'en-AU': 'en_AU',
'en-IN': 'en_IN',
'ja-JP': 'ja_JP',
'ko-KR': 'ko_KR',
'fr-FR': 'fr_FR',
'de-DE': 'de_DE',
'es-ES': 'es_ES',
'pt-BR': 'pt_BR',
'it-IT': 'it_IT',
'ar-SA': 'ar_SA',
'ru-RU': 'ru_RU',
'tr-TR': 'tr_TR',
'nl-NL': 'nl_NL',
'uk-UA': 'uk_UA',
'vi-VN': 'vi_VN',
'id-ID': 'id_ID',
'th-TH': 'th_TH',
'pl-PL': 'pl_PL',
'ro-RO': 'ro_RO',
'el-GR': 'el_GR',
'cs-CZ': 'cs_CZ',
'fi-FI': 'fi_FI',
'hi-IN': 'hi_IN',
};
// Return mapped language or default to auto
return languageMap[languageCode] || 'auto';
}
/**
* Preview TTS for a given voice ID.
* @param {string} voiceId Voice ID
*/
async previewTtsVoice(voiceId) {
this.audioElement.pause();
this.audioElement.currentTime = 0;
try {
const voice = await this.getVoice(voiceId);
// Get preview text based on voice language, defaulting to en-US
const previewLang = voice.lang || 'en-US';
const text = getPreviewString(previewLang);
// Map the language to MiniMax API format for the request
const apiLang = this.mapLanguageToMiniMaxFormat(previewLang);
console.debug(`MiniMax TTS: Using preview language ${previewLang}, API language: ${apiLang}`);
const response = await this.fetchTtsGeneration(text, voiceId, apiLang);
if (!response.ok) {
const errorText = await response.text();
const error = new Error(`HTTP ${response.status}: ${errorText}`);
console.error('MiniMax TTS previewTtsVoice error:', error.message);
throw error;
}
const audio = await response.blob();
console.debug(`MiniMax TTS: Audio blob size: ${audio.size}, type: ${audio.type}`);
// Use the same method as other TTS providers - convert to base64 data URL
const srcUrl = await getBase64Async(audio);
console.debug('MiniMax TTS: Base64 data URL created');
// Clean up previous event listener to prevent memory leaks
this.audioElement.onended = null;
this.audioElement.onerror = null;
this.audioElement.src = srcUrl;
this.audioElement.volume = Math.min(this.settings.volume || 1.0, 1.0); // HTML audio element max is 1.0
// Add error handler for audio element
this.audioElement.onerror = (e) => {
console.error('MiniMax TTS: Audio element error:', e);
console.error('MiniMax TTS: Audio element error details:', {
error: this.audioElement.error,
networkState: this.audioElement.networkState,
readyState: this.audioElement.readyState,
src: this.audioElement.src,
});
toastr.error('Audio playback failed. The audio format may not be supported by your browser.');
};
try {
await this.audioElement.play();
console.debug('MiniMax TTS: Audio playback started successfully');
} catch (playError) {
console.error('MiniMax TTS: Play error:', playError);
throw new Error(`Audio playback failed: ${playError.message}`);
}
this.audioElement.onended = () => {
this.audioElement.onended = null;
this.audioElement.onerror = null;
};
} catch (error) {
console.error('MiniMax TTS Preview Error:', error);
toastr.error(`Could not generate preview: ${error.message}`);
}
}
}

View File

@@ -0,0 +1,215 @@
import { getRequestHeaders } from '../../../script.js';
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
import { splitRecursive } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
import { initVoiceMap } from './index.js';
export { NovelTtsProvider };
/**
 * TTS provider backed by the '/api/novelai/generate-voice' endpoint.
 *
 * Besides a fixed list of example voices, arbitrary custom voice names can be
 * added; they are stored in `settings.customVoices`.
 */
class NovelTtsProvider {
    //########//
    // Config //
    //########//
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');
    defaultSettings = {
        voiceMap: {},
        customVoices: [],
    };

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Novel reads tilde as a word. Replace with full stop
        text = text.replace(/~/g, '.');
        // Novel reads asterisk as a word. Remove it
        text = text.replace(/\*/g, '');
        return text;
    }

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
        <div class="novel_tts_hints">
            <div>Use NovelAI's TTS engine.</div>
            <div>
                The default Voice IDs are only examples. Add custom voices and Novel will create a new random voice for it.
                Feel free to try different options!
            </div>
            <i>Hint: Save an API key in the NovelAI API settings to use it here.</i>
        </div>
        <label for="tts-novel-custom-voices-add">Custom Voices</label>
        <div class="tts_custom_voices">
            <select id="tts-novel-custom-voices-select"></select>
            <i id="tts-novel-custom-voices-add" class="tts-button fa-solid fa-plus fa-xl success" title="Add"></i>
            <i id="tts-novel-custom-voices-delete" class="tts-button fa-solid fa-xmark fa-xl failure" title="Delete"></i>
        </div>
        `;
        return html;
    }

    /**
     * Ask for a name and add it as a new custom voice.
     * A cancelled popup, a blank name or an already known name changes nothing.
     * @returns {Promise<void>}
     */
    async addCustomVoice() {
        const input = await callGenericPopup('Custom Voice name:', POPUP_TYPE.INPUT);
        // The popup does not always yield a string (e.g. when dismissed).
        const voiceName = typeof input === 'string' ? input.trim() : '';
        if (!voiceName || this.settings.customVoices.includes(voiceName)) {
            return;
        }
        this.settings.customVoices.push(voiceName);
        this.populateCustomVoices();
        initVoiceMap(); // Update TTS extension voiceMap
        saveTtsProviderSettings();
    }

    /**
     * Delete the custom voice currently selected in the dropdown.
     */
    deleteCustomVoice() {
        const selected = $('#tts-novel-custom-voices-select').find(':selected').val();
        const voiceIndex = this.settings.customVoices.indexOf(selected);
        if (voiceIndex !== -1) {
            this.settings.customVoices.splice(voiceIndex, 1);
        }
        this.populateCustomVoices();
        initVoiceMap(); // Update TTS extension voiceMap
        saveTtsProviderSettings();
    }

    /**
     * Rebuild the dropdown of custom voices from the settings.
     */
    populateCustomVoices() {
        const voiceSelect = $('#tts-novel-custom-voices-select');
        voiceSelect.empty();
        for (const voice of this.settings.customVoices) {
            // Voice names are user input: set them as text, never as markup.
            voiceSelect.append($('<option>').text(voice));
        }
    }

    /**
     * Apply stored settings and wire up the settings UI.
     * @param {object} settings Stored provider settings
     * @returns {Promise<void>}
     * @throws {Error} When `settings` contains a key not present in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }
        $('#tts-novel-custom-voices-add').on('click', () => (this.addCustomVoice()));
        $('#tts-novel-custom-voices-delete').on('click', () => (this.deleteCustomVoice()));
        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }
        this.populateCustomVoices();
        await this.checkReady();
        console.debug('NovelTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    // Doesn't really do much for Novel, not seeing a good way to test this at the moment.
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Build a voice object for the given name. Any non-empty name is accepted.
     * @param {string} voiceName Voice name, also used as the voice ID
     * @returns {Promise<{name: string, voice_id: string, lang: string, preview_url: boolean}>}
     * @throws {Error} When no name is given
     */
    async getVoice(voiceName) {
        if (!voiceName) {
            throw new Error('TTS Voice name not provided');
        }
        return { name: voiceName, voice_id: voiceName, lang: 'en-US', preview_url: false };
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * List the built-in example voices followed by the custom voices.
     * @returns {Promise<object[]>} Voice objects
     */
    async fetchTtsVoiceObjects() {
        let voices = [
            { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false },
            { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false },
            { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false },
            { name: 'Claea', voice_id: 'Claea', lang: 'en-US', preview_url: false },
            { name: 'Lim', voice_id: 'Lim', lang: 'en-US', preview_url: false },
            { name: 'Aurae', voice_id: 'Aurae', lang: 'en-US', preview_url: false },
            { name: 'Naia', voice_id: 'Naia', lang: 'en-US', preview_url: false },
            { name: 'Aulon', voice_id: 'Aulon', lang: 'en-US', preview_url: false },
            { name: 'Elei', voice_id: 'Elei', lang: 'en-US', preview_url: false },
            { name: 'Ogma', voice_id: 'Ogma', lang: 'en-US', preview_url: false },
            { name: 'Raid', voice_id: 'Raid', lang: 'en-US', preview_url: false },
            { name: 'Pega', voice_id: 'Pega', lang: 'en-US', preview_url: false },
            { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false },
        ];
        // Add in custom voices to the map
        let addVoices = this.settings.customVoices.map(voice =>
            ({ name: voice, voice_id: voice, lang: 'en-US', preview_url: false }),
        );
        voices = voices.concat(addVoices);
        return voices;
    }

    /**
     * Play one audio URL on a fresh Audio object.
     * Settles when playback ends, and rejects when playback cannot start or
     * the element reports an error, so callers never wait forever.
     * @param {string} url Audio URL to play
     * @returns {Promise<void>}
     */
    playAudioUrl(url) {
        return new Promise((resolve, reject) => {
            const player = new Audio();
            player.onended = () => resolve();
            player.onerror = () => reject(new Error('Audio playback failed'));
            player.src = url;
            // play() reports start failures (e.g. blocked autoplay) via its promise.
            Promise.resolve(player.play()).catch(reject);
        });
    }

    /**
     * Generate and play a preview sentence for the given voice, chunk by chunk.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     * @throws {Error} When generation or playback fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const text = getPreviewString('en-US');
        for await (const response of this.generateTts(text, id)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await this.playAudioUrl(url);
            } finally {
                // Release the object URL whether playback finished or failed.
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * Request audio for the text, split into chunks of at most 1000 characters.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice ID
     * @yields {Response} One successful response per chunk
     * @throws {Error} When the endpoint responds with a non-OK status
     */
    async* fetchTtsGeneration(inputText, voiceId) {
        const MAX_LENGTH = 1000;
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const chunks = splitRecursive(inputText, MAX_LENGTH);
        for (const chunk of chunks) {
            const response = await fetch('/api/novelai/generate-voice',
                {
                    method: 'POST',
                    headers: getRequestHeaders(),
                    body: JSON.stringify({
                        'text': chunk,
                        'voice': voiceId,
                    }),
                },
            );
            if (!response.ok) {
                toastr.error(response.statusText, 'TTS Generation Failed');
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
            yield response;
        }
    }
}

View File

@@ -0,0 +1,181 @@
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, secret_state } from '../../secrets.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { OpenAICompatibleTtsProvider };
/**
 * TTS provider for OpenAI-compatible speech endpoints, proxied through
 * '/api/openai/custom/generate-voice'. Endpoint, model, voice list and speed
 * are user-configurable.
 */
class OpenAICompatibleTtsProvider {
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');
    defaultSettings = {
        voiceMap: {},
        model: 'tts-1',
        speed: 1,
        available_voices: ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
        provider_endpoint: 'http://127.0.0.1:8000/v1/audio/speech',
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
        <label for="openai_compatible_tts_endpoint">Provider Endpoint:</label>
        <div class="flex-container alignItemsCenter">
            <div class="flex1">
                <input id="openai_compatible_tts_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
            </div>
            <div id="openai_compatible_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_custom_openai_tts">
                <i class="fa-solid fa-key"></i>
                <span>API Key</span>
            </div>
        </div>
        <label for="openai_compatible_model">Model:</label>
        <input id="openai_compatible_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
        <label for="openai_compatible_tts_voices">Available Voices (comma separated):</label>
        <input id="openai_compatible_tts_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
        <label for="openai_compatible_tts_speed">Speed: <span id="openai_compatible_tts_speed_output"></span></label>
        <input type="range" id="openai_compatible_tts_speed" value="1" min="0.25" max="4" step="0.05">`;
        return html;
    }

    constructor() {
        // Bound once so the same function reference can be removed in dispose().
        this.handler = async function (/** @type {string} */ key) {
            if (key !== SECRET_KEYS.CUSTOM_OPENAI_TTS) return;
            $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
            await this.onRefreshClick();
        }.bind(this);
    }

    /**
     * Remove the secret-change listeners registered by loadSettings().
     */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /**
     * Apply stored settings, populate the settings UI and register listeners.
     * @param {object} settings Stored provider settings
     * @returns {Promise<void>}
     * @throws {Error} When `settings` contains a key not present in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }
        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }
        $('#openai_compatible_tts_endpoint').val(this.settings.provider_endpoint);
        $('#openai_compatible_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        // Show the loaded model, like every other field.
        $('#openai_compatible_model').val(this.settings.model);
        $('#openai_compatible_model').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_voices').val(this.settings.available_voices.join());
        $('#openai_compatible_tts_voices').on('input', () => { this.onSettingsChange(); });
        $('#openai_compatible_tts_speed').val(this.settings.speed);
        $('#openai_compatible_tts_speed').on('input', () => {
            this.onSettingsChange();
        });
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.on(event, this.handler);
        });
        await this.checkReady();
        console.debug('OpenAI Compatible TTS: Settings loaded');
    }

    /**
     * Read the settings UI back into `settings`, refresh the cached voice
     * list and persist the settings.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.provider_endpoint = String($('#openai_compatible_tts_endpoint').val());
        this.settings.model = String($('#openai_compatible_model').val());
        // "a, b,,c" -> ['a', 'b', 'c']: ignore padding and empty entries.
        this.settings.available_voices = String($('#openai_compatible_tts_voices').val())
            .split(',')
            .map(voice => voice.trim())
            .filter(voice => voice.length > 0);
        this.settings.speed = Number($('#openai_compatible_tts_speed').val());
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        // Keep the cache used by getVoice() in step with the edited list.
        this.voices = this.buildVoiceObjects();
        saveTtsProviderSettings();
    }

    async checkReady() {
        this.voices = await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    /**
     * Look up a voice by name in the configured voice list.
     * @param {string} voiceName Voice name
     * @returns {Promise<{name: string, voice_id: string, lang: string}>} Matching voice
     * @throws {Error} When the name is not in the list
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(oaicVoice => oaicVoice.name == voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * Turn the configured voice names into voice objects.
     * @returns {{name: string, voice_id: string, lang: string}[]} Voice objects
     */
    buildVoiceObjects() {
        return this.settings.available_voices.map(v => {
            return { name: v, voice_id: v, lang: 'en-US' };
        });
    }

    async fetchTtsVoiceObjects() {
        return this.buildVoiceObjects();
    }

    /**
     * Generate and play a preview sentence for the given voice.
     * @param {string} voiceId Voice ID
     * @returns {Promise<void>}
     * @throws {Error} When generation fails or playback cannot start
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        const releaseUrl = () => URL.revokeObjectURL(url);
        // Register the cleanup before starting playback.
        this.audioElement.onended = releaseUrl;
        this.audioElement.src = url;
        try {
            await this.audioElement.play();
        } catch (error) {
            // Playback never started, so 'ended' will not fire: release now.
            releaseUrl();
            throw error;
        }
    }

    /**
     * Request speech generation from the backend endpoint.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the endpoint responds with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch('/api/openai/custom/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                provider_endpoint: this.settings.provider_endpoint,
                model: this.settings.model,
                input: inputText,
                voice: voiceId,
                response_format: 'mp3',
                speed: this.settings.speed,
            }),
        });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }
}

View File

@@ -0,0 +1,253 @@
import { getRequestHeaders, substituteParams } from '../../../script.js';
import { saveTtsProviderSettings, sanitizeId } from './index.js';
export { OpenAITtsProvider };
/**
 * TTS provider backed by the '/api/openai/generate-voice' endpoint.
 *
 * When the 'gpt-4o-mini-tts' model is selected, an extra per-character
 * "voice instructions" section is shown and its text is sent along with the
 * generation request.
 */
class OpenAITtsProvider {
    static voices = [
        { name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' },
        { name: 'Ash', voice_id: 'ash', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/ash.wav' },
        { name: 'Coral', voice_id: 'coral', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/coral.wav' },
        { name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' },
        { name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' },
        { name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' },
        { name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' },
        { name: 'Sage', voice_id: 'sage', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/sage.wav' },
        { name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' },
    ];
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');
    defaultSettings = {
        voiceMap: {},
        customVoices: [],
        model: 'tts-1',
        speed: 1,
        characterInstructions: {},
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
        <div>Use OpenAI's TTS engine.</div>
        <small>Hint: Save an API key in the OpenAI API settings to use it here.</small>
        <div>
            <label for="openai-tts-model">Model:</label>
            <select id="openai-tts-model">
                <optgroup label="Latest">
                    <option value="tts-1">tts-1</option>
                    <option value="tts-1-hd">tts-1-hd</option>
                    <option value="gpt-4o-mini-tts">gpt-4o-mini-tts</option>
                </optgroup>
                <optgroup label="Snapshots">
                    <option value="tts-1-1106">tts-1-1106</option>
                    <option value="tts-1-hd-1106">tts-1-hd-1106</option>
                </optgroup>
            </select>
        </div>
        <div>
            <label for="openai-tts-speed">Speed: <span id="openai-tts-speed-output"></span></label>
            <input type="range" id="openai-tts-speed" value="1" min="0.25" max="4" step="0.05">
        </div>`;
        return html;
    }

    /**
     * Apply stored settings, populate the settings UI and start watching the
     * voice map for changes.
     * @param {object} settings Stored provider settings
     * @returns {Promise<void>}
     * @throws {Error} When `settings` contains a key not present in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }
        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }
        $('#openai-tts-model').val(this.settings.model);
        $('#openai-tts-model').on('change', () => {
            this.onSettingsChange();
        });
        $('#openai-tts-speed').val(this.settings.speed);
        $('#openai-tts-speed').on('input', () => {
            this.onSettingsChange();
        });
        $('#openai-tts-speed-output').text(this.settings.speed);
        await this.checkReady();
        // Initialize UI state based on current model (gpt-4o-mini-tts or other)
        this.updateInstructionsUI();
        // Look for voice map changes
        this.setupVoiceMapObserver();
        console.debug('OpenAI TTS: Settings loaded');
    }

    /**
     * (Re)create the MutationObserver on '#tts_voicemap_block'. Any previous
     * observer is disconnected first. Does nothing when the element is absent.
     */
    setupVoiceMapObserver() {
        if (this.voiceMapObserver) {
            this.voiceMapObserver.disconnect();
            this.voiceMapObserver = null;
        }
        const targetNode = document.getElementById('tts_voicemap_block');
        if (!targetNode) return;
        const observer = new MutationObserver(() => {
            if (this.settings.model === 'gpt-4o-mini-tts') {
                this.populateCharacterInstructions();
            }
        });
        observer.observe(targetNode, { childList: true, subtree: true });
        this.voiceMapObserver = observer;
    }

    /**
     * Read the settings UI back into `settings`, update the instructions
     * section and persist the settings.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.model = String($('#openai-tts-model').find(':selected').val());
        this.settings.speed = Number($('#openai-tts-speed').val());
        $('#openai-tts-speed-output').text(this.settings.speed);
        this.updateInstructionsUI();
        saveTtsProviderSettings();
    }

    /**
     * Show the per-character instructions section for 'gpt-4o-mini-tts' and
     * hide it for every other model.
     */
    updateInstructionsUI() {
        if (this.settings.model === 'gpt-4o-mini-tts') {
            this.createInstructionsContainer();
            $('#openai-instructions-container').show();
            this.populateCharacterInstructions();
            // The observer is dropped when another model is selected, so it
            // has to be restored when switching back to this model.
            if (!this.voiceMapObserver) {
                this.setupVoiceMapObserver();
            }
        } else {
            $('#openai-instructions-container').hide();
            this.voiceMapObserver?.disconnect();
            this.voiceMapObserver = null;
        }
    }

    /**
     * Insert the (initially hidden) instructions container after the speed
     * slider, unless it already exists.
     */
    createInstructionsContainer() {
        if ($('#openai-instructions-container').length === 0) {
            const containerHtml = `
            <div id="openai-instructions-container" style="display: none;">
                <span>Voice Instructions (GPT-4o Mini TTS)</span><br>
                <small>Customize how each character speaks</small>
                <div id="openai-character-instructions"></div>
            </div>
            `;
            $('#openai-tts-speed').parent().after(containerHtml);
        }
    }

    /**
     * Rebuild one instructions textarea per character currently listed in the
     * voice map, pre-filled with the saved instructions.
     */
    populateCharacterInstructions() {
        const currentCharacters = $('.tts_voicemap_block_char span').map((i, el) => $(el).text()).get();
        $('#openai-character-instructions').empty();
        for (const char of currentCharacters) {
            // These two voice-map entries get no instructions box.
            if (char === 'SillyTavern System' || char === '[Default Voice]') continue;
            const sanitizedName = sanitizeId(char);
            const savedInstructions = this.settings.characterInstructions?.[char] || '';
            const instructionBlock = document.createElement('div');
            const label = document.createElement('label');
            const textArea = document.createElement('textarea');
            instructionBlock.appendChild(label);
            instructionBlock.appendChild(textArea);
            instructionBlock.className = 'character-instructions';
            label.setAttribute('for', `openai_char_${sanitizedName}`);
            label.innerText = `${char}:`;
            textArea.id = `openai_char_${sanitizedName}`;
            textArea.placeholder = 'e.g., "Speak cheerfully and energetically"';
            textArea.className = 'textarea_compact autoSetHeight';
            textArea.value = savedInstructions;
            textArea.addEventListener('input', () => {
                this.saveCharacterInstructions(char, textArea.value);
            });
            $('#openai-character-instructions').append(instructionBlock);
        }
    }

    /**
     * Store the instructions for one character and persist the settings.
     * @param {string} characterName Character name used as the key
     * @param {string} instructions Instruction text
     */
    saveCharacterInstructions(characterName, instructions) {
        if (!this.settings.characterInstructions) {
            this.settings.characterInstructions = {};
        }
        this.settings.characterInstructions[characterName] = instructions;
        saveTtsProviderSettings();
    }

    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    /**
     * Look up one of the built-in voices by voice_id or name.
     * @param {string} voiceName Voice ID or name
     * @returns {Promise<object>} Matching voice object
     * @throws {Error} When no name is given or no voice matches
     */
    async getVoice(voiceName) {
        if (!voiceName) {
            throw new Error('TTS Voice name not provided');
        }
        const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName);
        if (!voice) {
            throw new Error(`TTS Voice not found: ${voiceName}`);
        }
        return voice;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @param {string|null} [characterName=null] Character whose instructions should be applied
     * @returns {Promise<Response>} Successful fetch response
     */
    async generateTts(text, voiceId, characterName = null) {
        const response = await this.fetchTtsGeneration(text, voiceId, characterName);
        return response;
    }

    async fetchTtsVoiceObjects() {
        return OpenAITtsProvider.voices;
    }

    async previewTtsVoice(_) {
        return;
    }

    /**
     * Request speech generation from the backend endpoint. Character
     * instructions are attached only for 'gpt-4o-mini-tts' and only when
     * non-blank instructions are saved for the character.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice ID
     * @param {string|null} [characterName=null] Character whose instructions should be applied
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the endpoint responds with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId, characterName = null) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const requestBody = {
            'text': inputText,
            'voice': voiceId,
            'model': this.settings.model,
            'speed': this.settings.speed,
        };
        if (this.settings.model === 'gpt-4o-mini-tts' && characterName) {
            const instructions = this.settings.characterInstructions?.[characterName];
            if (instructions && instructions.trim()) {
                requestBody.instructions = substituteParams(instructions);
            }
        }
        const response = await fetch('/api/openai/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify(requestBody),
        });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }
}

View File

@@ -0,0 +1,151 @@
import { getRequestHeaders } from '../../../script.js';
import { splitRecursive } from '../../utils.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
/**
 * TTS provider that talks to the server-side Pollinations endpoints
 * (/api/speech/pollinations/voices and /api/speech/pollinations/generate).
 */
export class PollinationsTtsProvider {
    /** Active provider settings; assigned in loadSettings(). */
    settings;
    /** Cached voice objects; emptied by onSettingsChange() and refilled lazily by getVoice(). */
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        // TODO: Make this configurable
        model: 'openai-audio',
        voiceMap: {},
    };

    /** This provider contributes no settings markup. */
    get settingsHtml() {
        return '';
    }

    /** Drop the cached voice list and persist the provider settings. */
    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Apply saved settings on top of the defaults and run a readiness check.
     * A failed readiness check is only logged; it does not reject.
     * @param {object} settings Saved settings; every key must exist in defaultSettings
     * @throws {string} When an unknown key is passed (a plain string is thrown)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        try {
            await this.checkReady();
            console.debug('Pollinations TTS: Settings loaded');
        } catch {
            console.debug('Pollinations TTS: Settings loaded, but not ready');
        }
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh button handler: re-runs the readiness check. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a voice by display name or voice id, fetching the voice list first
     * when the cache is empty.
     * @param {string} voiceName Voice name or id to look up
     * @returns {Promise<object>} The first matching voice object
     * @throws {string} When no voice matches (a plain string is thrown)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(
            voice => voice.name == voiceName || voice.voice_id == voiceName,
        );
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Request the voice list for the configured model.
     * @returns {Promise<Array<{name: *, voice_id: *, preview_url: boolean, lang: string}>>}
     *          One object per entry of the sorted response; lang is always 'en-US'
     * @throws {Error} `HTTP <status>: <body>` when the response is not OK
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/speech/pollinations/voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ model: this.settings.model }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        return responseJson
            .sort()
            .map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'en-US' }));
    }

    /**
     * Preview TTS for a given voice ID.
     *
     * Each generated chunk is played to completion before the next one starts.
     * Playback failures (a rejected play() promise or a media error) end the
     * wait for that chunk instead of leaving the preview pending forever, and
     * the temporary object URL is always released.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        for await (const response of this.generateTts(text, id)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise(resolve => {
                    const audioElement = new Audio();
                    // Register handlers before starting playback so no event is missed.
                    audioElement.onended = () => resolve();
                    audioElement.onerror = () => resolve();
                    audioElement.src = url;
                    // play() returns a promise that rejects when playback cannot start.
                    Promise.resolve(audioElement.play()).catch(error => {
                        console.warn('Pollinations TTS: preview playback failed', error);
                        resolve();
                    });
                });
            } finally {
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * Generate audio chunk by chunk: the text is split with splitRecursive()
     * into pieces of at most MAX_LENGTH and one request is sent per piece.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @yields {Response} The successful response for each chunk, in order
     * @throws {Error} `HTTP <status>: <body>` on the first non-OK response (an error toast is shown first)
     */
    async* fetchTtsGeneration(text, voiceId) {
        const MAX_LENGTH = 1000;
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const chunks = splitRecursive(text, MAX_LENGTH);
        for (const chunk of chunks) {
            const response = await fetch('/api/speech/pollinations/generate', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    model: this.settings.model,
                    text: 'Say exactly this and nothing else:' + '\n' + chunk,
                    voice: voiceId,
                }),
            });
            if (!response.ok) {
                toastr.error(response.statusText, 'TTS Generation Failed');
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
            yield response;
        }
    }
}

View File

@@ -0,0 +1,81 @@
# Provider Requirements.
Because I don't know how (or whether it is even possible) to implement interfaces in JS, or maybe because I am just too lazy, here are the requirements a provider must meet for the extension to operate.
### class YourTtsProvider
#### Required
Exported for use in extension index.js, and added to providers list in index.js
1. generateTts(text, voiceId)
2. fetchTtsVoiceObjects()
3. onRefreshClick()
4. checkReady()
5. loadSettings(settingsObject)
6. settings field
7. settingsHtml field
8. getVoice(voiceName)
#### Optional
1. previewTtsVoice()
2. separator field
3. processText(text)
4. dispose()
# Requirement Descriptions
### generateTts(text, voiceId)
Must return audio data whose type is one of `['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']`
Must take text to be rendered and the voiceId to identify the voice to be used
### fetchTtsVoiceObjects()
Required.
Used by the TTS extension to get a list of voice objects from the provider.
Must return a list of voice objects representing the available voices. Each voice object has:
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
3. preview_url: a URL to a local audio file that will be used to sample voices
4. lang: OPTIONAL language string
### getVoice(voiceName)
Required.
Must return a single voice object matching the provided voiceName. The voice object must have the following at least:
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
3. preview_url: a URL to a local audio file that will be used to sample voices
4. lang: OPTIONAL language indicator
### onRefreshClick()
Required.
Users click this button to reconnect/reinit the selected provider.
Responds to the user clicking the refresh button, which is intended to re-initialize the Provider into a working state, like retrying connections or checking if everything is loaded.
### checkReady()
Required.
Return without error to let TTS extension know that the provider is ready.
Throw an error to block the main TTS extension from initializing the provider and UI. The error will be put in the TTS extension UI directly.
### loadSettings(settingsObject)
Required.
Handle the input settings from the TTS extension on provider load.
Put code in here to load your provider settings.
### settings field
Required, used for storing any provider state that needs to be saved.
Anything stored in this field is automatically persisted under extension_settings[providerName] by the main extension in `saveTtsProviderSettings()`, as well as loaded when the provider is selected in `loadTtsProvider(provider)`.
TTS extension doesn't expect any specific contents.
### settingsHtml field
Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
### previewTtsVoice()
Optional.
Function to handle playing previews of voice samples if no direct preview_url is available in fetchTtsVoiceObjects() response
### separator field
Optional.
Used when narrate quoted text is enabled.
Defines the string of characters used to introduce separation between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...`
### processText(text)
Optional.
A function applied to the input text before passing it to the TTS generator. Can be async.
### dispose()
Optional.
Function to handle cleanup of provider resources when the provider is switched.

View File

@@ -0,0 +1,344 @@
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { SBVits2TtsProvider };
/**
 * TTS provider for a Style-Bert-VITS2 API server reachable at
 * settings.provider_endpoint (uses its /models/info and /voice routes).
 */
class SBVits2TtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // backup for auto_split: runs of newlines become <br>; fetchTtsGeneration() restores them
        return text.replace(/\n+/g, '<br>');
    }

    /** Display name -> language key used by the settings dropdown and sent as `language`. */
    languageLabels = {
        'Chinese': 'ZH',
        'English': 'EN',
        'Japanese': 'JP',
    };

    /** Language key -> language code passed to getPreviewString(). */
    langKey2LangCode = {
        'ZH': 'zh-CN',
        'EN': 'en-US',
        'JP': 'ja-JP',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:5000',
        sdp_ratio: 0.2,
        noise: 0.6,
        noisew: 0.8,
        length: 1,
        language: 'JP',
        auto_split: true,
        split_interval: 0.5,
        assist_text: '',
        assist_text_weight: 1,
        style: 'Neutral',
        style_weight: 1,
        reference_audio_path: '',
    };

    /**
     * Settings markup for this provider. The language dropdown preselects
     * settings.language when settings exist; all other controls are rendered
     * with the values from defaultSettings.
     */
    get settingsHtml() {
        let html = `
<label for="sbvits_api_language">Language</label>
<select id="sbvits_api_language">`;

        for (let language in this.languageLabels) {
            if (this.languageLabels[language] == this.settings?.language) {
                html += `<option value="${this.languageLabels[language]}" selected="selected">${language}</option>`;
                continue;
            }

            html += `<option value="${this.languageLabels[language]}">${language}</option>`;
        }

        html += `
</select>
<label>SBVits2 Settings:</label><br/>
<label for="sbvits_tts_endpoint">Provider Endpoint:</label>
<input id="sbvits_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<span>Use <a target="_blank" href="https://github.com/litagin02/Style-Bert-VITS2">Style-Bert-VITS2 API Server</a>.</span><br/>
<label for="sbvits_sdp_ratio">sdp_ratio: <span id="sbvits_sdp_ratio_output">${this.defaultSettings.sdp_ratio}</span></label>
<input id="sbvits_sdp_ratio" type="range" value="${this.defaultSettings.sdp_ratio}" min="0.0" max="1" step="0.01" />
<label for="sbvits_noise">noise: <span id="sbvits_noise_output">${this.defaultSettings.noise}</span></label>
<input id="sbvits_noise" type="range" value="${this.defaultSettings.noise}" min="0.1" max="2" step="0.01" />
<label for="sbvits_noisew">noisew: <span id="sbvits_noisew_output">${this.defaultSettings.noisew}</span></label>
<input id="sbvits_noisew" type="range" value="${this.defaultSettings.noisew}" min="0.1" max="2" step="0.01" />
<label for="sbvits_length">length: <span id="sbvits_length_output">${this.defaultSettings.length}</span></label>
<input id="sbvits_length" type="range" value="${this.defaultSettings.length}" min="0.0" max="5" step="0.01" />
<label for="sbvits_auto_split" class="checkbox_label">
<input id="sbvits_auto_split" type="checkbox" ${this.defaultSettings.auto_split ? 'checked' : ''} />
Enable Text Splitting
</label>
<label for="sbvits_split_interval">split_interval: <span id="sbvits_split_interval_output">${this.defaultSettings.split_interval}</span></label>
<input id="sbvits_split_interval" type="range" value="${this.defaultSettings.split_interval}" min="0.0" max="5" step="0.01" />
<label for="sbvits_assist_text">assist_text:</label>
<input id="sbvits_assist_text" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.assist_text}"/>
<label for="sbvits_assist_text_weight">assist_text_weight: <span id="sbvits_assist_text_weight_output">${this.defaultSettings.assist_text_weight}</span></label>
<input id="sbvits_assist_text_weight" type="range" value="${this.defaultSettings.assist_text_weight}" min="0.0" max="1" step="0.01" />
<label for="sbvits_style_weight">style_weight: <span id="sbvits_style_weight_output">${this.defaultSettings.style_weight}</span></label>
<input id="sbvits_style_weight" type="range" value="${this.defaultSettings.style_weight}" min="0.0" max="20" step="0.01" />
<label for="sbvits_reference_audio_path">reference_audio_path:</label>
<input id="sbvits_reference_audio_path" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.reference_audio_path}"/>
`;

        return html;
    }

    /** Mirror the current slider settings into their `*_output` labels. */
    refreshOutputLabels() {
        $('#sbvits_sdp_ratio_output').text(this.settings.sdp_ratio);
        $('#sbvits_noise_output').text(this.settings.noise);
        $('#sbvits_noisew_output').text(this.settings.noisew);
        $('#sbvits_length_output').text(this.settings.length);
        $('#sbvits_split_interval_output').text(this.settings.split_interval);
        $('#sbvits_assist_text_weight_output').text(this.settings.assist_text_weight);
        $('#sbvits_style_weight_output').text(this.settings.style_weight);
    }

    /** Read every control back into settings, refresh the labels and persist. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#sbvits_tts_endpoint').val();
        this.settings.language = $('#sbvits_api_language').val();
        this.settings.assist_text = $('#sbvits_assist_text').val();
        this.settings.reference_audio_path = $('#sbvits_reference_audio_path').val();

        // Update the default TTS settings based on input fields
        this.settings.sdp_ratio = $('#sbvits_sdp_ratio').val();
        this.settings.noise = $('#sbvits_noise').val();
        this.settings.noisew = $('#sbvits_noisew').val();
        this.settings.length = $('#sbvits_length').val();
        this.settings.auto_split = $('#sbvits_auto_split').is(':checked');
        this.settings.split_interval = $('#sbvits_split_interval').val();
        this.settings.assist_text_weight = $('#sbvits_assist_text_weight').val();
        this.settings.style_weight = $('#sbvits_style_weight').val();

        // Update the UI to reflect changes
        this.refreshOutputLabels();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Apply saved settings on top of the defaults, push them into the UI and
     * register the UI listeners. Unknown keys are ignored with a debug message.
     * @param {object} settings Saved settings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#sbvits_tts_endpoint').val(this.settings.provider_endpoint);
        $('#sbvits_api_language').val(this.settings.language);
        $('#sbvits_assist_text').val(this.settings.assist_text);
        $('#sbvits_reference_audio_path').val(this.settings.reference_audio_path);
        $('#sbvits_sdp_ratio').val(this.settings.sdp_ratio);
        $('#sbvits_noise').val(this.settings.noise);
        $('#sbvits_noisew').val(this.settings.noisew);
        $('#sbvits_length').val(this.settings.length);
        $('#sbvits_auto_split').prop('checked', this.settings.auto_split);
        $('#sbvits_split_interval').val(this.settings.split_interval);
        $('#sbvits_assist_text_weight').val(this.settings.assist_text_weight);
        $('#sbvits_style_weight').val(this.settings.style_weight);

        // Update the UI to reflect changes
        this.refreshOutputLabels();

        // Register input/change event listeners to update settings on user interaction
        $('#sbvits_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_api_language').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_assist_text').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_reference_audio_path').on('input', () => { this.onSettingsChange(); });
        $('#sbvits_sdp_ratio').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_noise').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_noisew').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_length').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_auto_split').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_split_interval').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_assist_text_weight').on('change', () => { this.onSettingsChange(); });
        $('#sbvits_style_weight').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();

        console.info('SBVits2: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Promise.allSettled() is used, so a failing request does not reject here.
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** Refresh button handler; nothing to do for this provider. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice has that name (a plain string is thrown)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech; thin wrapper around fetchTtsGeneration().
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch /models/info and build one voice per (model, speaker, style)
     * combination. The result is also stored in this.voices.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean}>>} Voice list
     * @throws {Error} `HTTP <status>: <body>` when the response is not OK
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/models/info`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON,
            // and an object would only render as "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const data = await response.json();
        const voices = Object.keys(data).flatMap(key => {
            const config = data[key];
            const spk2id = config.spk2id;
            const style2id = config.style2id;
            return Object.entries(spk2id).flatMap(([speaker, speaker_id]) => {
                // Only the style name is used; the numeric style id is not part of the voice id.
                return Object.keys(style2id).map(style => {
                    return {
                        name: `${speaker} (${style})`,
                        voice_id: `${key}-${speaker_id}-${style}`,
                        preview_url: false,
                    };
                });
            });
        });

        this.voices = voices; // Assign to the class property
        return voices; // Also return this list
    }

    // Each time a parameter is changed, we change the configuration
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * All parameters are sent in the query string of a body-less POST to /voice.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} `HTTP <status>: <body>` when the response is not OK (an error toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // The style name may itself contain '-', so everything after the second dash is the style.
        const [model_id, speaker_id, ...rest] = voiceId.split('-');
        const style = rest.join('-');
        const params = new URLSearchParams();
        // restore for auto_split
        inputText = inputText.replaceAll('<br>', '\n');
        params.append('text', inputText);
        params.append('model_id', model_id);
        params.append('speaker_id', speaker_id);
        params.append('sdp_ratio', this.settings.sdp_ratio);
        params.append('noise', this.settings.noise);
        params.append('noisew', this.settings.noisew);
        params.append('length', this.settings.length);
        params.append('language', this.settings.language);
        params.append('auto_split', this.settings.auto_split);
        params.append('split_interval', this.settings.split_interval);
        if (this.settings.assist_text) {
            params.append('assist_text', this.settings.assist_text);
            params.append('assist_text_weight', this.settings.assist_text_weight);
        }
        params.append('style', style);
        params.append('style_weight', this.settings.style_weight);
        if (this.settings.reference_audio_path) {
            params.append('reference_audio_path', this.settings.reference_audio_path);
        }
        const url = `${this.settings.provider_endpoint}/voice?${params.toString()}`;

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                },
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Preview TTS for a given voice ID.
     * The preview sentence is chosen from the configured language key
     * (settings.language), falling back to 'ja-JP' for unknown keys.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const lang_code = this.langKey2LangCode[this.settings.language] ?? 'ja-JP';

        const text = getPreviewString(lang_code);
        // fetchTtsGeneration() already throws on a non-OK response.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the temporary object URL once the preview finished playing.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

View File

@@ -0,0 +1,95 @@
<!-- TTS extension settings panel: provider picker, narration toggles, playback speed, and provider-specific containers. -->
<div id="tts_settings">
<div class="inline-drawer">
<div class="inline-drawer-toggle inline-drawer-header">
<b>TTS</b>
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
</div>
<div class="inline-drawer-content">
<!-- Empty in the template; presumably filled with status text by script (not shown here). -->
<div id="tts_status">
</div>
<span data-i18n="Select TTS Provider">Select TTS Provider</span>
<br>
<!-- Provider dropdown (no options in the template) plus the reload button. -->
<div class="tts_block">
<select id="tts_provider" class="flex1">
</select>
<input id="tts_refresh" data-i18n="[value]tts_refresh" class="menu_button" type="submit" value="Reload" />
</div>
<!-- Narration toggles: each checkbox is tied to its label through matching for/id values. -->
<div>
<label class="checkbox_label" for="tts_enabled">
<input type="checkbox" id="tts_enabled" name="tts_enabled">
<small data-i18n="tts_enabled">Enabled</small>
</label>
<label class="checkbox_label" for="tts_narrate_user">
<input type="checkbox" id="tts_narrate_user">
<small data-i18n="Narrate user messages">Narrate user messages</small>
</label>
<label class="checkbox_label" for="tts_auto_generation">
<input type="checkbox" id="tts_auto_generation">
<small data-i18n="Auto Generation">Auto Generation</small>
</label>
<label class="checkbox_label" for="tts_periodic_auto_generation" data-i18n="[title]Requires auto generation to be enabled." title="Requires auto generation to be enabled.">
<input type="checkbox" id="tts_periodic_auto_generation">
<small data-i18n="Narrate by paragraphs (when streaming)">Narrate by paragraphs (when streaming)</small>
</label>
<label class="checkbox_label" for="tts_narrate_by_paragraphs">
<input type="checkbox" id="tts_narrate_by_paragraphs">
<small data-i18n="Narrate by paragraphs (when not streaming)">Narrate by paragraphs (when not streaming)</small>
</label>
<label class="checkbox_label" for="tts_narrate_quoted">
<input type="checkbox" id="tts_narrate_quoted">
<small data-i18n="Only narrate quotes">Only narrate "quotes"</small>
</label>
<label class="checkbox_label" for="tts_narrate_dialogues">
<input type="checkbox" id="tts_narrate_dialogues">
<small data-i18n="Ignore text, even quotes, inside asterisk">Ignore *text, even "quotes", inside asterisks*</small>
</label>
<label class="checkbox_label" for="tts_narrate_translated_only">
<input type="checkbox" id="tts_narrate_translated_only">
<small data-i18n="Narrate only the translated text">Narrate only the translated text</small>
</label>
<label class="checkbox_label" for="tts_skip_codeblocks">
<input type="checkbox" id="tts_skip_codeblocks">
<small data-i18n="Skip codeblocks">Skip codeblocks</small>
</label>
<label class="checkbox_label" for="tts_skip_tags">
<input type="checkbox" id="tts_skip_tags">
<small data-i18n="Skip tagged blocks">Skip &lt;tagged&gt; blocks</small>
</label>
<label class="checkbox_label" for="tts_pass_asterisks">
<input type="checkbox" id="tts_pass_asterisks">
<small data-i18n="Pass Asterisks to TTS Engine">Pass Asterisks to TTS Engine</small>
</label>
<!-- The tooltip on this label names the other toggles it works best with. -->
<label class="checkbox_label" for="tts_multi_voice_enabled"
data-i18n="[title]Works best when: Pass Asterisks to TTS Engine is enabled, and both Only narrate quotes and Ignore *text, even 'quotes', inside asterisks* are disabled."
title="Works best when: Pass Asterisks to TTS Engine is enabled, and both Only narrate quotes and Ignore *text, even 'quotes', inside asterisks* are disabled.">
<input type="checkbox" id="tts_multi_voice_enabled">
<small data-i18n="Different voices for quotes and text inside asterisks">
Different voices for "quotes", *text inside asterisks* and other text
</small>
</label>
</div>
<!-- Playback speed: the slider and the number input share min/max/step; the number input points at the slider through data-for. -->
<div id="playback_rate_block" class="range-block">
<hr>
<div class="range-block-title justifyLeft">
<small data-i18n="Audio Playback Speed">Audio Playback Speed</small>
</div>
<div class="range-block-range-and-counter">
<div class="range-block-range">
<input type="range" id="playback_rate" name="volume" min="0" max="3" step="0.05">
</div>
<div class="range-block-counter">
<input type="number" min="0" max="3" step="0.05" data-for="playback_rate" id="playback_rate_counter">
</div>
</div>
</div>
<!-- Empty containers; presumably populated by script with the voice map and the provider's settingsHtml (TODO confirm against index.js). -->
<div id="tts_voicemap_block">
</div>
<hr>
<form id="tts_provider_settings">
</form>
<div class="tts_buttons">
<input id="tts_voices" class="menu_button" data-i18n="[value]Available voices" type="submit" value="Available voices" />
</div>
</div>
</div>
</div>

View File

@@ -0,0 +1,176 @@
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
import { saveTtsProviderSettings } from './index.js';
export { SileroTtsProvider };
/**
 * TTS provider for a Silero endpoint (settings.provider_endpoint), using its
 * /speakers, /generate and /session routes through doExtrasFetch().
 */
class SileroTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = ' ';
    /** Handle of the polling timer started by loadSettings(), or null when none is active. */
    apiCheckInterval = null;

    defaultSettings = {
        provider_endpoint: 'http://localhost:8001/tts',
        voiceMap: {},
    };

    /** Settings markup: the endpoint input (rendered with the default endpoint) and a help line. */
    get settingsHtml() {
        let html = `
<label for="silero_tts_endpoint">Provider Endpoint:</label>
<input id="silero_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<span>Use <a target="_blank" href="https://github.com/SillyTavern/SillyTavern-extras">SillyTavern Extras API</a> or <a target="_blank" href="https://github.com/ouoertheo/silero-api-server">Silero TTS Server</a>.</span>
`;
        return html;
    }

    /** Read the endpoint from the UI, persist it and request a new session. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#silero_tts_endpoint').val();
        saveTtsProviderSettings();
        this.refreshSession();
    }

    /** Stop the polling timer started by loadSettings(), if one is running. */
    stopApiCheck() {
        if (this.apiCheckInterval !== null) {
            clearInterval(this.apiCheckInterval);
            this.apiCheckInterval = null;
        }
    }

    /**
     * Release provider resources. Without this the polling timer would keep
     * firing every two seconds for as long as the TTS module never shows up.
     */
    dispose() {
        this.stopApiCheck();
    }

    /**
     * Apply saved settings on top of the defaults, wire up the endpoint input,
     * start polling for the Extras TTS module and run a readiness check.
     * @param {object} settings Saved settings; every key must exist in defaultSettings
     * @throws {string} When an unknown key is passed (a plain string is thrown)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // A repeated load must not stack a second timer on top of the first.
        this.stopApiCheck();
        this.apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('silero-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
                this.stopApiCheck();
            }
        }, 2000);

        $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
        $('#silero_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        this.refreshSession();

        await this.checkReady();

        console.debug('SileroTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh button handler; nothing to do for this provider. */
    async onRefreshClick() {
        return;
    }

    /** Request a new session from the endpoint (see initSession()). */
    async refreshSession() {
        await this.initSession();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a voice by name, fetching the voice list first when the cache is empty.
     * @param {string} voiceName Voice name to look up
     * @returns {Promise<object>} The first voice with that name
     * @throws {string} When no voice matches (a plain string is thrown)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(sileroVoice => sileroVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech; thin wrapper around fetchTtsGeneration().
     * @param {string} text Text to synthesize
     * @param {string} voiceId Speaker to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `<endpoint>/speakers`.
     * @returns {Promise<*>} The parsed JSON response, returned unchanged
     * @throws {Error} `HTTP <status>: <body>` when the response is not OK
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON,
            // and an object would only render as "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson;
    }

    /**
     * POST a generation request to `<endpoint>/generate` for the 'sillytavern' session.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Speaker to use
     * @returns {Promise<Response>} The successful response
     * @throws {Error} `HTTP <status>: <body>` when the response is not OK (an error toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/generate`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',  // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': voiceId,
                    'session': 'sillytavern',
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Best-effort session setup: POSTs to `<endpoint>/session`. A 404 is
     * tolerated, and any other failure is logged instead of being rethrown.
     */
    async initSession() {
        console.info('Silero TTS: requesting new session');

        try {
            const response = await doExtrasFetch(
                `${this.settings.provider_endpoint}/session`,
                {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Cache-Control': 'no-cache',
                    },
                    body: JSON.stringify({
                        'path': 'sillytavern',
                    }),
                },
            );

            if (!response.ok && response.status !== 404) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
        } catch (error) {
            console.info('Silero TTS: endpoint not available', error);
        }
    }

    // Interface not used by Silero TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

View File

@@ -0,0 +1,199 @@
import { getPreviewString, saveTtsProviderSettings } from './index.js';
import { getBase64Async } from '../../utils.js';
import { getRequestHeaders } from '../../../script.js';
export { SpeechT5TtsProvider };
/**
 * TTS provider that synthesizes speech through /api/speech/synthesize using
 * speaker files uploaded by the user and kept in settings.speakers.
 */
class SpeechT5TtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = ' .. ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        speakers: [],
        speaker: '',
        voiceMap: {},
    };

    /** Settings markup: speaker dropdown, upload/delete buttons and the hidden file input. */
    get settingsHtml() {
        let html = `
<label for="speecht5_tts_speaker">Speaker:</label>
<div class="flex-container">
<select id="speecht5_tts_speaker" class="text_pole flex1">
</select>
<div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker">
<i class="fa-solid fa-upload"></i>
</div>
<div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker">
<i class="fa-solid fa-trash"></i>
</div>
</div>
<input type="file" id="speecht5_tts_speaker_upload" class="displayNone">
<div><i>Loading model for the first time may take a while!</i></div>
`;
        return html;
    }

    /** Store the selected speaker and persist the provider settings. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.speaker = $('#speecht5_tts_speaker').val();
        saveTtsProviderSettings();
    }

    /**
     * Play a preview sentence ('en-US') with the given speaker.
     * The temporary object URL is released when playback ends.
     * @param {string} voiceId Speaker voice id
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Apply saved settings on top of the defaults, fill the speaker dropdown
     * and register the upload / delete / selection handlers.
     * @param {object} settings Saved settings; every key must exist in defaultSettings
     * @throws {string} When an unknown key is passed (a plain string is thrown)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        for (const speaker of this.settings.speakers) {
            $('#speecht5_tts_speaker').append($('<option>', {
                value: speaker.voice_id,
                text: speaker.name,
            }));
        }

        $('#speecht5_tts_speaker').val(this.settings.speaker);
        $('#speecht5_tts_speaker').on('change', this.onSettingsChange.bind(this));
        $('#speecht5_tts_speaker_upload_button').on('click', () => {
            $('#speecht5_tts_speaker_upload').trigger('click');
        });
        $('#speecht5_tts_speaker_upload').on('change', async (event) => {
            const file = event.target.files?.[0];

            // The change event can fire with an empty selection (e.g. dialog cancelled).
            if (!file) {
                return;
            }

            if (file.size != 2048) {
                toastr.error('Invalid speaker file size, expected 2048 bytes');
                return;
            }

            const data = await getBase64Async(file);
            const speaker = {
                voice_id: file.name,
                name: file.name,
                data: data,
                lang: 'en-US',
                preview_url: false,
            };

            this.settings.speakers.push(speaker);

            $('#speecht5_tts_speaker').append($('<option>', {
                value: speaker.voice_id,
                text: speaker.name,
            }));
            // Options are keyed by voice_id, so select by voice_id.
            $('#speecht5_tts_speaker').val(speaker.voice_id);
            this.onSettingsChange();
        });
        $('#speecht5_tts_delete_speaker_button').on('click', () => {
            const confirmDelete = confirm('Are you sure you want to delete this speaker?');

            if (!confirmDelete) {
                return;
            }

            const speaker = this.settings.speakers.find(s => s.voice_id === this.settings.speaker);

            if (!speaker) {
                toastr.error('Speaker not found');
                return;
            }

            const index = this.settings.speakers.indexOf(speaker);
            this.settings.speakers.splice(index, 1);

            // Compare option values directly instead of building a selector from
            // the file name, which breaks on names containing quotes or brackets.
            $('#speecht5_tts_speaker option')
                .filter((_, option) => option.value === speaker.voice_id)
                .remove();

            if (this.settings.speakers.length == 0) {
                console.log('No speakers left');
                // Clear the stale selection and persist the now-empty speaker list.
                this.settings.speaker = '';
                saveTtsProviderSettings();
                return;
            }

            $('#speecht5_tts_speaker').val(this.settings.speakers[0].voice_id);
            this.onSettingsChange();
        });

        await this.checkReady();

        console.debug('SpeechT5: Settings loaded');
    }

    /** Always ready: nothing is checked. */
    async checkReady() {
        return Promise.resolve();
    }

    /**
     * Look up a stored speaker by voice id.
     * @param {string} voiceName Voice id of the speaker
     * @returns {Promise<object|undefined>} The speaker, or undefined when none matches
     */
    async getVoice(voiceName) {
        return this.settings.speakers.find(s => s.voice_id === voiceName);
    }

    /**
     * Generate speech; thin wrapper around fetchTtsGeneration().
     * @param {string} text Text to synthesize
     * @param {string} voiceId Speaker voice id
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /** The available voices are exactly the stored speakers. */
    async fetchTtsVoiceObjects() {
        return this.settings.speakers;
    }

    /**
     * POST a synthesis request to /api/speech/synthesize with the speaker's stored data.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Speaker voice id
     * @returns {Promise<Response>} The successful response
     * @throws {Error} When the speaker is unknown or the response is not OK (an error toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const speaker = await this.getVoice(voiceId);

        if (!speaker) {
            toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed');
            throw new Error(`Speaker not found: ${voiceId}`);
        }

        const response = await fetch(
            '/api/speech/synthesize',
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': speaker.data,
                    'model': 'Xenova/speecht5_tts',
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

View File

@@ -0,0 +1,130 @@
@import './css/minimax-tts.css';
@import './css/openai-tts.css';
/* Voice preview rows: name, language and play button on one line */
.voice_preview {
    margin: 0.25rem 0.5rem;
    display: flex;
    justify-content: space-between;
    align-items: center;
    gap: 0.5rem;
}
.voice_preview .voice_name {
    text-align: left;
    flex: 1;
}
.voice_preview .voice_lang {
    width: 4rem;
    text-align: left;
}
.voice_preview .fa-play {
    cursor: pointer;
}

/* TTS button: dimmed until hovered */
.tts-button {
    margin: 0;
    outline: none;
    border: none;
    cursor: pointer;
    transition: var(--animation-duration-2x);
    opacity: 0.7;
    align-items: center;
    justify-content: center;
}
.tts-button:hover {
    opacity: 1;
}

/* Generic settings layout helpers */
.tts_block {
    display: flex;
    align-items: baseline;
    column-gap: 5px;
    flex-wrap: wrap;
}
.tts_custom_voices {
    display: flex;
    align-items: baseline;
    gap: 5px;
}
.novel_tts_hints {
    font-size: calc(0.9 * var(--mainFontSize));
    display: flex;
    flex-direction: column;
    gap: 5px;
    margin-bottom: 5px;
}

/* "at-" prefixed settings panel */
.at-settings-row {
    display: flex;
    justify-content: space-between;
    align-items: center;
    width: 100%;
}
.at-settings-option {
    flex: 1;
    margin: 0 10px;
}
/* Single declaration; a later duplicate rule that only repeated width: 38% was merged here */
.at-endpoint-option {
    flex: 1;
    margin: 0 10px;
    margin-right: 25px;
    width: 38%;
}
.at-website-row {
    display: flex;
    justify-content: start;
    align-items: center;
    margin-top: 10px;
    margin-bottom: 10px;
}
.at-website-option {
    flex: 1;
    margin-right: 10px;
    margin-left: 10px;
}
.at-settings-separator {
    margin-top: 10px;
    margin-bottom: 10px;
    padding: 18px;
    font-weight: bold;
    border-top: 1px solid #e1e1e1; /* Grey line */
    border-bottom: 1px solid #e1e1e1; /* Grey line */
    text-align: center;
}
.at-status-message {
    flex: 1;
    margin: 0 10px;
}
.at-model-endpoint-row {
    display: flex;
    justify-content: space-between;
    align-items: center;
    width: 100%;
}
/* NOTE(review): ".endpoint-option" has no "at-" prefix unlike its siblings — confirm it is not a typo */
.at-model-option, .endpoint-option {
    flex: 1;
    margin: 0 10px;
}
#at-status_info {
    color: lightgreen;
}

View File

@@ -0,0 +1,296 @@
import { isMobile } from '../../RossAscends-mods.js';
import { getPreviewString } from './index.js';
import { saveTtsProviderSettings } from './index.js';
export { SystemTtsProvider };
import { t } from '../../i18n.js';
/**
 * Chunkify
 * Google Chrome Speech Synthesis Chunking Pattern
 * Fixes inconsistencies with speaking long texts in speechUtterance objects
 * Licensed under the MIT License
 *
 * Peter Woolley and Brett Zamir
 * Modified by Haaris for bug fixes
 *
 * Speaks `utt.text` starting at `settings.offset`. Utterances whose voice URI is
 * 'native' are spoken in one piece; all others are spoken chunk by chunk, each
 * chunk being scheduled from the 'end' event of the previous one. A chunk ends at
 * ASCII punctuation, at the end of the text or at a space when possible, and is
 * otherwise cut at `chunkLength` characters so unbroken text is still spoken.
 * A remainder of two characters or fewer is treated as the end of the text.
 * Only 'end' listeners are attached to chunk utterances; their 'error' events are not forwarded.
 *
 * @param {SpeechSynthesisUtterance} utt Utterance carrying the full text and voice parameters
 * @param {{chunkLength?: number, offset?: number, modifier?: function(SpeechSynthesisUtterance): void}} [settings]
 *        Chunking options; `offset` is advanced on this object between chunks
 * @param {function(): void} [callback] Invoked once no further chunk is left to speak
 */
var speechUtteranceChunker = function (utt, settings, callback) {
    settings = settings || {};
    const txt = settings.offset !== undefined ? utt.text.substring(settings.offset) : utt.text;
    let newUtt;

    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = txt;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    } else {
        const chunkLength = settings.chunkLength || 160;
        const pattRegex = new RegExp(
            '^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}'
            + '|^[\\s\\S]{1,' + chunkLength + '}$'
            + '|^[\\s\\S]{1,' + chunkLength + '} '
            // Fallback: text without ASCII punctuation or spaces (e.g. CJK) used to match
            // nothing here and was dropped; cut it at chunkLength instead.
            + '|^[\\s\\S]{1,' + chunkLength + '}',
        );
        const chunkArr = txt.match(pattRegex);

        if (chunkArr == null || chunkArr[0] === undefined || chunkArr[0].length <= 2) {
            //call once all text has been spoken...
            if (callback !== undefined) {
                callback();
            }
            return;
        }

        let chunk = chunkArr[0];
        // A hard cut may land between the two halves of a surrogate pair; keep the pair together.
        if (/[\uD800-\uDBFF]$/.test(chunk)) {
            chunk = chunk.slice(0, -1);
        }

        newUtt = new SpeechSynthesisUtterance(chunk);
        for (const x in utt) {
            if (Object.hasOwn(utt, x) && x !== 'text') {
                newUtt[x] = utt[x];
            }
        }
        // Copied explicitly in addition to the own-property loop above.
        newUtt.lang = utt.lang;
        newUtt.voice = utt.voice;
        newUtt.rate = utt.rate;
        newUtt.pitch = utt.pitch;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            settings.offset = settings.offset || 0;
            settings.offset += chunk.length;
            speechUtteranceChunker(utt, settings, callback);
        });
    }

    if (settings.modifier) {
        settings.modifier(newUtt);
    }
    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
};
/**
 * TTS provider that speaks through the browser's `speechSynthesis` API.
 * When the browser reports no voices, a simulated "System Default Voice" entry is offered instead.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    // Static constants for the simulated default voice
    static BROWSER_DEFAULT_VOICE_ID = '__browser_default__';
    static BROWSER_DEFAULT_VOICE_NAME = 'System Default Voice';

    settings;
    ready = false;
    voices = [];
    separator = ' ... ';

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    };

    /**
     * Settings markup: rate and pitch sliders, or a notice when speech synthesis is unavailable.
     * @returns {string} HTML (or plain notice text) for the provider settings panel
     */
    get settingsHtml() {
        if (!('speechSynthesis' in window)) {
            return t`Your browser or operating system doesn't support speech synthesis`;
        }

        return '<p>' + t`Uses the voices provided by your operating system` + `</p>
<label for="system_tts_rate">` + t`Rate:` + ` <span id="system_tts_rate_output"></span></label>
<input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.1" max="2" step="0.01" />
<label for="system_tts_pitch">` + t`Pitch:` + ` <span id="system_tts_pitch_output"></span></label>
<input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.01" />`;
    }

    /**
     * Pitch to apply to an utterance. Unlike `pitch || 1`, this keeps 0, which the
     * pitch slider (min="0") allows; only missing or non-numeric values fall back to 1.
     * @returns {number}
     */
    #effectivePitch() {
        const pitch = Number(this.settings.pitch ?? 1);
        return Number.isFinite(pitch) ? pitch : 1;
    }

    /** Reads both sliders into the settings, refreshes their value labels and saves. */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings on top of a copy of the defaults and binds the sliders.
     * @param {object} settings Saved provider settings; every key must exist in the defaults
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key the defaults do not define
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // iOS only allows speech synthesis triggered by user interaction, so speak one
        // silent utterance on the first click. `once` removes the listener afterwards.
        if (isMobile()) {
            document.addEventListener('click', () => {
                const utterance = new SpeechSynthesisUtterance(' . ');
                utterance.volume = 0;
                speechSynthesis.speak(utterance);
            }, { once: true });
        }

        // Only accept keys defined in defaultSettings. Work on a copy so that loading
        // saved values never overwrites the defaults themselves.
        this.settings = structuredClone(this.defaultSettings);
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#system_tts_rate').val(this.settings.rate || this.defaultSettings.rate);
        // `??` rather than `||`: a saved pitch of 0 is valid and must be shown as 0.
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);

        // Trigger updates
        $('#system_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#system_tts_pitch').on('input', () => { this.onSettingsChange(); });

        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.debug('SystemTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Lists the browser voices sorted by language, then name.
     * @returns {Promise<object[]>} Voice objects (`name`, `voice_id`, `preview_url`, `lang`);
     *          an empty list without speech synthesis support, or a single simulated
     *          default voice when the browser reports none
     */
    fetchTtsVoiceObjects() {
        if (!('speechSynthesis' in window)) {
            return Promise.resolve([]);
        }

        return new Promise((resolve) => {
            // The voice list is read after a 50 ms delay.
            setTimeout(() => {
                let voices = speechSynthesis.getVoices();

                if (voices.length === 0) {
                    // Edge compat: Provide default when voices empty
                    console.warn('SystemTTS: getVoices() returned empty list. Providing browser default option.');
                    const defaultVoice = {
                        name: SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME,
                        voice_id: SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID,
                        preview_url: false,
                        lang: navigator.language || 'en-US',
                    };
                    resolve([defaultVoice]);
                } else {
                    const mappedVoices = voices
                        .sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
                        .map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
                    resolve(mappedVoices);
                }
            }, 50);
        });
    }

    /**
     * Speaks a preview sentence with the given voice, cancelling any speech in progress.
     * Falls back to the browser default voice when the id is the simulated default or unknown.
     * @param {string} voiceId Voice URI, or BROWSER_DEFAULT_VOICE_ID
     * @throws {Error} When the speech synthesis API is not available
     */
    previewTtsVoice(voiceId) {
        if (!('speechSynthesis' in window)) {
            throw new Error('Speech synthesis API is not supported');
        }

        let voice = null;
        if (voiceId !== SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID) {
            const voices = speechSynthesis.getVoices();
            voice = voices.find(x => x.voiceURI === voiceId);

            if (!voice && voices.length > 0) {
                console.warn(`SystemTTS Preview: Voice ID "${voiceId}" not found among available voices. Using browser default.`);
            } else if (!voice && voices.length === 0) {
                console.warn('SystemTTS Preview: Voice list is empty. Using browser default.');
            }
        } else {
            console.log('SystemTTS Preview: Using browser default voice as requested.');
        }

        speechSynthesis.cancel();
        const langForPreview = voice ? voice.lang : (navigator.language || 'en-US');
        const text = getPreviewString(langForPreview);
        const utterance = new SpeechSynthesisUtterance(text);

        if (voice) {
            utterance.voice = voice;
        }

        utterance.rate = this.settings.rate || 1;
        utterance.pitch = this.#effectivePitch();

        utterance.onerror = (event) => {
            console.error(`SystemTTS Preview Error: ${event.error}`, event);
        };

        speechSynthesis.speak(utterance);
    }

    /**
     * Resolves a voice by display name.
     * @param {string} voiceName Voice name as listed by fetchTtsVoiceObjects
     * @returns {Promise<{voice_id: (string|null), name: string}>} The matching voice, or the
     *          simulated default voice when it is requested or the browser reports no voices
     * @throws {Error} When voices exist but none has the given name
     */
    async getVoice(voiceName) {
        if (!('speechSynthesis' in window)) {
            return { voice_id: null, name: 'API Not Supported' };
        }

        if (voiceName === SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME) {
            return {
                voice_id: SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID,
                name: SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME,
            };
        }

        const voices = speechSynthesis.getVoices();

        if (voices.length === 0) {
            console.warn('SystemTTS: Empty voice list, using default fallback');
            return {
                voice_id: SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID,
                name: SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME,
            };
        }

        const match = voices.find(x => x.name == voiceName);

        if (!match) {
            throw new Error(`SystemTTS getVoice: TTS Voice name "${voiceName}" not found`);
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks the text through the browser and resolves once speaking has finished.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice URI; an unknown id leaves the browser default voice in place
     * @returns {Promise<Response>} The fetched '/sounds/silence.mp3' response, returned in
     *          place of generated audio because the browser plays the speech itself
     * @throws {string} When the speech synthesis API is not available
     */
    async generateTts(text, voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voices = speechSynthesis.getVoices();
            const voice = voices.find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            if (voice) {
                utterance.voice = voice;
            }
            utterance.rate = this.settings.rate || 1;
            utterance.pitch = this.#effectivePitch();
            utterance.onend = () => resolve(silence);
            // Reject with a real Error so callers always receive a reason they can log.
            utterance.onerror = (event) => reject(new Error(`SystemTTS synthesis failed: ${event?.error ?? 'unknown error'}`));
            speechUtteranceChunker(utterance, {
                chunkLength: 200,
            }, function () {
                resolve(silence);
                console.log('System TTS done');
            });
        });
    }
}

View File

@@ -0,0 +1,566 @@
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { TtsWebuiProvider };
/**
 * TTS provider that POSTs synthesis requests to a configurable speech endpoint
 * (default `http://127.0.0.1:7778/v1/audio/speech`). Audio is either returned as a
 * response for the caller to play, or, in streaming mode, fed chunk by chunk into a
 * 'pcm-processor' AudioWorklet.
 */
class TtsWebuiProvider {
    settings;
    voices = [];
    separator = ' . ';

    audioElement = document.createElement('audio');
    audioContext = null;
    audioWorkletNode = null;
    currentVolume = 1.0; // Track current volume

    defaultSettings = {
        voiceMap: {},
        model: 'chatterbox',
        speed: 1,
        volume: 1.0,
        available_voices: [''],
        provider_endpoint: 'http://127.0.0.1:7778/v1/audio/speech',
        streaming: true,
        stream_chunk_size: 100,
        desired_length: 80,
        max_length: 200,
        halve_first_chunk: true,
        exaggeration: 0.5,
        cfg_weight: 0.5,
        temperature: 0.8,
        device: 'auto',
        dtype: 'float32',
        cpu_offload: false,
        chunked: true,
        cache_voice: false,
        tokens_per_slice: 1000,
        remove_milliseconds: 45,
        remove_milliseconds_start: 25,
        chunk_overlap_method: 'zero',
        seed: -1,
    };

    /**
     * Settings markup, pre-filled with the default values.
     * @returns {string} HTML for the provider settings panel
     */
    get settingsHtml() {
        let html = `
<h4 class="textAlignCenter">TTS WebUI Settings</h4>
<div class="flex gap10px marginBot10 alignItemsFlexEnd">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_endpoint">Provider Endpoint:</label>
<input id="tts_webui_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_model">Model:</label>
<input id="tts_webui_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_voices">Available Voices (comma separated):</label>
<input id="tts_webui_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_streaming" class="checkbox_label alignItemsCenter flexGap5">
<input id="tts_webui_streaming" type="checkbox" />
<span>Streaming</span>
</label>
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_volume">Volume: <span id="tts_webui_volume_output">${this.defaultSettings.volume}</span></label>
<input type="range" id="tts_webui_volume" value="${this.defaultSettings.volume}" min="0" max="2" step="0.1">
</div>
</div>
<hr>
<h4 class="textAlignCenter">Generation Settings</h4>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_exaggeration">Exaggeration: <span id="tts_webui_exaggeration_output">${this.defaultSettings.exaggeration}</span></label>
<input id="tts_webui_exaggeration" type="range" value="${this.defaultSettings.exaggeration}" min="0" max="2" step="0.1" />
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_cfg_weight">CFG Weight: <span id="tts_webui_cfg_weight_output">${this.defaultSettings.cfg_weight}</span></label>
<input id="tts_webui_cfg_weight" type="range" value="${this.defaultSettings.cfg_weight}" min="0" max="2" step="0.1" />
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_temperature">Temperature: <span id="tts_webui_temperature_output">${this.defaultSettings.temperature}</span></label>
<input id="tts_webui_temperature" type="range" value="${this.defaultSettings.temperature}" min="0" max="2" step="0.1" />
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_seed">Seed (-1 for random):</label>
<input id="tts_webui_seed" type="text" class="text_pole" value="${this.defaultSettings.seed}"/>
</div>
</div>
<hr>
<h4 class="textAlignCenter">Chunking</h4>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_chunked" class="checkbox_label alignItemsCenter flexGap5">
<input id="tts_webui_chunked" type="checkbox" />
<span>Split prompt into chunks</span>
</label>
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_halve_first_chunk" class="checkbox_label alignItemsCenter flexGap5">
<input id="tts_webui_halve_first_chunk" type="checkbox" />
<span>Halve First Chunk</span>
</label>
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_desired_length">Desired Length: <span id="tts_webui_desired_length_output">${this.defaultSettings.desired_length}</span></label>
<input id="tts_webui_desired_length" type="range" value="${this.defaultSettings.desired_length}" min="25" max="300" step="5" />
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_max_length">Max Length: <span id="tts_webui_max_length_output">${this.defaultSettings.max_length}</span></label>
<input id="tts_webui_max_length" type="range" value="${this.defaultSettings.max_length}" min="50" max="450" step="5" />
</div>
</div>
<hr>
<h4 class="textAlignCenter">Model</h4>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_device">Device:</label>
<select id="tts_webui_device">
<option value="auto" ${this.defaultSettings.device === 'auto' ? 'selected' : ''}>Auto</option>
<option value="cuda" ${this.defaultSettings.device === 'cuda' ? 'selected' : ''}>CUDA</option>
<option value="mps" ${this.defaultSettings.device === 'mps' ? 'selected' : ''}>MPS</option>
<option value="cpu" ${this.defaultSettings.device === 'cpu' ? 'selected' : ''}>CPU</option>
</select>
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_dtype">Data Type:</label>
<select id="tts_webui_dtype">
<option value="float32" ${this.defaultSettings.dtype === 'float32' ? 'selected' : ''}>Float32</option>
<option value="float16" ${this.defaultSettings.dtype === 'float16' ? 'selected' : ''}>Float16</option>
<option value="bfloat16" ${this.defaultSettings.dtype === 'bfloat16' ? 'selected' : ''}>BFloat16</option>
</select>
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_cpu_offload" class="checkbox_label alignItemsCenter flexGap5">
<input id="tts_webui_cpu_offload" type="checkbox" />
<span>CPU Offload</span>
</label>
</div>
<div class="flex1">
<!-- Empty for spacing -->
</div>
</div>
<hr>
<h4 class="textAlignCenter">Streaming (Advanced Settings)</h4>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_tokens_per_slice">Tokens Per Slice: <span id="tts_webui_tokens_per_slice_output">${this.defaultSettings.tokens_per_slice}</span></label>
<input id="tts_webui_tokens_per_slice" type="range" value="${this.defaultSettings.tokens_per_slice}" min="15" max="1000" step="1" />
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_chunk_overlap_method">Chunk Overlap Method:</label>
<select id="tts_webui_chunk_overlap_method">
<option value="zero" ${this.defaultSettings.chunk_overlap_method === 'zero' ? 'selected' : ''}>Zero</option>
<option value="full" ${this.defaultSettings.chunk_overlap_method === 'full' ? 'selected' : ''}>Full</option>
</select>
</div>
</div>
<div class="flex gap10px marginBot10">
<div class="flex1 flexFlowColumn">
<label for="tts_webui_remove_milliseconds">Remove Milliseconds: <span id="tts_webui_remove_milliseconds_output">${this.defaultSettings.remove_milliseconds}</span></label>
<input id="tts_webui_remove_milliseconds" type="range" value="${this.defaultSettings.remove_milliseconds}" min="0" max="100" step="1" />
</div>
<div class="flex1 flexFlowColumn">
<label for="tts_webui_remove_milliseconds_start">Remove Milliseconds Start: <span id="tts_webui_remove_milliseconds_start_output">${this.defaultSettings.remove_milliseconds_start}</span></label>
<input id="tts_webui_remove_milliseconds_start" type="range" value="${this.defaultSettings.remove_milliseconds_start}" min="0" max="100" step="1" />
</div>
</div>`;

        return html;
    }

    /** Writes the current numeric settings into the value labels next to their sliders. */
    #refreshOutputLabels() {
        const labelledKeys = [
            'volume',
            'desired_length',
            'max_length',
            'exaggeration',
            'cfg_weight',
            'temperature',
            'tokens_per_slice',
            'remove_milliseconds',
            'remove_milliseconds_start',
        ];
        for (const key of labelledKeys) {
            $(`#tts_webui_${key}_output`).text(this.settings[key]);
        }
    }

    /**
     * Applies saved settings on top of a copy of the defaults, fills in the settings
     * controls, binds their change handlers and runs the readiness check.
     * @param {object} settings Saved provider settings; every key must exist in the defaults
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key the defaults do not define
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings. Work on a copy so that loading
        // saved values never overwrites the defaults themselves.
        this.settings = structuredClone(this.defaultSettings);
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        const onChange = () => { this.onSettingsChange(); };

        // [element id, settings key, event that signals a change]
        const valueControls = [
            ['tts_webui_endpoint', 'provider_endpoint', 'input'],
            ['tts_webui_model', 'model', 'input'],
            ['tts_webui_volume', 'volume', 'input'],
            ['tts_webui_stream_chunk_size', 'stream_chunk_size', 'input'],
            ['tts_webui_desired_length', 'desired_length', 'input'],
            ['tts_webui_max_length', 'max_length', 'input'],
            ['tts_webui_exaggeration', 'exaggeration', 'input'],
            ['tts_webui_cfg_weight', 'cfg_weight', 'input'],
            ['tts_webui_temperature', 'temperature', 'input'],
            ['tts_webui_device', 'device', 'change'],
            ['tts_webui_dtype', 'dtype', 'change'],
            ['tts_webui_tokens_per_slice', 'tokens_per_slice', 'input'],
            ['tts_webui_remove_milliseconds', 'remove_milliseconds', 'input'],
            ['tts_webui_remove_milliseconds_start', 'remove_milliseconds_start', 'input'],
            ['tts_webui_chunk_overlap_method', 'chunk_overlap_method', 'change'],
            ['tts_webui_seed', 'seed', 'input'],
        ];
        for (const [id, key, eventName] of valueControls) {
            $(`#${id}`).val(this.settings[key]);
            $(`#${id}`).on(eventName, onChange);
        }

        // The voice list is stored as an array but edited as comma separated text.
        $('#tts_webui_voices').val(this.settings.available_voices.join());
        $('#tts_webui_voices').on('input', onChange);

        // [element id, settings key]
        const checkboxes = [
            ['tts_webui_streaming', 'streaming'],
            ['tts_webui_halve_first_chunk', 'halve_first_chunk'],
            ['tts_webui_cpu_offload', 'cpu_offload'],
            ['tts_webui_chunked', 'chunked'],
        ];
        for (const [id, key] of checkboxes) {
            $(`#${id}`).prop('checked', this.settings[key]);
            $(`#${id}`).on('change', onChange);
        }

        // Apply the saved volume right away instead of waiting for the slider to move.
        this.setVolume(this.settings.volume);

        // Update output labels
        this.#refreshOutputLabels();

        await this.checkReady();

        console.debug('TTS WebUI: Settings loaded');
    }

    /** Reads every settings control back into `this.settings`, applies the volume and saves. */
    onSettingsChange() {
        // Update dynamically
        this.settings.provider_endpoint = String($('#tts_webui_endpoint').val());
        this.settings.model = String($('#tts_webui_model').val());
        this.settings.available_voices = String($('#tts_webui_voices').val()).split(',');
        this.settings.volume = Number($('#tts_webui_volume').val());
        this.settings.streaming = $('#tts_webui_streaming').is(':checked');

        // settingsHtml renders no #tts_webui_stream_chunk_size input, so only take the
        // value when one is present; otherwise the setting would be overwritten with NaN.
        const chunkSizeInput = $('#tts_webui_stream_chunk_size').val();
        const chunkSize = Number(chunkSizeInput);
        if (chunkSizeInput !== undefined && chunkSizeInput !== '' && Number.isFinite(chunkSize)) {
            this.settings.stream_chunk_size = chunkSize;
        }

        this.settings.desired_length = Number($('#tts_webui_desired_length').val());
        this.settings.max_length = Number($('#tts_webui_max_length').val());
        this.settings.halve_first_chunk = $('#tts_webui_halve_first_chunk').is(':checked');
        this.settings.exaggeration = Number($('#tts_webui_exaggeration').val());
        this.settings.cfg_weight = Number($('#tts_webui_cfg_weight').val());
        this.settings.temperature = Number($('#tts_webui_temperature').val());
        this.settings.device = String($('#tts_webui_device').val());
        this.settings.dtype = String($('#tts_webui_dtype').val());
        this.settings.cpu_offload = $('#tts_webui_cpu_offload').is(':checked');
        this.settings.chunked = $('#tts_webui_chunked').is(':checked');
        this.settings.tokens_per_slice = Number($('#tts_webui_tokens_per_slice').val());
        this.settings.remove_milliseconds = Number($('#tts_webui_remove_milliseconds').val());
        this.settings.remove_milliseconds_start = Number($('#tts_webui_remove_milliseconds_start').val());
        this.settings.chunk_overlap_method = String($('#tts_webui_chunk_overlap_method').val());

        // Only unparsable input falls back to -1 (random); a seed of 0 is kept.
        const parsedSeed = Number.parseInt(String($('#tts_webui_seed').val()), 10);
        this.settings.seed = Number.isNaN(parsedSeed) ? -1 : parsedSeed;

        // Apply volume change immediately
        this.setVolume(this.settings.volume);

        // Update output labels
        this.#refreshOutputLabels();

        saveTtsProviderSettings();
    }

    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        await this.fetchTtsVoiceObjects();
        console.info('TTS voices refreshed');
    }

    /**
     * Resolves a voice by display name, fetching the voice list first if it is empty.
     * @param {string} voiceName Voice name to look up
     * @returns {Promise<{name: string, voice_id: string, lang: string}>}
     * @throws {string} When no voice has the given name
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(oaicVoice => oaicVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice id sent to the endpoint
     * @returns {Promise<Response|string>} The fetch response, or '' in streaming mode
     *          because the audio was already handed to the AudioWorklet
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);

        if (this.settings.streaming) {
            // Stream audio in real-time
            await this.processStreamingAudio(response);
            // Return empty string since audio is already played via AudioWorklet
            return '';
        }

        return response;
    }

    /**
     * Fetches the voice list from the provider, falling back to the configured
     * `available_voices` when discovery fails.
     * @returns {Promise<{name: string, voice_id: string, lang: string}[]>}
     */
    async fetchTtsVoiceObjects() {
        // Try to fetch voices from the provider endpoint
        try {
            const voicesEndpoint = this.settings.provider_endpoint.replace('/speech', '/voices/' + this.settings.model);
            const response = await fetch(voicesEndpoint);

            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            const responseJson = await response.json();
            console.info('Discovered voices from provider:', responseJson);

            this.voices = responseJson.voices.map(({ value, label }) => ({
                name: label,
                voice_id: value,
                lang: 'en-US',
            }));

            return this.voices;
        } catch (error) {
            console.warn('Voice discovery failed, using configured voices:', error);
        }

        // Fallback to configured voices
        this.voices = this.settings.available_voices.map(name => ({
            name, voice_id: name, lang: 'en-US',
        }));

        return this.voices;
    }

    /**
     * Creates an AudioContext at the given sample rate with a 'pcm-processor' worklet
     * node connected to the output, replacing `audioContext` and `audioWorkletNode`.
     * @param {number} wavSampleRate Sample rate read from the WAV header
     * @returns {Promise<void>}
     */
    async initAudioWorklet(wavSampleRate) {
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: wavSampleRate });

        // Load the PCM processor from separate file
        const processorUrl = './scripts/extensions/tts/lib/pcm-processor.js';
        await this.audioContext.audioWorklet.addModule(processorUrl);
        this.audioWorkletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
        this.audioWorkletNode.connect(this.audioContext.destination);

        // A freshly created node has not received any earlier volume message,
        // so send it the volume currently in effect.
        this.audioWorkletNode.port.postMessage({ volume: this.currentVolume });
    }

    /**
     * Reads the format fields of a WAV header.
     * @param {ArrayBuffer|ArrayBufferView} buffer Bytes starting at the beginning of the WAV header
     * @returns {{sampleRate: number, channels: number, bitsPerSample: number}}
     */
    parseWavHeader(buffer) {
        // Honour the view's own offset and length when a typed array is passed.
        const view = ArrayBuffer.isView(buffer)
            ? new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
            : new DataView(buffer);

        // Sample rate is at bytes 24-27 (little endian)
        const sampleRate = view.getUint32(24, true);

        // Number of channels is at bytes 22-23 (little endian)
        const channels = view.getUint16(22, true);

        // Bits per sample is at bytes 34-35 (little endian)
        const bitsPerSample = view.getUint16(34, true);

        return { sampleRate, channels, bitsPerSample };
    }

    /**
     * Reads a streamed WAV response and posts its PCM data to the AudioWorklet as it
     * arrives. Resolves once the stream has been fully read.
     * @param {Response} response Streaming response from the speech endpoint
     * @returns {Promise<void>}
     * @throws {Error} When the response status is not OK
     */
    async processStreamingAudio(response) {
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        // Skip WAV header (first 44 bytes typically)
        const WAV_HEADER_BYTES = 44;
        const reader = response.body.getReader();
        // Bytes collected so far while the header is still incomplete.
        let headerBytes = new Uint8Array(0);
        let headerParsed = false;

        for (;;) {
            const { done, value } = await reader.read();
            if (done) {
                return;
            }

            if (headerParsed) {
                // Send PCM data to AudioWorklet for immediate playback
                this.audioWorkletNode.port.postMessage({ pcmData: value });
                continue;
            }

            // The first network chunk may be shorter than the header, so accumulate
            // chunks until the whole header is available before parsing it.
            const merged = new Uint8Array(headerBytes.byteLength + value.byteLength);
            merged.set(headerBytes, 0);
            merged.set(value, headerBytes.byteLength);
            headerBytes = merged;
            if (headerBytes.byteLength < WAV_HEADER_BYTES) {
                continue;
            }

            // Parse WAV header to get sample rate
            const wavInfo = this.parseWavHeader(headerBytes);
            console.log('WAV Info:', wavInfo);

            // Initialize AudioWorklet with correct sample rate
            await this.initAudioWorklet(wavInfo.sampleRate);

            this.audioWorkletNode.port.postMessage({ pcmData: headerBytes.slice(WAV_HEADER_BYTES) });
            headerParsed = true;
        }
    }

    /**
     * Plays a preview sentence with the given voice.
     * @param {string} voiceId Voice id sent to the endpoint
     * @returns {Promise<void>}
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);

        if (this.settings.streaming) {
            // Use shared streaming method
            await this.processStreamingAudio(response);
        } else {
            // For non-streaming, response is a fetch Response object
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }

            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            this.audioElement.src = url;
            this.audioElement.play();
            this.audioElement.onended = () => URL.revokeObjectURL(url);
        }
    }

    /**
     * POSTs a synthesis request to the configured endpoint.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice id sent as `voice`
     * @returns {Promise<Response>} The successful fetch response
     * @throws {Error} When the endpoint returns a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const settings = this.settings;
        const streaming = settings.streaming;

        // Settings forwarded to the endpoint inside `params`.
        const chatterboxParams = [
            'desired_length',
            'max_length',
            'halve_first_chunk',
            'exaggeration',
            'cfg_weight',
            'temperature',
            'device',
            'dtype',
            'cpu_offload',
            'chunked',
            'cache_voice',
            'tokens_per_slice',
            'remove_milliseconds',
            'remove_milliseconds_start',
            'chunk_overlap_method',
            'seed',
        ];

        const getParams = settings => Object.fromEntries(
            Object.entries(settings).filter(([key]) =>
                chatterboxParams.includes(key),
            ),
        );

        const requestBody = {
            model: settings.model,
            voice: voiceId,
            input: inputText,
            response_format: 'wav',
            speed: settings.speed,
            stream: streaming,
            params: getParams(settings),
        };

        // Add Cache-Control only when streaming: a key holding `undefined` would be
        // sent by fetch as the literal header value "undefined".
        const headers = {
            'Content-Type': 'application/json',
        };
        if (streaming) {
            headers['Cache-Control'] = 'no-cache';
        }

        const response = await fetch(settings.provider_endpoint, {
            method: 'POST',
            headers,
            body: JSON.stringify(requestBody),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(
                `HTTP ${response.status}: ${await response.text()}`,
            );
        }

        return response;
    }

    /**
     * Sets the playback volume for both playback paths.
     * @param {number} volume Requested volume; clamped to the range 0.0-2.0
     */
    setVolume(volume) {
        // Clamp volume between 0.0 and 2.0 (0% to 200%)
        this.currentVolume = Math.max(0, Math.min(2.0, volume));

        // Set volume for regular audio element (non-streaming)
        this.audioElement.volume = Math.min(this.currentVolume, 1.0); // HTML audio element max is 1.0

        // Set volume for AudioWorklet (streaming)
        if (this.audioWorkletNode) {
            this.audioWorkletNode.port.postMessage({ volume: this.currentVolume });
        }
    }
}

View File

@@ -0,0 +1,404 @@
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { VITSTtsProvider };
/**
 * TTS provider that talks to a vits-simple-api server
 * (VITS, W2V2-VITS and BERT-VITS2 model families).
 */
class VITSTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    // UI label -> language key sent to the server
    languageLabels = {
        'Auto': 'auto',
        'Chinese': 'zh',
        'English': 'en',
        'Japanese': 'ja',
        'Korean': 'ko',
    };

    // Language key -> locale code passed to getPreviewString()
    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    // Keys of the speaker list; the lowercased value is also the /voice/<type> path segment
    modelTypes = {
        VITS: 'VITS',
        W2V2_VITS: 'W2V2-VITS',
        BERT_VITS2: 'BERT-VITS2',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:23456',
        format: 'wav',
        lang: 'auto',
        length: 1.0,
        noise: 0.33,
        noisew: 0.4,
        segment_size: 50,
        streaming: false,
        dim_emotion: 0,
        sdp_ratio: 0.2,
        emotion: 0,
        text_prompt: '',
        style_text: '',
        style_weight: 1,
    };

    /**
     * Markup for the provider settings panel. Inputs are rendered with the default
     * values; loadSettings() fills in the stored values afterwards.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
<label for="vits_lang">Text Language</label>
<select id="vits_lang">`;
        for (const [label, code] of Object.entries(this.languageLabels)) {
            const selected = code === this.settings?.lang ? ' selected="selected"' : '';
            html += `<option value="${code}"${selected}>${label}</option>`;
        }
        html += `
</select>
<label>VITS / W2V2-VITS / Bert-VITS2 Settings:</label><br/>
<label for="vits_endpoint">Provider Endpoint:</label>
<input id="vits_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<span>Use <a target="_blank" href="https://github.com/Artrajz/vits-simple-api">vits-simple-api</a>.</span><br/>
<label for="vits_format">Audio format:</label>
<select id="vits_format">`;
        for (const format of this.audioFormats) {
            const selected = format === this.settings?.format ? ' selected="selected"' : '';
            html += `<option value="${format}"${selected}>${format}</option>`;
        }
        html += `
</select>
<label for="vits_length">Audio length: <span id="vits_length_output">${this.defaultSettings.length}</span></label>
<input id="vits_length" type="range" value="${this.defaultSettings.length}" min="0.0" max="5" step="0.01" />
<label for="vits_noise">Noise: <span id="vits_noise_output">${this.defaultSettings.noise}</span></label>
<input id="vits_noise" type="range" value="${this.defaultSettings.noise}" min="0.1" max="2" step="0.01" />
<label for="vits_noisew">SDP noise: <span id="vits_noisew_output">${this.defaultSettings.noisew}</span></label>
<input id="vits_noisew" type="range" value="${this.defaultSettings.noisew}" min="0.1" max="2" step="0.01" />
<label for="vits_segment_size">Segment Size: <span id="vits_segment_size_output">${this.defaultSettings.segment_size}</span></label>
<input id="vits_segment_size" type="range" value="${this.defaultSettings.segment_size}" min="0" max="1000" step="1" />
<label for="vits_streaming" class="checkbox_label">
<input id="vits_streaming" type="checkbox" />
<span>Streaming</span>
</label>
<label>W2V2-VITS Settings:</label><br/>
<label for="vits_dim_emotion">Dimensional emotion:</label>
<input id="vits_dim_emotion" type="number" class="text_pole" min="0" max="5457" step="1" value="${this.defaultSettings.dim_emotion}"/>
<label>BERT-VITS2 Settings:</label><br/>
<label for="vits_sdp_ratio">sdp_ratio: <span id="vits_sdp_ratio_output">${this.defaultSettings.sdp_ratio}</span></label>
<input id="vits_sdp_ratio" type="range" value="${this.defaultSettings.sdp_ratio}" min="0.0" max="1" step="0.01" />
<label for="vits_emotion">emotion: <span id="vits_emotion_output">${this.defaultSettings.emotion}</span></label>
<input id="vits_emotion" type="range" value="${this.defaultSettings.emotion}" min="0" max="9" step="1" />
<label for="vits_text_prompt">Text Prompt:</label>
<input id="vits_text_prompt" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.text_prompt}"/>
<label for="vits_style_text">Style text:</label>
<input id="vits_style_text" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.style_text}"/>
<label for="vits_style_weight">Style weight <span id="vits_style_weight_output">${this.defaultSettings.style_weight}</span></label>
<input id="vits_style_weight" type="range" value="${this.defaultSettings.style_weight}" min="0" max="1" step="0.01" />
`;
        return html;
    }

    /**
     * Copy the current values of every settings control into this.settings,
     * refresh the slider read-outs and persist the result.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#vits_endpoint').val();
        this.settings.lang = $('#vits_lang').val();
        this.settings.format = $('#vits_format').val();
        this.settings.dim_emotion = $('#vits_dim_emotion').val();
        this.settings.text_prompt = $('#vits_text_prompt').val();
        this.settings.style_text = $('#vits_style_text').val();

        // Update the default TTS settings based on input fields
        this.settings.length = $('#vits_length').val();
        this.settings.noise = $('#vits_noise').val();
        this.settings.noisew = $('#vits_noisew').val();
        this.settings.segment_size = $('#vits_segment_size').val();
        this.settings.streaming = $('#vits_streaming').is(':checked');
        this.settings.sdp_ratio = $('#vits_sdp_ratio').val();
        this.settings.emotion = $('#vits_emotion').val();
        this.settings.style_weight = $('#vits_style_weight').val();

        // Update the UI to reflect changes
        $('#vits_length_output').text(this.settings.length);
        $('#vits_noise_output').text(this.settings.noise);
        $('#vits_noisew_output').text(this.settings.noisew);
        $('#vits_segment_size_output').text(this.settings.segment_size);
        $('#vits_sdp_ratio_output').text(this.settings.sdp_ratio);
        $('#vits_emotion_output').text(this.settings.emotion);
        $('#vits_style_weight_output').text(this.settings.style_weight);

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Initialise this.settings from stored settings, push the values into the
     * settings controls, hook up their change handlers and run the readiness check.
     * @param {object} settings Stored settings; keys missing from defaultSettings are ignored
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that applying stored values never overwrites the defaults
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#vits_endpoint').val(this.settings.provider_endpoint);
        $('#vits_lang').val(this.settings.lang);
        $('#vits_format').val(this.settings.format);
        $('#vits_length').val(this.settings.length);
        $('#vits_noise').val(this.settings.noise);
        $('#vits_noisew').val(this.settings.noisew);
        $('#vits_segment_size').val(this.settings.segment_size);
        $('#vits_streaming').prop('checked', this.settings.streaming);
        $('#vits_dim_emotion').val(this.settings.dim_emotion);
        $('#vits_sdp_ratio').val(this.settings.sdp_ratio);
        $('#vits_emotion').val(this.settings.emotion);
        $('#vits_text_prompt').val(this.settings.text_prompt);
        $('#vits_style_text').val(this.settings.style_text);
        $('#vits_style_weight').val(this.settings.style_weight);

        // Update the UI to reflect changes
        $('#vits_length_output').text(this.settings.length);
        $('#vits_noise_output').text(this.settings.noise);
        $('#vits_noisew_output').text(this.settings.noisew);
        $('#vits_segment_size_output').text(this.settings.segment_size);
        $('#vits_sdp_ratio_output').text(this.settings.sdp_ratio);
        $('#vits_emotion_output').text(this.settings.emotion);
        $('#vits_style_weight_output').text(this.settings.style_weight);

        // Register input/change event listeners to update settings on user interaction
        $('#vits_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#vits_lang').on('change', () => { this.onSettingsChange(); });
        $('#vits_format').on('change', () => { this.onSettingsChange(); });
        $('#vits_length').on('change', () => { this.onSettingsChange(); });
        $('#vits_noise').on('change', () => { this.onSettingsChange(); });
        $('#vits_noisew').on('change', () => { this.onSettingsChange(); });
        $('#vits_segment_size').on('change', () => { this.onSettingsChange(); });
        $('#vits_streaming').on('change', () => { this.onSettingsChange(); });
        $('#vits_dim_emotion').on('change', () => { this.onSettingsChange(); });
        $('#vits_sdp_ratio').on('change', () => { this.onSettingsChange(); });
        $('#vits_emotion').on('change', () => { this.onSettingsChange(); });
        $('#vits_text_prompt').on('change', () => { this.onSettingsChange(); });
        $('#vits_style_text').on('change', () => { this.onSettingsChange(); });
        $('#vits_style_weight').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();
        console.info('VITS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Promise.allSettled is used, so a failing request does not reject here.
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by its display name, fetching the voice list first if it is empty.
     * @param {string} voiceName Voice display name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice has that name (a plain string is thrown, not an Error)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Look up a voice by its id, fetching the voice list first if it is empty.
     * @param {string} voiceId Voice id in the form `model_type&speaker_id`
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice has that id (a plain string is thrown, not an Error)
     */
    async getVoiceById(voiceId) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.voice_id == voiceId);
        if (!match) {
            throw `TTS Voice id ${voiceId} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text with the given voice.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice id in the form `model_type&speaker_id`
     * @returns {Promise<Response|string>} Fetch response, or a URL string when streaming is enabled
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `<endpoint>/voice/speakers` and flatten it into voice objects.
     * The result is also stored in this.voices.
     * @returns {Promise<object[]>} Voice objects ({ name, voice_id, preview_url, lang })
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/voice/speakers`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and an
            // object interpolated into the message would only show "[object Object]"
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const jsonData = await response.json();
        const voices = [];

        const addVoices = (modelType) => {
            // Tolerate speaker lists that omit a model type instead of crashing on undefined
            const speakers = jsonData?.[modelType] ?? [];
            speakers.forEach(voice => {
                voices.push({
                    name: `[${modelType}] ${voice.name} (${voice.lang})`,
                    voice_id: `${modelType}&${voice.id}`,
                    preview_url: false,
                    lang: voice.lang,
                });
            });
        };
        for (const key in this.modelTypes) {
            addVoices(this.modelTypes[key]);
        }

        this.voices = voices; // Assign to the class property
        return voices; // Also return this list
    }

    // Each time a parameter is changed, we change the configuration.
    // Nothing to push for this provider: every setting is sent with each generation request.
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_type&speaker_id)
     * @param {string|null} [lang] Language key overriding settings.lang when not null
     * @param {boolean} [forceNoStreaming] When true, ignore the streaming setting
     * @returns {Promise<Response|string>} Fetch response, or the request URL string when streaming
     * @throws {Error} When the non-streaming request answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const streaming = !forceNoStreaming && this.settings.streaming;
        const [model_type, speaker_id] = voiceId.split('&');
        const params = new URLSearchParams();
        params.append('text', inputText);
        params.append('id', speaker_id);
        if (streaming) {
            params.append('streaming', streaming);
            // Streaming response only supports MP3
        }
        else {
            params.append('format', this.settings.format);
        }
        params.append('lang', lang ?? this.settings.lang);
        params.append('length', this.settings.length);
        params.append('noise', this.settings.noise);
        params.append('noisew', this.settings.noisew);
        params.append('segment_size', this.settings.segment_size);

        if (model_type == this.modelTypes.W2V2_VITS) {
            params.append('emotion', this.settings.dim_emotion);
        }
        else if (model_type == this.modelTypes.BERT_VITS2) {
            params.append('sdp_ratio', this.settings.sdp_ratio);
            params.append('emotion', this.settings.emotion);
            if (this.settings.text_prompt) {
                params.append('text_prompt', this.settings.text_prompt);
            }
            if (this.settings.style_text) {
                params.append('style_text', this.settings.style_text);
                params.append('style_weight', this.settings.style_weight);
            }
        }

        const url = `${this.settings.provider_endpoint}/voice/${model_type.toLowerCase()}`;

        // When streaming, no request is made here: the caller receives the GET URL instead
        if (streaming) {
            return url + `?${params.toString()}`;
        }

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                },
                body: params,
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoiceById(id);
        // Use the configured language if the voice supports it, else the voice's first language
        const lang = voice.lang.includes(this.settings.lang) ? this.settings.lang : voice.lang[0];

        let lang_code = this.langKey2LangCode[lang];
        const text = getPreviewString(lang_code);
        const response = await this.fetchTtsGeneration(text, id, lang, true);
        if (typeof response != 'string') {
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            // Release the blob once playback has finished so previews do not accumulate in memory
            this.audioElement.onended = () => URL.revokeObjectURL(url);
            this.audioElement.src = url;
            this.audioElement.play();
        }
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}

View File

@@ -0,0 +1,327 @@
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
import { saveTtsProviderSettings } from './index.js';
export { XTTSTtsProvider };
/**
 * TTS provider that talks to an XTTSv2 API server, either directly or through
 * the Extras API when its TTS module is available.
 */
class XTTSTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';

    // Handle of the timer that polls for the Extras TTS module; null while not polling
    apiCheckInterval = null;

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Replace fancy ellipsis with "..."
        text = text.replace(/…/g, '...');
        // Remove quotes
        text = text.replace(/["“”‘’]/g, '');
        // Replace multiple "." with single "."
        text = text.replace(/\.+/g, '.');
        return text;
    }

    // UI label -> language code sent to the server
    languageLabels = {
        'Arabic': 'ar',
        'Brazilian Portuguese': 'pt',
        'Chinese': 'zh-cn',
        'Czech': 'cs',
        'Dutch': 'nl',
        'English': 'en',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Polish': 'pl',
        'Russian': 'ru',
        'Spanish': 'es',
        'Turkish': 'tr',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hungarian': 'hu',
        'Hindi': 'hi',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:8020',
        language: 'en',
        temperature: 0.75,
        length_penalty: 1.0,
        repetition_penalty: 5.0,
        top_k: 50,
        top_p: 0.85,
        speed: 1,
        enable_text_splitting: true,
        stream_chunk_size: 100,
        voiceMap: {},
        streaming: false,
    };

    /**
     * Markup for the provider settings panel. Inputs are rendered with the default
     * values; loadSettings() fills in the stored values afterwards.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `
<label for="xtts_api_language">Language</label>
<select id="xtts_api_language">`;
        for (const [label, code] of Object.entries(this.languageLabels)) {
            const selected = code === this.settings?.language ? ' selected="selected"' : '';
            html += `<option value="${code}"${selected}>${label}</option>`;
        }
        html += `
</select>
<label>XTTS Settings:</label><br/>
<label for="xtts_tts_endpoint">Provider Endpoint:</label>
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<span>Use <a target="_blank" href="https://github.com/daswer123/xtts-api-server">XTTSv2 TTS Server</a>.</span>
<label for="xtts_tts_streaming" class="checkbox_label">
<input id="xtts_tts_streaming" type="checkbox" />
<span>Streaming <small>(RVC not supported)</small></span>
</label>
<label for="xtts_speed">Speed: <span id="xtts_tts_speed_output">${this.defaultSettings.speed}</span></label>
<input id="xtts_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2" step="0.01" />
<label for="xtts_temperature">Temperature: <span id="xtts_tts_temperature_output">${this.defaultSettings.temperature}</span></label>
<input id="xtts_temperature" type="range" value="${this.defaultSettings.temperature}" min="0.01" max="1" step="0.01" />
<label for="xtts_length_penalty">Length Penalty: <span id="xtts_length_penalty_output">${this.defaultSettings.length_penalty}</span></label>
<input id="xtts_length_penalty" type="range" value="${this.defaultSettings.length_penalty}" min="0.5" max="2" step="0.1" />
<label for="xtts_repetition_penalty">Repetition Penalty: <span id="xtts_repetition_penalty_output">${this.defaultSettings.repetition_penalty}</span></label>
<input id="xtts_repetition_penalty" type="range" value="${this.defaultSettings.repetition_penalty}" min="1" max="10" step="0.1" />
<label for="xtts_top_k">Top K: <span id="xtts_top_k_output">${this.defaultSettings.top_k}</span></label>
<input id="xtts_top_k" type="range" value="${this.defaultSettings.top_k}" min="0" max="100" step="1" />
<label for="xtts_top_p">Top P: <span id="xtts_top_p_output">${this.defaultSettings.top_p}</span></label>
<input id="xtts_top_p" type="range" value="${this.defaultSettings.top_p}" min="0" max="1" step="0.01" />
<label for="xtts_stream_chunk_size">Stream Chunk Size: <span id="xtts_stream_chunk_size_output">${this.defaultSettings.stream_chunk_size}</span></label>
<input id="xtts_stream_chunk_size" type="range" value="${this.defaultSettings.stream_chunk_size}" min="100" max="400" step="1" />
<label for="xtts_enable_text_splitting" class="checkbox_label">
<input id="xtts_enable_text_splitting" type="checkbox" ${this.defaultSettings.enable_text_splitting ? 'checked' : ''} />
Enable Text Splitting
</label>
`;
        return html;
    }

    /**
     * Stop polling for the Extras TTS module.
     * NOTE(review): presumably the TTS extension calls dispose() when a provider is
     * unloaded (other providers in this codebase define one) - confirm against the caller.
     */
    dispose() {
        if (this.apiCheckInterval !== null) {
            clearInterval(this.apiCheckInterval);
            this.apiCheckInterval = null;
        }
    }

    /**
     * Copy the current values of every settings control into this.settings, refresh
     * the slider read-outs, persist the result and push the settings to the server.
     */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
        this.settings.language = $('#xtts_api_language').val();

        // Update the default TTS settings based on input fields
        this.settings.speed = $('#xtts_speed').val();
        this.settings.temperature = $('#xtts_temperature').val();
        this.settings.length_penalty = $('#xtts_length_penalty').val();
        this.settings.repetition_penalty = $('#xtts_repetition_penalty').val();
        this.settings.top_k = $('#xtts_top_k').val();
        this.settings.top_p = $('#xtts_top_p').val();
        this.settings.stream_chunk_size = $('#xtts_stream_chunk_size').val();
        this.settings.enable_text_splitting = $('#xtts_enable_text_splitting').is(':checked');
        this.settings.streaming = $('#xtts_tts_streaming').is(':checked');

        // Update the UI to reflect changes
        $('#xtts_tts_speed_output').text(this.settings.speed);
        $('#xtts_tts_temperature_output').text(this.settings.temperature);
        $('#xtts_length_penalty_output').text(this.settings.length_penalty);
        $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
        $('#xtts_top_k_output').text(this.settings.top_k);
        $('#xtts_top_p_output').text(this.settings.top_p);
        $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);

        saveTtsProviderSettings();
        // Not awaited (this handler is synchronous); log failures instead of leaving
        // an unhandled promise rejection when the server is unreachable
        this.changeTTSSettings().catch(error => {
            console.error('XTTS: Failed to update TTS settings', error);
        });
    }

    /**
     * Initialise this.settings from stored settings, start watching for the Extras
     * TTS module, push the values into the settings controls, hook up their change
     * handlers and run the readiness check.
     * @param {object} settings Stored settings; keys missing from defaultSettings are ignored
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that applying stored values never overwrites the defaults
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings. Unknown keys are skipped rather
        // than thrown on, so a stale stored key cannot abort loading half-way through.
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Drop any poller left over from an earlier load before starting a new one
        this.dispose();
        this.apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('xtts-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
                this.dispose();
            }
        }, 2000);

        // Set initial values from the settings
        $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
        $('#xtts_api_language').val(this.settings.language);
        $('#xtts_speed').val(this.settings.speed);
        $('#xtts_temperature').val(this.settings.temperature);
        $('#xtts_length_penalty').val(this.settings.length_penalty);
        $('#xtts_repetition_penalty').val(this.settings.repetition_penalty);
        $('#xtts_top_k').val(this.settings.top_k);
        $('#xtts_top_p').val(this.settings.top_p);
        $('#xtts_enable_text_splitting').prop('checked', this.settings.enable_text_splitting);
        $('#xtts_stream_chunk_size').val(this.settings.stream_chunk_size);
        $('#xtts_tts_streaming').prop('checked', this.settings.streaming);

        // Update the UI to reflect changes
        $('#xtts_tts_speed_output').text(this.settings.speed);
        $('#xtts_tts_temperature_output').text(this.settings.temperature);
        $('#xtts_length_penalty_output').text(this.settings.length_penalty);
        $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
        $('#xtts_top_k_output').text(this.settings.top_k);
        $('#xtts_top_p_output').text(this.settings.top_p);
        $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);

        // Register input/change event listeners to update settings on user interaction
        $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        $('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
        $('#xtts_speed').on('input', () => { this.onSettingsChange(); });
        $('#xtts_temperature').on('input', () => { this.onSettingsChange(); });
        $('#xtts_length_penalty').on('input', () => { this.onSettingsChange(); });
        $('#xtts_repetition_penalty').on('input', () => { this.onSettingsChange(); });
        $('#xtts_top_k').on('input', () => { this.onSettingsChange(); });
        $('#xtts_top_p').on('input', () => { this.onSettingsChange(); });
        $('#xtts_enable_text_splitting').on('change', () => { this.onSettingsChange(); });
        $('#xtts_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
        $('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();
        console.debug('XTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Promise.allSettled is used, so a failing request does not reject here.
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by its name, fetching the voice list first if it is empty.
     * @param {string} voiceName Voice name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice has that name (a plain string is thrown, not an Error)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(XTTSVoice => XTTSVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text with the given voice.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice id, sent as `speaker_wav`
     * @returns {Promise<Response|string>} Fetch response, or a URL string when streaming is enabled
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `<endpoint>/speakers`.
     * @returns {Promise<any>} Parsed JSON body of the response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and an
            // object interpolated into the message would only show "[object Object]"
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson;
    }

    /**
     * Push the current generation settings to `<endpoint>/set_tts_settings`.
     * Each time a parameter is changed, we change the configuration.
     * @returns {Promise<Response|undefined>} The fetch response, or undefined when no endpoint is set
     */
    async changeTTSSettings() {
        if (!this.settings.provider_endpoint) {
            return;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/set_tts_settings`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify({
                    'temperature': this.settings.temperature,
                    'speed': this.settings.speed,
                    'length_penalty': this.settings.length_penalty,
                    'repetition_penalty': this.settings.repetition_penalty,
                    'top_p': this.settings.top_p,
                    'top_k': this.settings.top_k,
                    'enable_text_splitting': this.settings.enable_text_splitting,
                    'stream_chunk_size': this.settings.stream_chunk_size,
                }),
            },
        );
        return response;
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice id, sent as `speaker_wav`
     * @returns {Promise<Response|string>} Fetch response, or the stream URL string when streaming
     * @throws {Error} When the non-streaming request answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // When streaming, no request is made here: the caller receives the GET URL instead
        if (this.settings.streaming) {
            const params = new URLSearchParams();
            params.append('text', inputText);
            params.append('speaker_wav', voiceId);
            params.append('language', this.settings.language);
            return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/tts_to_audio/`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',  // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker_wav': voiceId,
                    'language': this.settings.language,
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    // Interface not used by XTTS TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}