🎨 优化扩展模块,完成ai接入和对话功能
This commit is contained in:
1077
data/st-core-scripts/scripts/extensions/tts/alltalk.js
Normal file
1077
data/st-core-scripts/scripts/extensions/tts/alltalk.js
Normal file
File diff suppressed because it is too large
Load Diff
208
data/st-core-scripts/scripts/extensions/tts/azure.js
Normal file
208
data/st-core-scripts/scripts/extensions/tts/azure.js
Normal file
@@ -0,0 +1,208 @@
|
||||
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
export { AzureTtsProvider };
|
||||
|
||||
/**
 * TTS provider that talks to the `/api/azure/list` and `/api/azure/generate`
 * endpoints. Only the presence of the API key is read here, via
 * `secret_state[SECRET_KEYS.AZURE_TTS]`.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Cached voice objects shaped as { name, voice_id, preview_url, lang }. */
    voices = [];
    separator = ' . ';
    /** Audio element reused for every voice preview. */
    audioElement = document.createElement('audio');

    /** Template for the settings; only these keys are accepted by loadSettings(). */
    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /**
     * Markup for the provider settings panel (API key button and region input).
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <div class="azure_tts_settings">
            <div class="flex-container alignItemsBaseline">
                <h4 for="azure_tts_key" class="flex1 margin0">
                    <a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
                </h4>
                <div id="azure_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_azure_tts">
                    <i class="fa-solid fa-key"></i>
                    <span>Click to set</span>
                </div>
            </div>
            <label for="azure_tts_region">Region:</label>
            <input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
            <hr>
        </div>
        `;
    }

    constructor() {
        // Kept on the instance so dispose() can unregister the exact same function.
        this.handler = async (/** @type {string} */ key) => {
            if (key !== SECRET_KEYS.AZURE_TTS) return;
            $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
            await this.onRefreshClick();
        };
    }

    /** Unregisters the secret-change listeners added by loadSettings(). */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /** Reads the region from the UI, drops the voice cache and persists the settings. */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Voices depend on the region, so the cache is no longer valid.
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies stored settings, wires up the UI and secret listeners, then runs
     * a readiness check whose failure is only logged.
     * @param {object} settings Stored provider settings; may be empty
     * @throws {Error} If `settings` contains a key not present in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy: assigning defaultSettings directly would let every
        // later change (region, voiceMap entries) overwrite the defaults.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }

        // Namespaced binding + off() keeps a repeated loadSettings() from stacking handlers.
        $('#azure_tts_region')
            .val(this.settings.region)
            .off('input.azureTts')
            .on('input.azureTts', () => this.onSettingsChange());
        $('#azure_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.AZURE_TTS]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            // Remove first so the handler is registered at most once per event.
            eventSource.removeListener(event, this.handler);
            eventSource.on(event, this.handler);
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Readiness check: refreshes the voice cache when an API key is present,
     * clears it otherwise. Rejects if the voice list request fails.
     */
    async checkReady() {
        // Store the result instead of discarding it, so a key change refreshes
        // the cache that getVoice() reads.
        this.voices = secret_state[SECRET_KEYS.AZURE_TTS]
            ? await this.fetchTtsVoiceObjects()
            : [];
    }

    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name to find
     * @returns {Promise<object>} Matching voice object
     * @throws {Error} If no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} The fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetches the voice list for the configured region, sorted by locale and
     * then by short name.
     * @returns {Promise<object[]>} Voice objects; empty when key or region is missing
     * @throws {Error} If the request returns a non-OK status
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }

        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration() already throws on a non-OK response.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        // Attach the cleanup before playback starts so it cannot be missed.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.src = url;
        try {
            await this.audioElement.play();
        } catch (error) {
            // Playback never started, so 'ended' will not fire; release the blob now.
            URL.revokeObjectURL(url);
            throw error;
        }
    }

    /**
     * Requests synthesized audio from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice to use
     * @returns {Promise<Response>} OK response carrying the audio
     * @throws {Error} If the key or region is missing, or the request fails
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }

        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
||||
649
data/st-core-scripts/scripts/extensions/tts/chatterbox.js
Normal file
649
data/st-core-scripts/scripts/extensions/tts/chatterbox.js
Normal file
@@ -0,0 +1,649 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { ChatterboxTtsProvider };
|
||||
|
||||
/**
 * TTS provider for a Chatterbox TTS server reachable at
 * `settings.provider_endpoint`. Uses the server's `/api/ui/initial-data`,
 * `/get_predefined_voices`, `/get_reference_files` and `/tts` routes.
 *
 * Voice IDs starting with `ref_` denote reference ("clone") voices; the
 * prefix is stripped before the filename is sent to the server.
 */
class ChatterboxTtsProvider {
    //########//
    // Config //
    //########//

    // Plain defaults. The previous `this.settings.x || default` form always
    // read from an empty object, and would have turned a stored `false`/`0`
    // into the default had it ever been given real values.
    settings = {
        provider_endpoint: 'http://localhost:8004',
        voice_mode: 'predefined',
        predefined_voice: 'S1',
        reference_voice: '',
        temperature: 0.8,
        exaggeration: 0.5,
        cfg_weight: 0.5,
        seed: -1,
        speed_factor: 1.0,
        language: 'en',
        split_text: true,
        chunk_size: 120,
        output_format: 'wav',
        voiceMap: {},
    };

    /** True after the last checkReady() got a valid response from the server. */
    ready = false;
    /** Cached voice objects shaped as { name, voice_id, preview_url, lang }. */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /** Display label -> language code sent to the server. */
    languageLabels = {
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Portuguese': 'pt',
        'Polish': 'pl',
        'Turkish': 'tr',
        'Russian': 'ru',
        'Dutch': 'nl',
        'Czech': 'cs',
        'Arabic': 'ar',
        'Chinese': 'zh-cn',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hindi': 'hi',
    };

    /**
     * Escapes a value for safe interpolation into HTML text or a quoted attribute.
     * @param {*} value Value to escape; coerced to string
     * @returns {string} Escaped string
     */
    escapeHtml(value) {
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', '\'': '&#39;' };
        return String(value).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Markup (plus scoped CSS) for the provider settings panel, pre-filled
     * from the current settings.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        const s = this.settings;
        // Setting values may come from stored/user input, so never inline them raw.
        const esc = (value) => this.escapeHtml(value);

        let html = `<div class="chatterbox-settings-container">
            <div class="chatterbox-settings-header">
                <h3>Chatterbox TTS Settings</h3>
                <div class="status-indicator">
                    Status: <span id="chatterbox-status" class="offline">Offline</span>
                </div>
            </div>`;

        // Server endpoint
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-endpoint">Server Endpoint:</label>
            <input id="chatterbox-endpoint" type="text" class="text_pole" value="${esc(s.provider_endpoint)}" />
        </div>`;

        // Language selection
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-language">Language:</label>
            <select id="chatterbox-language">`;
        for (const [label, code] of Object.entries(this.languageLabels)) {
            html += `<option value="${code}" ${code === s.language ? 'selected' : ''}>${label}</option>`;
        }
        html += `</select>
        </div>`;

        // Generation parameters
        html += `<div class="chatterbox-params-section">
            <h4>Generation Parameters</h4>`;

        // Temperature
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-temperature">Temperature: <span id="chatterbox-temperature-value">${esc(s.temperature)}</span></label>
            <input id="chatterbox-temperature" type="range" min="0" max="1" step="0.1" value="${esc(s.temperature)}" />
        </div>`;

        // Exaggeration
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-exaggeration">Exaggeration: <span id="chatterbox-exaggeration-value">${esc(s.exaggeration)}</span></label>
            <input id="chatterbox-exaggeration" type="range" min="0" max="2" step="0.1" value="${esc(s.exaggeration)}" />
        </div>`;

        // CFG Weight
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-cfg-weight">CFG Weight: <span id="chatterbox-cfg-weight-value">${esc(s.cfg_weight)}</span></label>
            <input id="chatterbox-cfg-weight" type="range" min="0" max="1" step="0.1" value="${esc(s.cfg_weight)}" />
        </div>`;

        // Speed Factor
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-speed">Speed Factor: <span id="chatterbox-speed-value">${esc(s.speed_factor)}</span></label>
            <input id="chatterbox-speed" type="range" min="0.5" max="2" step="0.1" value="${esc(s.speed_factor)}" />
        </div>`;

        // Seed
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-seed">Seed (-1 for random):</label>
            <input id="chatterbox-seed" class="text_pole" type="number" min="-1" value="${esc(s.seed)}" />
        </div>`;

        // Text chunking
        html += `<div class="chatterbox-setting-row">
            <label class="checkbox_label">
                <input type="checkbox" id="chatterbox-split-text" ${s.split_text ? 'checked' : ''} />
                Split long texts into chunks
            </label>
        </div>`;

        // Chunk size
        html += `<div class="chatterbox-setting-row" id="chunk-size-row" ${!s.split_text ? 'style="display: none;"' : ''}>
            <label for="chatterbox-chunk-size">Chunk Size:</label>
            <input id="chatterbox-chunk-size" class="text_pole" type="number" min="50" max="500" value="${esc(s.chunk_size)}" />
        </div>`;

        // Output format
        html += `<div class="chatterbox-setting-row">
            <label for="chatterbox-format">Output Format:</label>
            <select id="chatterbox-format">
                <option value="wav" ${s.output_format === 'wav' ? 'selected' : ''}>WAV</option>
                <option value="opus" ${s.output_format === 'opus' ? 'selected' : ''}>Opus</option>
            </select>
        </div>`;

        html += '</div>'; // End params section

        // Footer with links
        html += `<div class="chatterbox-footer">
            <a href="${esc(s.provider_endpoint)}" target="_blank">Chatterbox Web UI</a> |
            <a href="https://github.com/devnen/Chatterbox-TTS-Server" target="_blank">Documentation</a>
        </div>`;

        html += '</div>'; // End container

        // Add CSS styles
        html += `<style>
            .chatterbox-settings-container {
                padding: 10px;
            }
            .chatterbox-settings-header {
                display: flex;
                justify-content: space-between;
                align-items: center;
                margin-bottom: 15px;
            }
            .chatterbox-settings-header h3 {
                margin: 0;
            }
            .chatterbox-settings-container .status-indicator {
                font-weight: bold;
            }
            #chatterbox-status.ready { color: #4CAF50; }
            #chatterbox-status.offline { color: #f44336; }
            #chatterbox-status.processing { color: #2196F3; }
            .chatterbox-setting-row {
                margin-bottom: 10px;
                display: flex;
                align-items: center;
                gap: 10px;
            }
            .chatterbox-setting-row label {
                flex: 0 0 150px;
            }
            .chatterbox-setting-row label.checkbox_label {
                flex-basis: auto;
            }
            .chatterbox-setting-row input[type="text"],
            .chatterbox-setting-row input[type="number"],
            .chatterbox-setting-row select {
                flex: 1;
            }
            .chatterbox-setting-row input[type="range"] {
                flex: 1;
            }
            .chatterbox-params-section {
                margin-top: 15px;
                padding-top: 15px;
                border-top: 1px solid #ccc;
            }
            .chatterbox-params-section h4 {
                margin-top: 0;
                margin-bottom: 10px;
            }
            .chatterbox-footer {
                margin-top: 15px;
                padding-top: 15px;
                border-top: 1px solid #ccc;
                text-align: center;
                font-size: 0.9em;
            }
        </style>`;

        return html;
    }

    //######################//
    // Startup & Initialize //
    //######################//

    /**
     * Merges stored settings over the defaults (unknown keys are ignored),
     * syncs the UI, binds the UI handlers and probes the server.
     * @param {object} settings Stored provider settings; may be empty
     */
    async loadSettings(settings) {
        this.updateStatus('Offline');

        if (Object.keys(settings).length === 0) {
            console.info('Using default Chatterbox TTS Provider settings');
        } else {
            // Populate settings with provided values
            for (const [key, value] of Object.entries(settings)) {
                if (key in this.settings) {
                    this.settings[key] = value;
                }
            }
        }

        // Update UI elements
        this.updateUIFromSettings();

        console.debug('ChatterboxTTS: Settings loaded');

        // Bind before the network probe so edits made while it is pending are not lost.
        this.setupEventListeners();

        try {
            // Check if TTS provider is ready
            await this.checkReady();

            if (this.ready) {
                // Fetch all voice types for the voice map
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            }
        } catch (error) {
            console.error('Error loading Chatterbox settings:', error);
            this.updateStatus('Offline');
        }
    }

    /** Pushes every current setting value into its settings-panel control. */
    updateUIFromSettings() {
        const s = this.settings;
        $('#chatterbox-endpoint').val(s.provider_endpoint);
        $('#chatterbox-language').val(s.language);
        $('#chatterbox-temperature').val(s.temperature);
        $('#chatterbox-temperature-value').text(s.temperature);
        $('#chatterbox-exaggeration').val(s.exaggeration);
        $('#chatterbox-exaggeration-value').text(s.exaggeration);
        $('#chatterbox-cfg-weight').val(s.cfg_weight);
        $('#chatterbox-cfg-weight-value').text(s.cfg_weight);
        $('#chatterbox-speed').val(s.speed_factor);
        $('#chatterbox-speed-value').text(s.speed_factor);
        $('#chatterbox-seed').val(s.seed);
        $('#chatterbox-split-text').prop('checked', s.split_text);
        $('#chatterbox-chunk-size').val(s.chunk_size);
        $('#chatterbox-format').val(s.output_format);

        // Show/hide chunk size based on split text
        if (s.split_text) {
            $('#chunk-size-row').show();
        } else {
            $('#chunk-size-row').hide();
        }
    }

    //##############################//
    // Check Server is Available   //
    //##############################//

    /**
     * Probes the server and records the outcome in `this.ready`.
     * Never rejects: any failure is logged and leaves `ready` false.
     */
    async checkReady() {
        try {
            const response = await fetch(`${this.settings.provider_endpoint}/api/ui/initial-data`);

            if (!response.ok) {
                throw new Error(`HTTP Error Response: ${response.status} ${response.statusText}`);
            }

            const data = await response.json();

            // Check if we got valid data
            if (data) {
                this.ready = true;
                console.log('Chatterbox TTS service is ready.');
            } else {
                this.ready = false;
                console.log('Chatterbox TTS service returned invalid data.');
            }
        } catch (error) {
            console.error('Error checking Chatterbox TTS service readiness:', error);
            this.ready = false;
        }
    }

    //######################//
    // Get Available Voices //
    //######################//

    /**
     * Loads predefined and reference voices into `this.voices`.
     * A failure to load reference voices is tolerated; a failure to load
     * predefined voices empties the cache. Never rejects.
     * @returns {Promise<object[]>} The combined voice list (possibly empty)
     */
    async fetchTtsVoiceObjects() {
        try {
            // Always fetch predefined voices
            const predefinedResponse = await fetch(`${this.settings.provider_endpoint}/get_predefined_voices`);
            if (!predefinedResponse.ok) {
                throw new Error(`HTTP ${predefinedResponse.status}: ${predefinedResponse.statusText}`);
            }

            const predefinedData = await predefinedResponse.json();

            // Transform predefined voices
            const predefinedVoices = predefinedData.map(voice => ({
                name: voice.display_name,
                voice_id: voice.voice_id || voice.filename,
                preview_url: null,
                lang: voice.language || 'en',
            }));

            // Always try to fetch reference voices
            let referenceVoices = [];
            try {
                const refResponse = await fetch(`${this.settings.provider_endpoint}/get_reference_files`);
                if (refResponse.ok) {
                    const refData = await refResponse.json();
                    referenceVoices = refData.map(filename => ({
                        name: `[Clone] ${filename}`,
                        voice_id: `ref_${filename}`,
                        preview_url: null,
                        lang: 'en',
                    }));
                }
            } catch (error) {
                console.warn('Failed to fetch reference voices:', error);
            }

            // Combine all voices
            this.voices = [...predefinedVoices, ...referenceVoices];

            console.log(`Loaded ${this.voices.length} voices (${predefinedVoices.length} predefined, ${referenceVoices.length} reference)`);
            return this.voices;
        } catch (error) {
            console.error('Error fetching Chatterbox voices:', error);
            this.voices = [];
            return [];
        }
    }

    // Alias for internal use
    async fetchVoices() {
        return this.fetchTtsVoiceObjects();
    }

    //###########################//
    // Setup Event Listeners    //
    //###########################//

    /**
     * Binds the settings-panel controls to `this.settings`. Safe to call more
     * than once: handlers are namespaced and replaced rather than stacked.
     */
    setupEventListeners() {
        const NAMESPACE = '.chatterboxTts';
        const bind = (selector, eventName, handler) => {
            const namespaced = `${eventName}${NAMESPACE}`;
            $(selector).off(namespaced).on(namespaced, handler);
        };

        // Range slider: store the float and mirror it into the "<id>-value" label.
        const bindSlider = (id, settingKey) => bind(`#${id}`, 'input', (e) => {
            const value = Number.parseFloat(e.target.value);
            if (Number.isNaN(value)) return;
            this.settings[settingKey] = value;
            $(`#${id}-value`).text(value);
            this.onSettingsChange();
        });

        // Number field: ignore an emptied/invalid field instead of persisting NaN.
        const bindInteger = (id, settingKey) => bind(`#${id}`, 'change', (e) => {
            const value = Number.parseInt(e.target.value, 10);
            if (Number.isNaN(value)) return;
            this.settings[settingKey] = value;
            this.onSettingsChange();
        });

        // Server endpoint change
        bind('#chatterbox-endpoint', 'input', () => {
            this.settings.provider_endpoint = $('#chatterbox-endpoint').val();
            this.onSettingsChange();
        });

        // Language
        bind('#chatterbox-language', 'change', (e) => {
            this.settings.language = e.target.value;
            this.onSettingsChange();
        });

        // Parameter sliders
        bindSlider('chatterbox-temperature', 'temperature');
        bindSlider('chatterbox-exaggeration', 'exaggeration');
        bindSlider('chatterbox-cfg-weight', 'cfg_weight');
        bindSlider('chatterbox-speed', 'speed_factor');

        // Seed
        bindInteger('chatterbox-seed', 'seed');

        // Text splitting
        bind('#chatterbox-split-text', 'change', (e) => {
            this.settings.split_text = e.target.checked;
            if (e.target.checked) {
                $('#chunk-size-row').show();
            } else {
                $('#chunk-size-row').hide();
            }
            this.onSettingsChange();
        });

        bindInteger('chatterbox-chunk-size', 'chunk_size');

        // Output format
        bind('#chatterbox-format', 'change', (e) => {
            this.settings.output_format = e.target.value;
            this.onSettingsChange();
        });
    }

    //#############################//
    // Store ST interface settings //
    //#############################//

    onSettingsChange() {
        // Save the updated settings
        saveTtsProviderSettings();
    }

    //#########################//
    // Handle Reload button   //
    //#########################//

    /** Re-probes the server, reloads the voices and updates the status label. */
    async onRefreshClick() {
        try {
            this.updateStatus('Processing');
            await this.checkReady();

            if (this.ready) {
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            } else {
                this.updateStatus('Offline');
            }
        } catch (error) {
            console.error('Error during refresh:', error);
            this.updateStatus('Offline');
        }
    }

    //##################//
    // Request Builder //
    //##################//

    /**
     * Builds the JSON body shared by preview and generation requests.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID; a `ref_` prefix selects clone mode
     * @returns {object} Request body for the `/tts` route
     */
    buildTtsRequestBody(text, voiceId) {
        const isReferenceVoice = Boolean(voiceId && voiceId.startsWith('ref_'));
        const { seed } = this.settings;

        const requestBody = {
            text,
            voice_mode: isReferenceVoice ? 'clone' : 'predefined',
            temperature: this.settings.temperature,
            exaggeration: this.settings.exaggeration,
            cfg_weight: this.settings.cfg_weight,
            // Negative (or non-numeric) seed means "pick a random one".
            seed: seed >= 0 ? seed : Math.floor(Math.random() * 2147483648),
            speed_factor: this.settings.speed_factor,
            language: this.settings.language,
            split_text: this.settings.split_text,
            output_format: this.settings.output_format,
        };

        // Add voice-specific parameters
        if (isReferenceVoice) {
            requestBody.reference_audio_filename = voiceId.substring(4); // Remove 'ref_' prefix
        } else {
            requestBody.predefined_voice_id = voiceId || this.settings.predefined_voice;
        }

        return requestBody;
    }

    //##################//
    // Preview Voice   //
    //##################//

    /**
     * Synthesizes a fixed sample sentence with the given voice and plays it.
     * @param {string} voiceId Voice ID to preview
     * @throws {Error} Re-throws any request or playback failure after logging it
     */
    async previewTtsVoice(voiceId) {
        let audioUrl = null;
        try {
            this.updateStatus('Processing');

            const previewText = 'Hello! This is a preview of the selected voice.';

            // Generate preview using the main TTS endpoint
            const requestBody = this.buildTtsRequestBody(previewText, voiceId);
            requestBody.split_text = false; // Don't split for preview

            const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(requestBody),
            });

            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }

            // Get the audio blob and play it
            const audioBlob = await response.blob();
            audioUrl = URL.createObjectURL(audioBlob);

            const audio = new Audio(audioUrl);
            audio.addEventListener('ended', () => {
                URL.revokeObjectURL(audioUrl);
                this.updateStatus('Ready');
            });

            await audio.play();
        } catch (error) {
            // If playback never started, 'ended' will not fire; release the blob here.
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl);
            }
            console.error('Error previewing voice:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //#####################//
    // Get Voice Object   //
    //#####################//

    /**
     * Resolves a voice by name or voice_id. Never rejects for an unknown
     * voice: a synthetic voice object is returned instead.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Voice object { name, voice_id, preview_url, lang }
     */
    async getVoice(voiceName) {
        // Ensure voices are loaded
        if (this.voices.length === 0) {
            await this.fetchTtsVoiceObjects();
        }

        // Find the voice object by name or voice_id
        const match = this.voices.find(voice => voice.name === voiceName || voice.voice_id === voiceName);
        if (match) {
            return match;
        }

        console.warn(`Voice not found: ${voiceName}`);

        // Check if it's a reference voice that wasn't in the list
        if (voiceName && voiceName.startsWith('ref_')) {
            const filename = voiceName.substring(4);
            return {
                name: `[Clone] ${filename}`,
                voice_id: voiceName,
                preview_url: null,
                lang: 'en',
            };
        }

        // Return a default voice object
        return {
            name: voiceName || 'Default',
            voice_id: voiceName || this.settings.predefined_voice || 'S1',
            preview_url: null,
            lang: 'en',
        };
    }

    //##################//
    // Generate TTS    //
    //##################//

    /**
     * Requests synthesized audio for the given text.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice ID; a `ref_` prefix selects clone mode
     * @returns {Promise<Response>} OK response carrying the audio
     * @throws {Error} Re-throws any request failure after logging it
     */
    async generateTts(inputText, voiceId) {
        try {
            this.updateStatus('Processing');

            // Prepare the request body
            const requestBody = this.buildTtsRequestBody(inputText, voiceId);
            requestBody.chunk_size = this.settings.chunk_size;

            console.log('Generating TTS with params:', requestBody);

            const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify(requestBody),
            });

            if (!response.ok) {
                const errorText = await response.text();
                console.error('TTS generation error:', errorText);
                throw new Error(`HTTP ${response.status}: ${errorText}`);
            }

            this.updateStatus('Ready');

            // Return the response directly - SillyTavern expects a Response object
            return response;
        } catch (error) {
            console.error('Error in generateTts:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //######################//
    // Update Status       //
    //######################//

    /**
     * Shows the status text in the panel and sets the element's class to its
     * lowercase form. Does nothing if the status element is not in the DOM.
     * @param {string} status Status label, e.g. 'Ready', 'Offline', 'Processing'
     */
    updateStatus(status) {
        const statusElement = document.getElementById('chatterbox-status');
        if (statusElement) {
            statusElement.textContent = status;
            statusElement.className = status.toLowerCase();
        }
    }
}
|
||||
233
data/st-core-scripts/scripts/extensions/tts/chutes.js
Normal file
233
data/st-core-scripts/scripts/extensions/tts/chutes.js
Normal file
@@ -0,0 +1,233 @@
|
||||
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { ChutesTtsProvider };
|
||||
|
||||
class ChutesTtsProvider {
|
||||
settings;
|
||||
voices = [];
|
||||
models = [];
|
||||
separator = ' . ';
|
||||
|
||||
defaultSettings = {
|
||||
voiceMap: {},
|
||||
model: 'kokoro',
|
||||
speed: 1,
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<div class="flex-container alignItemsCenter">
|
||||
<div class="flex1">Chutes TTS API</div>
|
||||
<div id="chutes_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_chutes">
|
||||
<i class="fa-solid fa-key"></i>
|
||||
<span>API Key</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex-container flexFlowColumn">
|
||||
<div class="flex1">
|
||||
<label for="chutes_tts_model">Model</label>
|
||||
<select id="chutes_tts_model" class="text_pole"></select>
|
||||
</div>
|
||||
<div>
|
||||
<label for="chutes_tts_speed">Speed <span id="chutes_tts_speed_output"></span></label>
|
||||
<input type="range" id="chutes_tts_speed" value="1" min="0.25" max="3" step="0.05">
|
||||
</div>
|
||||
</div>`;
|
||||
return html;
|
||||
}
|
||||
|
||||
constructor() {
|
||||
this.handler = async function (/** @type {string} */ key) {
|
||||
if (key !== SECRET_KEYS.CHUTES) return;
|
||||
$('#chutes_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CHUTES]);
|
||||
await this.onRefreshClick();
|
||||
}.bind(this);
|
||||
}
|
||||
|
||||
dispose() {
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.removeListener(event, this.handler);
|
||||
});
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
this.settings.model = $('#chutes_tts_model').val();
|
||||
this.settings.speed = Number($('#chutes_tts_speed').val());
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
if (Object.keys(settings).length === 0) {
|
||||
Object.assign(settings, this.defaultSettings);
|
||||
}
|
||||
|
||||
this.settings = settings;
|
||||
|
||||
if (!this.settings.voiceMap) {
|
||||
this.settings.voiceMap = {};
|
||||
}
|
||||
|
||||
// Update UI
|
||||
$('#chutes_tts_model').val(this.settings.model);
|
||||
$('#chutes_tts_speed').val(this.settings.speed);
|
||||
$('#chutes_tts_speed_output').text(this.settings.speed);
|
||||
|
||||
$('#chutes_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CHUTES]);
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.on(event, this.handler);
|
||||
});
|
||||
|
||||
await this.checkReady();
|
||||
|
||||
$('#chutes_tts_model').on('change', () => this.onSettingsChange());
|
||||
$('#chutes_tts_speed').on('input', () => {
|
||||
const value = $('#chutes_tts_speed').val();
|
||||
$('#chutes_tts_speed_output').text(String(value));
|
||||
this.onSettingsChange();
|
||||
});
|
||||
}
|
||||
|
||||
async checkReady() {
|
||||
await this.updateModels();
|
||||
if (this.models.length === 0) {
|
||||
// No models available
|
||||
}
|
||||
await this.updateVoices();
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
return await this.checkReady();
|
||||
}
|
||||
|
||||
async updateModels() {
|
||||
// For Chutes TTS, we always use the Kokoro model currently.
|
||||
this.models = ['kokoro'];
|
||||
|
||||
$('#chutes_tts_model').empty();
|
||||
$('#chutes_tts_model').append($('<option>').val('kokoro').text('Kokoro'));
|
||||
$('#chutes_tts_model').val('kokoro');
|
||||
|
||||
this.settings.model = 'kokoro';
|
||||
}
|
||||
|
||||
async updateVoices() {
|
||||
// Kokoro voices list
|
||||
const kokoroVoices = [
|
||||
{ id: 'af_alloy', name: 'Alloy (Female)', lang: 'en-US' },
|
||||
{ id: 'af_aoede', name: 'Aoede (Female)', lang: 'en-US' },
|
||||
{ id: 'af_bella', name: 'Bella (Female)', lang: 'en-US' },
|
||||
{ id: 'af_heart', name: 'Heart (Female) - Default', lang: 'en-US' },
|
||||
{ id: 'af_jessica', name: 'Jessica (Female)', lang: 'en-US' },
|
||||
{ id: 'af_kore', name: 'Kore (Female)', lang: 'en-US' },
|
||||
{ id: 'af_nicole', name: 'Nicole (Female)', lang: 'en-US' },
|
||||
{ id: 'af_nova', name: 'Nova (Female)', lang: 'en-US' },
|
||||
{ id: 'af_river', name: 'River (Female)', lang: 'en-US' },
|
||||
{ id: 'af_sarah', name: 'Sarah (Female)', lang: 'en-US' },
|
||||
{ id: 'af_sky', name: 'Sky (Female)', lang: 'en-US' },
|
||||
{ id: 'am_adam', name: 'Adam (Male)', lang: 'en-US' },
|
||||
{ id: 'am_echo', name: 'Echo (Male)', lang: 'en-US' },
|
||||
{ id: 'am_eric', name: 'Eric (Male)', lang: 'en-US' },
|
||||
{ id: 'am_fenrir', name: 'Fenrir (Male)', lang: 'en-US' },
|
||||
{ id: 'am_liam', name: 'Liam (Male)', lang: 'en-US' },
|
||||
{ id: 'am_michael', name: 'Michael (Male)', lang: 'en-US' },
|
||||
{ id: 'am_onyx', name: 'Onyx (Male)', lang: 'en-US' },
|
||||
{ id: 'am_puck', name: 'Puck (Male)', lang: 'en-US' },
|
||||
{ id: 'am_santa', name: 'Santa (Male)', lang: 'en-US' },
|
||||
{ id: 'bf_alice', name: 'Alice (British Female)', lang: 'en-GB' },
|
||||
{ id: 'bf_emma', name: 'Emma (British Female)', lang: 'en-GB' },
|
||||
{ id: 'bf_isabella', name: 'Isabella (British Female)', lang: 'en-GB' },
|
||||
{ id: 'bf_lily', name: 'Lily (British Female)', lang: 'en-GB' },
|
||||
{ id: 'bm_daniel', name: 'Daniel (British Male)', lang: 'en-GB' },
|
||||
{ id: 'bm_fable', name: 'Fable (British Male)', lang: 'en-GB' },
|
||||
{ id: 'bm_george', name: 'George (British Male)', lang: 'en-GB' },
|
||||
{ id: 'bm_lewis', name: 'Lewis (British Male)', lang: 'en-GB' },
|
||||
{ id: 'ef_dora', name: 'Dora (European Female)', lang: 'es-ES' },
|
||||
{ id: 'em_alex', name: 'Alex (European Male)', lang: 'es-ES' },
|
||||
{ id: 'em_santa', name: 'Santa (European Male)', lang: 'es-ES' },
|
||||
{ id: 'ff_siwis', name: 'Siwis (French Female)', lang: 'fr-FR' },
|
||||
{ id: 'hf_alpha', name: 'Alpha (Hindi Female)', lang: 'hi-IN' },
|
||||
{ id: 'hf_beta', name: 'Beta (Hindi Female)', lang: 'hi-IN' },
|
||||
{ id: 'hm_omega', name: 'Omega (Hindi Male)', lang: 'hi-IN' },
|
||||
{ id: 'hm_psi', name: 'Psi (Hindi Male)', lang: 'hi-IN' },
|
||||
{ id: 'if_sara', name: 'Sara (Italian Female)', lang: 'it-IT' },
|
||||
{ id: 'im_nicola', name: 'Nicola (Italian Male)', lang: 'it-IT' },
|
||||
{ id: 'jf_alpha', name: 'Alpha (Japanese Female)', lang: 'ja-JP' },
|
||||
{ id: 'jf_gongitsune', name: 'Gongitsune (Japanese Female)', lang: 'ja-JP' },
|
||||
{ id: 'jf_nezumi', name: 'Nezumi (Japanese Female)', lang: 'ja-JP' },
|
||||
{ id: 'jf_tebukuro', name: 'Tebukuro (Japanese Female)', lang: 'ja-JP' },
|
||||
{ id: 'jm_kumo', name: 'Kumo (Japanese Male)', lang: 'ja-JP' },
|
||||
{ id: 'pf_dora', name: 'Dora (Portuguese Female)', lang: 'pt-PT' },
|
||||
{ id: 'pm_alex', name: 'Alex (Portuguese Male)', lang: 'pt-PT' },
|
||||
{ id: 'pm_santa', name: 'Santa (Portuguese Male)', lang: 'pt-PT' },
|
||||
{ id: 'zf_xiaobei', name: 'Xiaobei (Chinese Female)', lang: 'zh-CN' },
|
||||
{ id: 'zf_xiaoni', name: 'Xiaoni (Chinese Female)', lang: 'zh-CN' },
|
||||
{ id: 'zf_xiaoxiao', name: 'Xiaoxiao (Chinese Female)', lang: 'zh-CN' },
|
||||
{ id: 'zf_xiaoyi', name: 'Xiaoyi (Chinese Female)', lang: 'zh-CN' },
|
||||
{ id: 'zm_yunjian', name: 'Yunjian (Chinese Male)', lang: 'zh-CN' },
|
||||
{ id: 'zm_yunxi', name: 'Yunxi (Chinese Male)', lang: 'zh-CN' },
|
||||
{ id: 'zm_yunxia', name: 'Yunxia (Chinese Male)', lang: 'zh-CN' },
|
||||
{ id: 'zm_yunyang', name: 'Yunyang (Chinese Male)', lang: 'zh-CN' },
|
||||
];
|
||||
|
||||
this.voices = kokoroVoices.map(v => ({
|
||||
name: v.name,
|
||||
voice_id: v.id,
|
||||
lang: v.lang,
|
||||
}));
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length === 0) {
|
||||
await this.updateVoices();
|
||||
}
|
||||
const voice = this.voices.find(v => v.name === voiceName || v.voice_id === voiceName);
|
||||
return voice || this.voices.find(v => v.voice_id === 'af_heart');
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
return response;
|
||||
}
|
||||
|
||||
async fetchTtsGeneration(text, voiceId) {
|
||||
const apiKey = secret_state[SECRET_KEYS.CHUTES];
|
||||
|
||||
if (!apiKey) {
|
||||
throw new Error('No Chutes API key found');
|
||||
}
|
||||
|
||||
const response = await fetch('/api/openai/chutes/generate-voice', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
input: text,
|
||||
voice: voiceId || 'af_heart',
|
||||
speed: this.settings.speed || 1,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`Chutes TTS failed: ${error}`);
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
async fetchTtsVoiceObjects() {
|
||||
if (this.voices.length === 0) {
|
||||
await this.updateVoices();
|
||||
}
|
||||
|
||||
const voiceIds = this.voices
|
||||
.map(voice => ({ name: voice.name, voice_id: voice.voice_id, preview_url: false }));
|
||||
return voiceIds;
|
||||
}
|
||||
|
||||
async previewTtsVoice(voiceId) {
|
||||
const text = getPreviewString(voiceId);
|
||||
await this.generateTts(text, voiceId);
|
||||
}
|
||||
}
|
||||
771
data/st-core-scripts/scripts/extensions/tts/coqui.js
Normal file
771
data/st-core-scripts/scripts/extensions/tts/coqui.js
Normal file
@@ -0,0 +1,771 @@
|
||||
/*
|
||||
TODO:
|
||||
- Hide the voice map, it's just confusing
|
||||
- Delete useless call
|
||||
*/
|
||||
|
||||
import { doExtrasFetch, extension_settings, getApiUrl, modules } from '../../extensions.js';
|
||||
import { initVoiceMap } from './index.js';
|
||||
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
|
||||
|
||||
export { CoquiTtsProvider };
|
||||
|
||||
// Prefix used for console output and toast titles of this module.
const DEBUG_PREFIX = '<Coqui TTS module> ';

// Guard flag checked by onAddClick() before it does any work.
let inApiCall = false;

// Model catalogues; the two coqui-api ones are loaded from JSON in loadSettings().
let coquiApiModels = {}; // Initialized only once
let coquiApiModelsFull = {}; // Initialized only once
let coquiLocalModels = []; // Initialized only once
let coquiLocalModelsReceived = false;
|
||||
/*
|
||||
coquiApiModels format [language][dataset][name]:coqui-api-model-id, example:
|
||||
{
|
||||
"en": {
|
||||
"vctk": {
|
||||
"vits": "tts_models/en/vctk/vits"
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
// Display labels for model language keys; keys without an entry are shown as-is.
const languageLabels = {
    multilingual: 'Multilingual',
    en: 'English',
    fr: 'French',
    es: 'Spanish',
    ja: 'Japanese',
};
|
||||
|
||||
/**
 * Guards calls that need the Extras 'coqui-tts' module.
 * @throws {Error} When 'coqui-tts' is not in the loaded module list; the message
 *                 is the debug prefix followed by the explanation.
 */
function throwIfModuleMissing() {
    if (modules.includes('coqui-tts')) {
        return;
    }

    const message = 'Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.';
    // The second argument of Error() is an options object, not more message text,
    // so prefix and message have to be joined into a single string.
    throw new Error(DEBUG_PREFIX + message);
}
|
||||
|
||||
/**
 * Puts the per-model language and speaker dropdowns back to their 'none' option.
 */
function resetModelSettings() {
    const dropdowns = ['#coqui_api_model_settings_language', '#coqui_api_model_settings_speaker'];
    for (const selector of dropdowns) {
        $(selector).val('none');
    }
}
|
||||
|
||||
class CoquiTtsProvider {
|
||||
//#############################//
|
||||
// Extension UI and Settings //
|
||||
//#############################//
|
||||
|
||||
/** Active settings object; assigned in loadSettings(). */
settings;

/**
 * Defaults, and at the same time the list of setting keys loadSettings() accepts.
 * voiceMapDict holds the saved voices; customVoices is rebuilt from it by updateCustomVoices().
 */
defaultSettings = {
    voiceMap: {},
    customVoices: {},
    voiceIds: [],
    voiceMapDict: {},
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<div class="flex wide100p flexGap10 alignitemscenter">
|
||||
<div>
|
||||
<div style="flex: 50%;">
|
||||
<small>To use CoquiTTS, select the origin, language, and model, then click Add Voice. The voice will then be available to add to a character. Voices are saved globally. </small><br>
|
||||
<label for="coqui_voicename_select">Select Saved Voice:</label>
|
||||
<select id="coqui_voicename_select">
|
||||
<!-- Populated by JS -->
|
||||
</select>
|
||||
<div class="tts_block">
|
||||
<input id="coqui_remove_voiceId_mapping" class="menu_button" type="button" value="Remove Voice" />
|
||||
<input id="coqui_add_voiceId_mapping" class="menu_button" type="button" value="Add Voice" />
|
||||
</div>
|
||||
<label for="coqui_model_origin">Models:</label>
|
||||
<select id="coqui_model_origin">gpu_mode
|
||||
<option value="none">Select Origin</option>
|
||||
<option value="coqui-api">Coqui API (Tested)</option>
|
||||
<option value="coqui-api-full">Coqui API (Experimental)</option>
|
||||
<option value="local">My Models</option>
|
||||
</select>
|
||||
|
||||
<div id="coqui_api_model_div">
|
||||
<select id="coqui_api_language">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
|
||||
<select id="coqui_api_model_name">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
|
||||
<div id="coqui_api_model_settings">
|
||||
<select id="coqui_api_model_settings_language">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
<select id="coqui_api_model_settings_speaker">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
</div>
|
||||
<span id="coqui_api_model_install_status">Model installed on extras server</span>
|
||||
<input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" />
|
||||
</div>
|
||||
|
||||
<div id="coqui_local_model_div">
|
||||
<select id="coqui_local_model_name">
|
||||
<!-- Populated by JS and request -->
|
||||
</select>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
return html;
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw DEBUG_PREFIX + `Invalid setting passed to extension: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
await initLocalModels();
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
|
||||
$('#coqui_api_model_div').hide();
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language').show();
|
||||
$('#coqui_api_model_name').hide();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
$('#coqui_api_model_install_status').hide();
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
|
||||
let that = this;
|
||||
$('#coqui_model_origin').on('change', function () { that.onModelOriginChange(); });
|
||||
$('#coqui_api_language').on('change', function () { that.onModelLanguageChange(); });
|
||||
$('#coqui_api_model_name').on('change', function () { that.onModelNameChange(); });
|
||||
|
||||
$('#coqui_remove_voiceId_mapping').on('click', function () { that.onRemoveClick(); });
|
||||
$('#coqui_add_voiceId_mapping').on('click', function () { that.onAddClick(); });
|
||||
|
||||
// Load coqui-api settings from json file
|
||||
await fetch('/scripts/extensions/tts/coqui_api_models_settings.json')
|
||||
.then(response => response.json())
|
||||
.then(json => {
|
||||
coquiApiModels = json;
|
||||
console.debug(DEBUG_PREFIX,'initialized coqui-api model list to', coquiApiModels);
|
||||
/*
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModels) {
|
||||
$("#coqui_api_language").append(new Option(languageLabels[language],language));
|
||||
console.log(DEBUG_PREFIX,"added language",language);
|
||||
}*/
|
||||
});
|
||||
|
||||
// Load coqui-api FULL settings from json file
|
||||
await fetch('/scripts/extensions/tts/coqui_api_models_settings_full.json')
|
||||
.then(response => response.json())
|
||||
.then(json => {
|
||||
coquiApiModelsFull = json;
|
||||
console.debug(DEBUG_PREFIX,'initialized coqui-api full model list to', coquiApiModelsFull);
|
||||
/*
|
||||
$('#coqui_api_full_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModelsFull) {
|
||||
$("#coqui_api_full_language").append(new Option(languageLabels[language],language));
|
||||
console.log(DEBUG_PREFIX,"added language",language);
|
||||
}*/
|
||||
});
|
||||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady(){
|
||||
throwIfModuleMissing();
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
updateCustomVoices() {
|
||||
// Takes voiceMapDict and converts it to a string to save to voiceMap
|
||||
this.settings.customVoices = {};
|
||||
for (let voiceName in this.settings.voiceMapDict) {
|
||||
const voiceId = this.settings.voiceMapDict[voiceName];
|
||||
this.settings.customVoices[voiceName] = voiceId['model_id'];
|
||||
|
||||
if (voiceId['model_language'] != null)
|
||||
this.settings.customVoices[voiceName] += '[' + voiceId['model_language'] + ']';
|
||||
|
||||
if (voiceId['model_speaker'] != null)
|
||||
this.settings.customVoices[voiceName] += '[' + voiceId['model_speaker'] + ']';
|
||||
}
|
||||
|
||||
// Update UI select list with voices
|
||||
$('#coqui_voicename_select').empty();
|
||||
$('#coqui_voicename_select')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select Voice</option>')
|
||||
.val('none');
|
||||
for (const voiceName in this.settings.voiceMapDict) {
|
||||
$('#coqui_voicename_select').append(new Option(voiceName, voiceName));
|
||||
}
|
||||
|
||||
this.onSettingsChange();
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
console.debug(DEBUG_PREFIX, 'Settings changes', this.settings);
|
||||
extension_settings.tts.Coqui = this.settings;
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
this.checkReady();
|
||||
}
|
||||
|
||||
async onAddClick() {
|
||||
if (inApiCall) {
|
||||
return; //TODO: block dropdown
|
||||
}
|
||||
|
||||
// Ask user for voiceId name to save voice
|
||||
const voiceName = await callGenericPopup('Name of Coqui voice to add to voice select dropdown:', POPUP_TYPE.INPUT);
|
||||
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
const model_name = $('#coqui_api_model_name').val();
|
||||
let model_setting_language = $('#coqui_api_model_settings_language').val();
|
||||
let model_setting_speaker = $('#coqui_api_model_settings_speaker').val();
|
||||
|
||||
|
||||
if (!voiceName) {
|
||||
toastr.error('Voice name empty, please enter one.', DEBUG_PREFIX + ' voice mapping voice name', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_origin == 'none') {
|
||||
toastr.error('Origin not selected, please select one.', DEBUG_PREFIX + ' voice mapping origin', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_origin == 'local') {
|
||||
const model_id = $('#coqui_local_model_name').val();
|
||||
|
||||
if (model_name == 'none') {
|
||||
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
this.settings.voiceMapDict[voiceName] = { model_type: 'local', model_id: 'local/' + model_id };
|
||||
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_language == 'none') {
|
||||
toastr.error('Language not selected, please select one.', DEBUG_PREFIX + ' voice mapping language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_name == 'none') {
|
||||
toastr.error('Model not selected, please select one.', DEBUG_PREFIX + ' voice mapping model', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
this.updateCustomVoices(); // Overide any manual modification
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_setting_language == 'none')
|
||||
model_setting_language = null;
|
||||
|
||||
if (model_setting_speaker == 'none')
|
||||
model_setting_speaker = null;
|
||||
|
||||
const tokens = $('#coqui_api_model_name').val().split('/');
|
||||
const model_dataset = tokens[0];
|
||||
const model_label = tokens[1];
|
||||
const model_id = 'tts_models/' + model_language + '/' + model_dataset + '/' + model_label;
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
if (model_setting_language == null & 'languages' in modelDict[model_language][model_dataset][model_label]) {
|
||||
toastr.error('Model language not selected, please select one.', DEBUG_PREFIX + ' voice mapping model language', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
if (model_setting_speaker == null & 'speakers' in modelDict[model_language][model_dataset][model_label]) {
|
||||
toastr.error('Model speaker not selected, please select one.', DEBUG_PREFIX + ' voice mapping model speaker', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Current custom voices: ', this.settings.customVoices);
|
||||
|
||||
this.settings.voiceMapDict[voiceName] = { model_type: 'coqui-api', model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Registered new voice map: ', voiceName, ':', this.settings.voiceMapDict[voiceName]);
|
||||
|
||||
this.updateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
|
||||
let successMsg = voiceName + ':' + model_id;
|
||||
if (model_setting_language != null)
|
||||
successMsg += '[' + model_setting_language + ']';
|
||||
if (model_setting_speaker != null)
|
||||
successMsg += '[' + model_setting_speaker + ']';
|
||||
toastr.info(successMsg, DEBUG_PREFIX + ' voice map updated', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
let match = await this.fetchTtsVoiceObjects();
|
||||
match = match.filter(
|
||||
voice => voice.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found in CoquiTTS Provider voice list`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async onRemoveClick() {
|
||||
const voiceName = $('#coqui_voicename_select').val();
|
||||
|
||||
if (voiceName === 'none') {
|
||||
toastr.error('Voice not selected, please select one.', DEBUG_PREFIX + ' voice mapping voiceId', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
return;
|
||||
}
|
||||
|
||||
// Todo erase from voicemap
|
||||
delete (this.settings.voiceMapDict[voiceName]);
|
||||
this.updateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
}
|
||||
|
||||
async onModelOriginChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
|
||||
if (model_origin == 'none') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
$('#coqui_api_model_div').hide();
|
||||
}
|
||||
|
||||
// show coqui model selected list (SAFE)
|
||||
if (model_origin == 'coqui-api') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModels) {
|
||||
let languageLabel = language;
|
||||
if (language in languageLabels)
|
||||
languageLabel = languageLabels[language];
|
||||
$('#coqui_api_language').append(new Option(languageLabel,language));
|
||||
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
|
||||
}
|
||||
|
||||
$('#coqui_api_model_div').show();
|
||||
}
|
||||
|
||||
// show coqui model full list (UNSAFE)
|
||||
if (model_origin == 'coqui-api-full') {
|
||||
$('#coqui_local_model_div').hide();
|
||||
|
||||
$('#coqui_api_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model language</option>')
|
||||
.val('none');
|
||||
|
||||
for(let language in coquiApiModelsFull) {
|
||||
let languageLabel = language;
|
||||
if (language in languageLabels)
|
||||
languageLabel = languageLabels[language];
|
||||
$('#coqui_api_language').append(new Option(languageLabel,language));
|
||||
console.log(DEBUG_PREFIX,'added language',languageLabel,'(',language,')');
|
||||
}
|
||||
|
||||
$('#coqui_api_model_div').show();
|
||||
}
|
||||
|
||||
|
||||
// show local model list
|
||||
if (model_origin == 'local') {
|
||||
$('#coqui_api_model_div').hide();
|
||||
$('#coqui_local_model_div').show();
|
||||
}
|
||||
}
|
||||
|
||||
async onModelLanguageChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
console.debug(model_language);
|
||||
|
||||
if (model_language == 'none') {
|
||||
$('#coqui_api_model_name').hide();
|
||||
return;
|
||||
}
|
||||
|
||||
$('#coqui_api_model_name').show();
|
||||
$('#coqui_api_model_name')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select model</option>')
|
||||
.val('none');
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
for(let model_dataset in modelDict[model_language])
|
||||
for(let model_name in modelDict[model_language][model_dataset]) {
|
||||
const model_id = model_dataset + '/' + model_name;
|
||||
const model_label = model_name + ' (' + model_dataset + ' dataset)';
|
||||
$('#coqui_api_model_name').append(new Option(model_label, model_id));
|
||||
}
|
||||
}
|
||||
|
||||
async onModelNameChange() {
|
||||
throwIfModuleMissing();
|
||||
resetModelSettings();
|
||||
$('#coqui_api_model_settings').hide();
|
||||
const model_origin = $('#coqui_model_origin').val();
|
||||
|
||||
// No model selected
|
||||
if ($('#coqui_api_model_name').val() == 'none') {
|
||||
$('#coqui_api_model_install_button').off('click');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
return;
|
||||
}
|
||||
|
||||
// Get languages and speakers options
|
||||
const model_language = $('#coqui_api_language').val();
|
||||
const tokens = $('#coqui_api_model_name').val().split('/');
|
||||
const model_dataset = tokens[0];
|
||||
const model_name = tokens[1];
|
||||
|
||||
let modelDict = coquiApiModels;
|
||||
if (model_origin == 'coqui-api-full')
|
||||
modelDict = coquiApiModelsFull;
|
||||
|
||||
const model_settings = modelDict[model_language][model_dataset][model_name];
|
||||
|
||||
if ('languages' in model_settings) {
|
||||
$('#coqui_api_model_settings').show();
|
||||
$('#coqui_api_model_settings_language').show();
|
||||
$('#coqui_api_model_settings_language')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select language</option>')
|
||||
.val('none');
|
||||
|
||||
for (let i = 0; i < model_settings['languages'].length; i++) {
|
||||
const language_label = JSON.stringify(model_settings['languages'][i]).replaceAll('"', '');
|
||||
$('#coqui_api_model_settings_language').append(new Option(language_label, i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
$('#coqui_api_model_settings_language').hide();
|
||||
}
|
||||
|
||||
if ('speakers' in model_settings) {
|
||||
$('#coqui_api_model_settings').show();
|
||||
$('#coqui_api_model_settings_speaker').show();
|
||||
$('#coqui_api_model_settings_speaker')
|
||||
.find('option')
|
||||
.remove()
|
||||
.end()
|
||||
.append('<option value="none">Select speaker</option>')
|
||||
.val('none');
|
||||
|
||||
for (let i = 0; i < model_settings['speakers'].length; i++) {
|
||||
const speaker_label = JSON.stringify(model_settings['speakers'][i]).replaceAll('"', '');
|
||||
$('#coqui_api_model_settings_speaker').append(new Option(speaker_label, i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
$('#coqui_api_model_settings_speaker').hide();
|
||||
}
|
||||
|
||||
$('#coqui_api_model_install_status').text('Requesting model to extras server...');
|
||||
$('#coqui_api_model_install_status').show();
|
||||
|
||||
// Check if already installed and propose to do it otherwise
|
||||
const model_id = modelDict[model_language][model_dataset][model_name]['id'];
|
||||
console.debug(DEBUG_PREFIX,'Check if model is already installed',model_id);
|
||||
let result = await CoquiTtsProvider.checkmodel_state(model_id);
|
||||
result = await result.json();
|
||||
const model_state = result['model_state'];
|
||||
|
||||
console.debug(DEBUG_PREFIX, ' Model state:', model_state);
|
||||
|
||||
if (model_state == 'installed') {
|
||||
$('#coqui_api_model_install_status').text('Model already installed on extras server');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
}
|
||||
else {
|
||||
let action = 'download';
|
||||
if (model_state == 'corrupted') {
|
||||
action = 'repare';
|
||||
//toastr.error("Click install button to reinstall the model "+$("#coqui_api_model_name").find(":selected").text(), DEBUG_PREFIX+" corrupted model install", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Model found but incomplete try install again (maybe still downloading)'); // (remove and download again)
|
||||
}
|
||||
else {
|
||||
toastr.info('Click download button to install the model ' + $('#coqui_api_model_name').find(':selected').text(), DEBUG_PREFIX + ' model not installed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Model not found on extras server');
|
||||
}
|
||||
|
||||
const onModelNameChange_pointer = this.onModelNameChange;
|
||||
|
||||
$('#coqui_api_model_install_button').off('click').on('click', async function () {
|
||||
try {
|
||||
$('#coqui_api_model_install_status').text('Downloading model...');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
//toastr.info("For model "+model_id, DEBUG_PREFIX+" Started "+action, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
let apiResult = await CoquiTtsProvider.installModel(model_id, action);
|
||||
apiResult = await apiResult.json();
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Response:', apiResult);
|
||||
|
||||
if (apiResult['status'] == 'done') {
|
||||
$('#coqui_api_model_install_status').text('Model installed and ready to use!');
|
||||
$('#coqui_api_model_install_button').hide();
|
||||
onModelNameChange_pointer();
|
||||
}
|
||||
|
||||
if (apiResult['status'] == 'downloading') {
|
||||
toastr.error('Check extras console for progress', DEBUG_PREFIX + ' already downloading', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
$('#coqui_api_model_install_status').text('Already downloading a model, check extras console!');
|
||||
$('#coqui_api_model_install_button').show();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(error);
|
||||
toastr.error(error, DEBUG_PREFIX + ' error with model download', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
onModelNameChange_pointer();
|
||||
}
|
||||
// will refresh model status
|
||||
});
|
||||
|
||||
$('#coqui_api_model_install_button').show();
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//#############################//
|
||||
// API Calls //
|
||||
//#############################//
|
||||
|
||||
/*
|
||||
Check model installation state, return one of ["installed", "corrupted", "absent"]
|
||||
*/
|
||||
static async checkmodel_state(model_id) {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/coqui-api/check-model-state';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': model_id,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Check model state request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
static async installModel(model_id, action) {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/coqui-api/install-model';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': model_id,
|
||||
'action': action,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Install model ' + model_id + ' request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
/*
|
||||
Retrieve user custom models
|
||||
*/
|
||||
static async getLocalModelList() {
|
||||
throwIfModuleMissing();
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/local/get-models';
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'model_id': 'model_id',
|
||||
'action': 'action',
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX + ' Get local model list request failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
|
||||
// Expect voiceId format to be like:
|
||||
// tts_models/multilingual/multi-dataset/your_tts[2][1]
|
||||
// tts_models/en/ljspeech/glow-tts
|
||||
// tts_models/ja/kokoro/tacotron2-DDC
|
||||
async generateTts(text, voiceId) {
|
||||
throwIfModuleMissing();
|
||||
voiceId = this.settings.customVoices[voiceId];
|
||||
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/text-to-speech/coqui/generate-tts';
|
||||
|
||||
let language = 'none';
|
||||
let speaker = 'none';
|
||||
const tokens = voiceId.replaceAll(']', '').replaceAll('"', '').split('[');
|
||||
const model_id = tokens[0];
|
||||
|
||||
console.debug(DEBUG_PREFIX, 'Preparing TTS request for', tokens);
|
||||
|
||||
// First option
|
||||
if (tokens.length > 1) {
|
||||
const option1 = tokens[1];
|
||||
|
||||
if (model_id.includes('multilingual'))
|
||||
language = option1;
|
||||
else
|
||||
speaker = option1;
|
||||
}
|
||||
|
||||
// Second option
|
||||
if (tokens.length > 2)
|
||||
speaker = tokens[2];
|
||||
|
||||
const apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'no-cache',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
'text': text,
|
||||
'model_id': model_id,
|
||||
'language_id': parseInt(language),
|
||||
'speaker_id': parseInt(speaker),
|
||||
}),
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, 'TTS Generation Failed');
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
}
|
||||
|
||||
// Voice discovery is not implemented: expose the keys of the saved voice map as voice objects instead
|
||||
async fetchTtsVoiceObjects() {
|
||||
const voiceIds = Object
|
||||
.keys(this.settings.voiceMapDict)
|
||||
.map(voice => ({ name: voice, voice_id: voice, preview_url: false }));
|
||||
return voiceIds;
|
||||
}
|
||||
|
||||
// Do nothing
|
||||
previewTtsVoice(id) {
|
||||
return;
|
||||
}
|
||||
|
||||
async fetchTtsFromHistory(history_item_id) {
|
||||
return Promise.resolve(history_item_id);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Load the list of local Coqui models once and fill the local-model dropdown.
 *
 * Does nothing when `modules` does not include 'coqui-tts' or when the list
 * has already been received. Otherwise it stores the server's `models_list`
 * in `coquiLocalModels`, rebuilds the `#coqui_local_model_name` options
 * (a "Select model" placeholder followed by one option per model), and sets
 * `coquiLocalModelsReceived` so later calls are no-ops.
 *
 * @returns {Promise<void>}
 */
async function initLocalModels() {
    // Guard: module not listed, or the list was already loaded earlier.
    if (!modules.includes('coqui-tts') || coquiLocalModelsReceived) {
        return;
    }

    const response = await CoquiTtsProvider.getLocalModelList();
    const payload = await response.json();

    coquiLocalModels = payload['models_list'];

    const dropdown = $('#coqui_local_model_name');
    dropdown.show();

    // Reset the dropdown to just the placeholder entry, selected.
    dropdown.find('option').remove();
    dropdown
        .append('<option value="none">Select model</option>')
        .val('none');

    for (const modelName of coquiLocalModels) {
        dropdown.append(new Option(modelName, modelName));
    }

    coquiLocalModelsReceived = true;
}
|
||||
@@ -0,0 +1,190 @@
|
||||
{
|
||||
"multilingual": {
|
||||
"multi-dataset": {
|
||||
"your_tts": {
|
||||
"id": "tts_models/multilingual/multi-dataset/your_tts",
|
||||
"languages": [
|
||||
"en",
|
||||
"fr-fr",
|
||||
"pt-br"
|
||||
],
|
||||
"speakers": [
|
||||
"female-en-5",
|
||||
"female-en-5\n",
|
||||
"female-pt-4\n",
|
||||
"male-en-2",
|
||||
"male-en-2\n",
|
||||
"male-pt-3\n"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ljspeech": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC"
|
||||
},
|
||||
"glow-tts": {
|
||||
"id": "tts_models/en/ljspeech/glow-tts"
|
||||
},
|
||||
"speedy-speech": {
|
||||
"id": "tts_models/en/ljspeech/speedy-speech"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
}
|
||||
},
|
||||
"vctk": {
|
||||
"vits": {
|
||||
"id": "tts_models/en/vctk/vits",
|
||||
"speakers": [
|
||||
"ED\n",
|
||||
"p225",
|
||||
"p226",
|
||||
"p227",
|
||||
"p228",
|
||||
"p229",
|
||||
"p230",
|
||||
"p231",
|
||||
"p232",
|
||||
"p233",
|
||||
"p234",
|
||||
"p236",
|
||||
"p237",
|
||||
"p238",
|
||||
"p239",
|
||||
"p240",
|
||||
"p241",
|
||||
"p243",
|
||||
"p244",
|
||||
"p245",
|
||||
"p246",
|
||||
"p247",
|
||||
"p248",
|
||||
"p249",
|
||||
"p250",
|
||||
"p251",
|
||||
"p252",
|
||||
"p253",
|
||||
"p254",
|
||||
"p255",
|
||||
"p256",
|
||||
"p257",
|
||||
"p258",
|
||||
"p259",
|
||||
"p260",
|
||||
"p261",
|
||||
"p262",
|
||||
"p263",
|
||||
"p264",
|
||||
"p265",
|
||||
"p266",
|
||||
"p267",
|
||||
"p268",
|
||||
"p269",
|
||||
"p270",
|
||||
"p271",
|
||||
"p272",
|
||||
"p273",
|
||||
"p274",
|
||||
"p275",
|
||||
"p276",
|
||||
"p277",
|
||||
"p278",
|
||||
"p279",
|
||||
"p280",
|
||||
"p281",
|
||||
"p282",
|
||||
"p283",
|
||||
"p284",
|
||||
"p285",
|
||||
"p286",
|
||||
"p287",
|
||||
"p288",
|
||||
"p292",
|
||||
"p293",
|
||||
"p294",
|
||||
"p295",
|
||||
"p297",
|
||||
"p298",
|
||||
"p299",
|
||||
"p300",
|
||||
"p301",
|
||||
"p302",
|
||||
"p303",
|
||||
"p304",
|
||||
"p305",
|
||||
"p306",
|
||||
"p307",
|
||||
"p308",
|
||||
"p310",
|
||||
"p311",
|
||||
"p312",
|
||||
"p313",
|
||||
"p314",
|
||||
"p316",
|
||||
"p317",
|
||||
"p318",
|
||||
"p323",
|
||||
"p326",
|
||||
"p329",
|
||||
"p330",
|
||||
"p333",
|
||||
"p334",
|
||||
"p335",
|
||||
"p336",
|
||||
"p339",
|
||||
"p340",
|
||||
"p341",
|
||||
"p343",
|
||||
"p345",
|
||||
"p347",
|
||||
"p351",
|
||||
"p360",
|
||||
"p361",
|
||||
"p362",
|
||||
"p363",
|
||||
"p364",
|
||||
"p374",
|
||||
"p376"
|
||||
]
|
||||
}
|
||||
},
|
||||
"jenny": {
|
||||
"jenny": {
|
||||
"id": "tts_models/en/jenny/jenny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"es": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/es/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/es/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/fr/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fr/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,870 @@
|
||||
{
|
||||
"multilingual": {
|
||||
"multi-dataset": {
|
||||
"your_tts": {
|
||||
"id": "tts_models/multilingual/multi-dataset/your_tts",
|
||||
"languages": [
|
||||
"en",
|
||||
"fr-fr",
|
||||
"pt-br"
|
||||
],
|
||||
"speakers": [
|
||||
"female-en-5",
|
||||
"female-en-5\n",
|
||||
"female-pt-4\n",
|
||||
"male-en-2",
|
||||
"male-en-2\n",
|
||||
"male-pt-3\n"
|
||||
]
|
||||
},
|
||||
"bark": {
|
||||
"id": "tts_models/multilingual/multi-dataset/bark"
|
||||
}
|
||||
}
|
||||
},
|
||||
"bg": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/bg/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"cs": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/cs/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"da": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/da/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"et": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/et/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ga": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/ga/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ek1": {
|
||||
"tacotron2": {
|
||||
"id": "tts_models/en/ek1/tacotron2"
|
||||
}
|
||||
},
|
||||
"ljspeech": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC"
|
||||
},
|
||||
"tacotron2-DDC_ph": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DDC_ph"
|
||||
},
|
||||
"glow-tts": {
|
||||
"id": "tts_models/en/ljspeech/glow-tts"
|
||||
},
|
||||
"speedy-speech": {
|
||||
"id": "tts_models/en/ljspeech/speedy-speech"
|
||||
},
|
||||
"tacotron2-DCA": {
|
||||
"id": "tts_models/en/ljspeech/tacotron2-DCA"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
},
|
||||
"vits--neon": {
|
||||
"id": "tts_models/en/ljspeech/vits--neon"
|
||||
},
|
||||
"fast_pitch": {
|
||||
"id": "tts_models/en/ljspeech/fast_pitch"
|
||||
},
|
||||
"overflow": {
|
||||
"id": "tts_models/en/ljspeech/overflow"
|
||||
},
|
||||
"neural_hmm": {
|
||||
"id": "tts_models/en/ljspeech/neural_hmm"
|
||||
}
|
||||
},
|
||||
"vctk": {
|
||||
"vits": {
|
||||
"id": "tts_models/en/vctk/vits",
|
||||
"speakers": [
|
||||
"ED\n",
|
||||
"p225",
|
||||
"p226",
|
||||
"p227",
|
||||
"p228",
|
||||
"p229",
|
||||
"p230",
|
||||
"p231",
|
||||
"p232",
|
||||
"p233",
|
||||
"p234",
|
||||
"p236",
|
||||
"p237",
|
||||
"p238",
|
||||
"p239",
|
||||
"p240",
|
||||
"p241",
|
||||
"p243",
|
||||
"p244",
|
||||
"p245",
|
||||
"p246",
|
||||
"p247",
|
||||
"p248",
|
||||
"p249",
|
||||
"p250",
|
||||
"p251",
|
||||
"p252",
|
||||
"p253",
|
||||
"p254",
|
||||
"p255",
|
||||
"p256",
|
||||
"p257",
|
||||
"p258",
|
||||
"p259",
|
||||
"p260",
|
||||
"p261",
|
||||
"p262",
|
||||
"p263",
|
||||
"p264",
|
||||
"p265",
|
||||
"p266",
|
||||
"p267",
|
||||
"p268",
|
||||
"p269",
|
||||
"p270",
|
||||
"p271",
|
||||
"p272",
|
||||
"p273",
|
||||
"p274",
|
||||
"p275",
|
||||
"p276",
|
||||
"p277",
|
||||
"p278",
|
||||
"p279",
|
||||
"p280",
|
||||
"p281",
|
||||
"p282",
|
||||
"p283",
|
||||
"p284",
|
||||
"p285",
|
||||
"p286",
|
||||
"p287",
|
||||
"p288",
|
||||
"p292",
|
||||
"p293",
|
||||
"p294",
|
||||
"p295",
|
||||
"p297",
|
||||
"p298",
|
||||
"p299",
|
||||
"p300",
|
||||
"p301",
|
||||
"p302",
|
||||
"p303",
|
||||
"p304",
|
||||
"p305",
|
||||
"p306",
|
||||
"p307",
|
||||
"p308",
|
||||
"p310",
|
||||
"p311",
|
||||
"p312",
|
||||
"p313",
|
||||
"p314",
|
||||
"p316",
|
||||
"p317",
|
||||
"p318",
|
||||
"p323",
|
||||
"p326",
|
||||
"p329",
|
||||
"p330",
|
||||
"p333",
|
||||
"p334",
|
||||
"p335",
|
||||
"p336",
|
||||
"p339",
|
||||
"p340",
|
||||
"p341",
|
||||
"p343",
|
||||
"p345",
|
||||
"p347",
|
||||
"p351",
|
||||
"p360",
|
||||
"p361",
|
||||
"p362",
|
||||
"p363",
|
||||
"p364",
|
||||
"p374",
|
||||
"p376"
|
||||
]
|
||||
},
|
||||
"fast_pitch": {
|
||||
"id": "tts_models/en/vctk/fast_pitch",
|
||||
"speakers": [
|
||||
"VCTK_p225",
|
||||
"VCTK_p226",
|
||||
"VCTK_p227",
|
||||
"VCTK_p228",
|
||||
"VCTK_p229",
|
||||
"VCTK_p230",
|
||||
"VCTK_p231",
|
||||
"VCTK_p232",
|
||||
"VCTK_p233",
|
||||
"VCTK_p234",
|
||||
"VCTK_p236",
|
||||
"VCTK_p237",
|
||||
"VCTK_p238",
|
||||
"VCTK_p239",
|
||||
"VCTK_p240",
|
||||
"VCTK_p241",
|
||||
"VCTK_p243",
|
||||
"VCTK_p244",
|
||||
"VCTK_p245",
|
||||
"VCTK_p246",
|
||||
"VCTK_p247",
|
||||
"VCTK_p248",
|
||||
"VCTK_p249",
|
||||
"VCTK_p250",
|
||||
"VCTK_p251",
|
||||
"VCTK_p252",
|
||||
"VCTK_p253",
|
||||
"VCTK_p254",
|
||||
"VCTK_p255",
|
||||
"VCTK_p256",
|
||||
"VCTK_p257",
|
||||
"VCTK_p258",
|
||||
"VCTK_p259",
|
||||
"VCTK_p260",
|
||||
"VCTK_p261",
|
||||
"VCTK_p262",
|
||||
"VCTK_p263",
|
||||
"VCTK_p264",
|
||||
"VCTK_p265",
|
||||
"VCTK_p266",
|
||||
"VCTK_p267",
|
||||
"VCTK_p268",
|
||||
"VCTK_p269",
|
||||
"VCTK_p270",
|
||||
"VCTK_p271",
|
||||
"VCTK_p272",
|
||||
"VCTK_p273",
|
||||
"VCTK_p274",
|
||||
"VCTK_p275",
|
||||
"VCTK_p276",
|
||||
"VCTK_p277",
|
||||
"VCTK_p278",
|
||||
"VCTK_p279",
|
||||
"VCTK_p280",
|
||||
"VCTK_p281",
|
||||
"VCTK_p282",
|
||||
"VCTK_p283",
|
||||
"VCTK_p284",
|
||||
"VCTK_p285",
|
||||
"VCTK_p286",
|
||||
"VCTK_p287",
|
||||
"VCTK_p288",
|
||||
"VCTK_p292",
|
||||
"VCTK_p293",
|
||||
"VCTK_p294",
|
||||
"VCTK_p295",
|
||||
"VCTK_p297",
|
||||
"VCTK_p298",
|
||||
"VCTK_p299",
|
||||
"VCTK_p300",
|
||||
"VCTK_p301",
|
||||
"VCTK_p302",
|
||||
"VCTK_p303",
|
||||
"VCTK_p304",
|
||||
"VCTK_p305",
|
||||
"VCTK_p306",
|
||||
"VCTK_p307",
|
||||
"VCTK_p308",
|
||||
"VCTK_p310",
|
||||
"VCTK_p311",
|
||||
"VCTK_p312",
|
||||
"VCTK_p313",
|
||||
"VCTK_p314",
|
||||
"VCTK_p316",
|
||||
"VCTK_p317",
|
||||
"VCTK_p318",
|
||||
"VCTK_p323",
|
||||
"VCTK_p326",
|
||||
"VCTK_p329",
|
||||
"VCTK_p330",
|
||||
"VCTK_p333",
|
||||
"VCTK_p334",
|
||||
"VCTK_p335",
|
||||
"VCTK_p336",
|
||||
"VCTK_p339",
|
||||
"VCTK_p340",
|
||||
"VCTK_p341",
|
||||
"VCTK_p343",
|
||||
"VCTK_p345",
|
||||
"VCTK_p347",
|
||||
"VCTK_p351",
|
||||
"VCTK_p360",
|
||||
"VCTK_p361",
|
||||
"VCTK_p362",
|
||||
"VCTK_p363",
|
||||
"VCTK_p364",
|
||||
"VCTK_p374",
|
||||
"VCTK_p376"
|
||||
]
|
||||
}
|
||||
},
|
||||
"sam": {
|
||||
"tacotron-DDC": {
|
||||
"id": "tts_models/en/sam/tacotron-DDC"
|
||||
}
|
||||
},
|
||||
"blizzard2013": {
|
||||
"capacitron-t2-c50": {
|
||||
"id": "tts_models/en/blizzard2013/capacitron-t2-c50"
|
||||
},
|
||||
"capacitron-t2-c150_v2": {
|
||||
"id": "tts_models/en/blizzard2013/capacitron-t2-c150_v2"
|
||||
}
|
||||
},
|
||||
"multi-dataset": {
|
||||
"tortoise-v2": {
|
||||
"id": "tts_models/en/multi-dataset/tortoise-v2"
|
||||
}
|
||||
},
|
||||
"jenny": {
|
||||
"jenny": {
|
||||
"id": "tts_models/en/jenny/jenny"
|
||||
}
|
||||
}
|
||||
},
|
||||
"es": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/es/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/es/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fr": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/fr/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fr/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"uk": {
|
||||
"mai": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/uk/mai/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/uk/mai/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"zh-CN": {
|
||||
"baker": {
|
||||
"tacotron2-DDC-GST": {
|
||||
"id": "tts_models/zh-CN/baker/tacotron2-DDC-GST"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nl": {
|
||||
"mai": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/nl/mai/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/nl/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"de": {
|
||||
"thorsten": {
|
||||
"tacotron2-DCA": {
|
||||
"id": "tts_models/de/thorsten/tacotron2-DCA"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/de/thorsten/vits"
|
||||
},
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/de/thorsten/tacotron2-DDC"
|
||||
}
|
||||
},
|
||||
"css10": {
|
||||
"vits-neon": {
|
||||
"id": "tts_models/de/css10/vits-neon"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
"kokoro": {
|
||||
"tacotron2-DDC": {
|
||||
"id": "tts_models/ja/kokoro/tacotron2-DDC"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tr": {
|
||||
"common-voice": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/tr/common-voice/glow-tts"
|
||||
}
|
||||
}
|
||||
},
|
||||
"it": {
|
||||
"mai_female": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/it/mai_female/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/it/mai_female/vits"
|
||||
}
|
||||
},
|
||||
"mai_male": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/it/mai_male/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/it/mai_male/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ewe": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/ewe/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hau": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/hau/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lin": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/lin/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tw_akuapem": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/tw_akuapem/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tw_asante": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/tw_asante/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"yor": {
|
||||
"openbible": {
|
||||
"vits": {
|
||||
"id": "tts_models/yor/openbible/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hu": {
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/hu/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"el": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/el/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"fi": {
|
||||
"css10": {
|
||||
"vits": {
|
||||
"id": "tts_models/fi/css10/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"hr": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/hr/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/lt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"lv": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/lv/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"mt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/mt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pl": {
|
||||
"mai_female": {
|
||||
"vits": {
|
||||
"id": "tts_models/pl/mai_female/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pt": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/pt/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ro": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/ro/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sk": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sk/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sl": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sl/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"sv": {
|
||||
"cv": {
|
||||
"vits": {
|
||||
"id": "tts_models/sv/cv/vits"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ca": {
|
||||
"custom": {
|
||||
"vits": {
|
||||
"id": "tts_models/ca/custom/vits",
|
||||
"speakers": [
|
||||
"00236e350cc84b94a6684f182acf96e68963d7fa1164d4fa56da20f46f210b2dd3ecf189e97fb3c94113a54c12dc20550508f5b7b9b37e1873898d58a308feb5",
|
||||
"00459",
|
||||
"00762",
|
||||
"00983a845f95493fb27125b114c635f3b40060efaee167d32d8a3dd040c877713446c7bd3e6944641227bdb4165ecb8d684ec2ef66c817e65e77c52cc50e62ed",
|
||||
"01591",
|
||||
"02452",
|
||||
"02689",
|
||||
"02992",
|
||||
"02f7d61edf5063ca42953b1068539f1572985aa9448555cfd8d7667121eeedc72c912d95cf33abf61a1f9620f2a01be4251a53aa5440d15849003fb31210d830",
|
||||
"03115",
|
||||
"03386",
|
||||
"03655",
|
||||
"03944",
|
||||
"04247",
|
||||
"04484",
|
||||
"04787",
|
||||
"04910",
|
||||
"05147",
|
||||
"056d7638d714a7dc1efe1c47d390d0659fbfdfc7df5249e8bfe10ba346cc76d5cda93fc8ecbeadffd4924c4f9cfb6b32c1739c8af1e2d58d7cec88b2cf18795f",
|
||||
"05739",
|
||||
"06008",
|
||||
"06042",
|
||||
"06279",
|
||||
"06311",
|
||||
"06582",
|
||||
"06705",
|
||||
"06942",
|
||||
"06c6d2e093624103c268e2cba37466147fd564bff1312a78d1c5be9ba168af4cf4819c7a91d5321d7aa9bd20ad6c702ca2cb005496dd20c45d293200b2b8a7b9",
|
||||
"07140",
|
||||
"07245",
|
||||
"07803",
|
||||
"08001",
|
||||
"08106",
|
||||
"085503e68b0772f1b3aa4de86a57bb26e3750660e7929a14a653c729787a110cc8b3704f8ea09842f72be46b6ffbb35bdb3732308b31dceefc3b33e5ad3f7975",
|
||||
"08664",
|
||||
"08935",
|
||||
"08967",
|
||||
"09204",
|
||||
"09598",
|
||||
"09901",
|
||||
"0befb1084ad00d656f45a87ad83f074c61e3b3767cf6f5463fd5bc199ab7fd4733c5f02e3a100359e953977cc2a2689bd4824ef6e3178a7108cb45a0204fb3cb",
|
||||
"0c6bf67821762116d753c9b48ebed8a2ccfa0a956d5dbf19feb0ac0bc2096154ca288ae7c5e324a3092db395cc24c64c6a4e4fb0e01429b7343cafc7ac1b2e13",
|
||||
"0d0a943d348b4f0948da443c4d020b2e690731955ce8c318c0fb72663cfec3cd3458488ff9ff9cee6d221c85771b8eb83cc087dda37d4109bbb1614039e5f565",
|
||||
"0da83aed14276e120e2581be32891bb088a22c272feb6f03b4bac1b827cccdbc8fee277a885f58e98931819e0d6171526c5fa7b2e788f68a2852e4d5314f613b",
|
||||
"0ff19536d6147f61b24d50c0c993a7a687df4d253c2052e4fa30b1624c87e60075649d888f51ce71318fb8789cb378879091aba020256d66ac19f024833c3e63",
|
||||
"125d9d1721de26a9b89d0e4f4d386e07458d287ebb2f338879e8886847abe6b3209f79e2bc335bcfc437350184df5a7d9e1a08ffb5239674edfd1cf95a9d1e24",
|
||||
"1378866a4d2b6965c03eed8e48e03fffd089638acdf1fa82ed20a9856406e083f0c0e1f5043c4d3bf67dbc383f7cd28b602eff1d8bd8bf8c1a0191dc98540322",
|
||||
"14bc32c10eb26503a4e799c3a762bfe5949d7a232074e854ecfad8139acaa4257c563a502e06a263f2d5fa8337114a9741d4a634a0f914adae74e5f9a80f145f",
|
||||
"151fcb1168f41a51c49e20b426605109e1d9dd50be2926aa9878040fe325eba2f2c470357a735e9e24b7412e78e2550a8fbd0ea77fed80e4d8f50a21f2064948",
|
||||
"1610e29603954ee12f408eaeb83a5c99781b5efe5f64fdf2e3c5e0ba9756b5b11bfeefda40a787842f7d5da653135043e3f43e7f786499cb51ad6181ef8acc9d",
|
||||
"1887c37f4187a4c3213ba4b58d0ef15f903a3720ee94d5ce59c33db193f60db416a4d9607f213ae44ca3eafc7217fb9871e2ee1837ad4cc0f34794e5c543f9f5",
|
||||
"1add23d44d2d913f0ea6e061fe292b0563653fa15b4ecc9cce0c2b83dc5a743d6d3dd4625efa112f751852c348a667a9456ef3486ae7e8c5954dedc69f998ea2",
|
||||
"1b7fc0c4e437188bdf1b03ed21d45b780b525fd0dc3900b9759d0755e34bc25e31d64e69c5bd547ed0eda67d104fc0d658b8ec78277810830167c53ef8ced24b",
|
||||
"1b8354b1fe9255578225b3d2255d5e781eed7d13ab61e84bc08be5d6465ec468c533563137ca756fcb7d3759af0ac2c0b4d00873782c7bf47ea72fd9be2f9e8c",
|
||||
"1be6c773da6334cc73e23312689bc8a5915529c905e1d5289dbfe00332a7dcb9ae97efe209635e2e5040783777409155926d231a5a5f76357494671512d2b1a4",
|
||||
"1c7af1cc1357fd63bd9ffe915745e20c34588438e1e0d85fdc8c9de4b3bd41d3d61b318b6a69862c6d64dd41f15ef3d994a6bb6c9a9dac69c891308b09ab16a5",
|
||||
"1c7f19a7fa0b166c700bac583b6858ce7adbe19566d66e530953273aae59776757aebaeb30c20a58d74bc50ce1345516af5bbd36168f443fed809bf54c02f63c",
|
||||
"1c80e9d982aa0c12db8498e8275b2281e638e8e5c684a752e19f8f9842979b70a3624785d0c30e0e57112950cae5e892bf554c295c74cf8c82c8ec33c732d8c6",
|
||||
"2256cc5ee6c617347af9a1abd97dfe80f55e6691eb0a042321d46a1bd7ce0baf1c7a4c8ea3fe4184f8638b1c3d9e83b6aa193bd6f9b49d5358523f1fe324cd92",
|
||||
"238532dddf77923ce93cf2e9ed809d088094106a1aad327e8a7b229ce24a339771e59478f7d5162efc1da6f347b44cbb2273ac9154aa3a76c7a8fc458470cc2b",
|
||||
"241ca4fdf2124f550657446301fb8dfc8bdef46d3888ce39bf9d8622c2bbec7e06b198f5e33fadbf30e477fdb37435cea36d10341af1a7d3a80d0ad1caa94bf9",
|
||||
"2421aa51a089ecfe45250bf284d5690a9994a9eb03f2ba4f43d2ad73fe78783ae5f3d3088d772e01fd1d747b2ecd6bb1bfae5feb10a72130d3952ba7304d5c53",
|
||||
"24d967d0e8b84beb3652417724be81ab83c7834afaa7b7d3d7d9591b1a2a7bb75f9b25be548a200570ebd6cc34e91306b675af510ef91cd34a77060b65b9faaa",
|
||||
"25911630ab15956e81427d3e990cf37f79490e305914a15ca7dd7b95dd4d4feb15fd94549cc005376801ce68d637eab6e19ee36017dba5c01bd0f206e5e8dc3b",
|
||||
"26099adbc4db8fcf000e2c7d1da3399662281f9af03831808d29c602431af4fc13f21b38c5c42c5ac4f77ece48448eef99f735d92cdaed857d19da7dd2b888ad",
|
||||
"28e2fe1944a593c44c3de0dc52971f040f0b8901fced2057025bdbafa1fe3b042be19618044ae085d7364e3cb38601e9ce4030329f15af7a0898f9d4c2c5014f",
|
||||
"2b59e9f830e5ea00c500b63eff4e72553e0c2608f5741d35c226e733400412014d7697bd6efb67bc61b19fd61e40f9ed70fde2589fe0b5498915eebc1c8b5d93",
|
||||
"2bc2a177bf56dcc98e05501e7bc6eebd3d1662114764299a4f9e6b060a48095b8ec95d20a5814f71343d65ee3cd2e7f42a80faa51a148005242cc5073e605ba4",
|
||||
"2ce84c6ea6aae52c449b6d34cbc095b2f2c3e6fa20d0e48b2f7d223724ae01375e92a8ad106b029f0562ee735de36d9bca6cd167257c3f68796bd8b1a0ab600f",
|
||||
"2d84f39c2cca33dd28fee650caa022c9a06407462342fae8dc256af7904cdd114af5b4cc883181407b8dcf2dc4a93d45c62c83a317d84e876cf710a521f20d80",
|
||||
"2e6ccdf9f0a7bf0df6fd0572bbb53f25378fc5333b352bc885b3a0d01e5dd672156cf697c127cc998ac546d122c547c421970a6f23dccb60bf2c841146fa6576",
|
||||
"2f92b4704080216aa80b1b39cfa223feeb9ed7c909f5b77be1b6e45fdc8827463bc4c4cd98263f02b57e653ecc2ccc7192aedd92990113433077e7ed44eb1e0a",
|
||||
"2fb95c3b786fa65215534207266e034b294317b2327ee0928be3436258e42db8f4479e86e6006979ec4438dbaa9daa05be21ddc66717d30dc43e36ead349965e",
|
||||
"30b1f81c579755895581259d79a8a5a3ca45b908b0bd14ad1c6418f39aa1e2f47cb4749c69b5440cdb92e3bafb772e19e7bc2b16d196b061addd173a1309e491",
|
||||
"31535cb2ece4710d08fdbeefb6f8f75ed093fee4cf8573bd601d960f8c6156f0fd0a85712761691e86e31160b993ee0eacb10c4c8aed000cc394cf7c7d207a7e",
|
||||
"31e6f3a011661320b2e59b6f8be43f6db2243e9feabc2b9787c1413788e13eb0e5810bed983bf7ff66e46417d183a91ed50b3b9be9d89e4f51aada72293b9881",
|
||||
"32550810ba55b9a67a25d308f0ede521f12cbf6076472ff5bd60a8f5e951c481b784e2f04194fb96116c4f001d84b3993b2c580879671de46333d5f212ff2ca5",
|
||||
"336f82b4645b80c99137018e69bb6f8138a9c8dc05a510e36922503120648625674e1414cd90d0cf46f28cbd5993ae0eaedc9994b72e8eb5242737ddefc0bfb2",
|
||||
"35b962b08846ed7d8a4cc47582a4e607f5ff4136042ce0b1adb55d1e8d58e2dc1abc5807b3601a7f7be1ada5939e1771e128fc916c1b5d39ed3619e251707952",
|
||||
"3637902e0d19f0080313c14d2c9dcde800ec6b71d493459c2f3b2cebf186f028ea289dd59ba1fd4705e53891216f7f4c36dbcb8938aeeaf142317b441b20a837",
|
||||
"3723bd65a05afc7411c2bfca904742062b7b0c081ba126e68c65d28eaa6122f69196f4959fc1795fe03f8e49ec7364863911f9b659684a06b3a126c6f1729551",
|
||||
"373d86f9fa3a127372dd913b7571ed318bfea42173b2b7daebde93c742f3224fb7fe5306085e836d20bfee4201bfe070b4c6b36510f5c9f379f6a3b610f36cd0",
|
||||
"379d321bff71ebcd34792e8f4552d341f30a006b4765f8c6de4fa98d3ca416def88adbcb0253f5849f51793b3d7ca7e53700ec70b5a97e84ccd1f35a2a1fb6e5",
|
||||
"37c12c700c95dc0028b3b82c4cf1fb922d68680c35fe84585bf22674e71c4dc53bd9233ce8b71bd31e9c9b0e000d01d195a3572b9055a73fbec891b1ade250cf",
|
||||
"3a4a32c7cff18f1896e7cbff2c19b4e6f91a95c1e7aab616722600ceb36a86b07fb0e1e0c70cc285dfb6192b53cb67826698b7f3f652549e27a969bde0177fa7",
|
||||
"404ecea5ae8e5f4ec3d2c48494cf7f1d559268542d8f1f7928da2fcde55c9fa3f491ba632f555ec69e8c9e819072df450add7e5886cf5527f446b11544af7d05",
|
||||
"41e5e21b3a3b0c8df01ba5b3c3e6224cc4082f41ca87679344b0273e2216cc272e19426c160f5a9580915c057a3e4000788be6cc7a6f5f346cad5068c7884ce7",
|
||||
"464d9ac63f7958200bc09a141171355bf4f3631d66dc4bbfabd497619a8f055c034c0752987944b2102e02d4b435bcd3ce0527962871112049e1d26865b776e9",
|
||||
"4869d94d4936ab700c5e5bc7b666177b53220082f5f221774b5625d7275cd4f117482dcd1498674b7f885fa41d86f99b8d00b6a6f641829780946651f561fc22",
|
||||
"496b66c9cb705a46cdfef9eeaf29c9d738a4b70b601270985a7df5a06f9e1d6c56be0982995c8cc06902d0ee89bae201c37a91f568331ffe28ad2d150e183fed",
|
||||
"49a7654071536ed5882b8b6e6d2e3558ef796ecd8aab8ceaa24ad8bc9f3420b528ef1413696584c11facd6d5bccd37fe8e274b8c3d139dc251ffb11c3a503aaa",
|
||||
"4b6c7e4e9bde35c471cbf5e2e93b2eb8bbba52b710acecf99910af08b3b35365f24d883ddfdd9825918c31477a5f3fc48f075080c4e97e80fecd6e1936bc92eb",
|
||||
"4bce212aca40bd1834bf741e47954526a8817ecbff8fedda854dbfc2d033a2567bd34b84fa02c3d07855f3dcf413590ae75ad6edf261d66bffb84d77803a7b76",
|
||||
"4cedaa8d96436fd0d2ebdb61d616790a3cb3737d0a93d2ae41d588137c0d3339999d991b7b3c452704be1f5f512ce5a08c0971898fad0ad77f18fd623411cd7f",
|
||||
"4d7e2548403c7e04d809030aa25015c9706e773517e1f72b81bdda22213aeb8f542cc62156bc5ef1c1622e99227fedbcc9c1b3e5e147b854e3b629f8f78bd158",
|
||||
"4de9f262eee7ee7d24ef8933af4610a1c5b97ff055c4fd0f97868e338a017308d460f4b003b74bd2aba7789153593f3b986b814fd93f2e4dfa5b55594fb17c55",
|
||||
"4e5e58a6ec7d9cac969f99b817f981ab7f8d2cbd9ab9dd0a37e45c70a8a8ca3b8e1c43b2013082062ffc1f4f3b268ea78ebb88d613d026a6312f40a6867a1d0b",
|
||||
"4ec8f1e81d7abd9d2dcb3dbd4be86b615f643386f3b1098c37a02a103fe6b36239c05bff6746ce568ca81765b285c1c271af4fb1fd99120341cae2851b776bbb",
|
||||
"4f57d1abde3364d91128e682ba724e6d3bc2ed6b112d2cd679739e478ea6bd671c527edff64c6a7b5c1173f68e02a410f09c2256356fde7d517908310c118382",
|
||||
"503dbbe83f0154e9bba4bc685bf1c1fbdd27293d0e4f837947910e4d320bc4d5bed1ade67a45b541013189a2c133f6f9f6cbc3566fad220c0635f286feec74c5",
|
||||
"51795e8ea8faa28e88f02559f6bdd47d9a0735589d47dd0f2e057b8b01fd3667fd9fd29e2613f200174af1d4b2d3d0860704cebebf2b6e79f1724d6782d7a270",
|
||||
"52cfac480c0cbc60068305d983adbf98814d2cfddb8be0ccfeb7c7f95bdaf31a5f70da944cc2453e6a5fbb9bb4092e36b662e838762855fb016f55e6e3d957c1",
|
||||
"537e815df93312978a9ba479ff2dffc9975c875950a203e8a1b7ffe4cb06625964f59dde1a06b87921a2a91702cc6bab04e159aec7cb2e2fc576cbe25838df2b",
|
||||
"547dd49c2cbe113b60c9df4a8e8b83a532f0da054cea8f1d23db66cc2638f7b5edfee820b4764646be10dbcd05caa5d71483477718a73ce8dfc752204807d9e5",
|
||||
"54f344faa37da0c9ab1ab563735c532ab81fcd1c8431cd8eca4ab7a8774f194e1ffe922547ea42bd1fad36e7493761992eca4821138ded1a9580e9fa38685291",
|
||||
"56071bfe30e977f201fa4d6808f8d7c2f3e6788ba68d12e2eb18386ac2507bb2bbe3c14bab90370066bbf6e2af42afcf1e45b362dba958d38fbc69cdea3874ea",
|
||||
"57e5f7cc5fac058f7c772eb41f8d49bd0fe3070c41eef445b1c073abf9b1cec451aa22764490b7da4c5bcacf4ee453c3153158cb1569f2f9447807cb14dc1126",
|
||||
"5a9a6481f1365def2919871790a95fdccbae145640f3b4b5e11d1d1370ed35c5a4c31e402b3b438892a6bfd9dfbbe2fc97056d2cc24f2ac412b3a5e1adb7003a",
|
||||
"5ba168675a3f2ea8d6d51896c5db84ee59ca65359b1b97e6d79543a6c918fe427f8b6cec79037c452eb086debe1d57049c25481d61a873f0503703266bf0cb84",
|
||||
"5da56ed896575439b7bbca20981f0b50618958d94f08b8f47d13774dca3990d4c571be1f4aa2786bf8fcbd1a594336be49cc26d972d5fb0c0682ab4d5b59d19e",
|
||||
"5ebf04dfec6c9b10a6fe7fd03725901973565a13530c20d02b4332670cce9beab185dd0b0f61f4a87f9c3dacc307d06e062d640f6550d9443a4c06a114e5bca5",
|
||||
"620b0d4c3be90f5f77f0cf9f976e5d7f067689884dc857f2b26a6edb40ef4fd2826213b5028900b168e853d036f1741600b236e04d8dcae5fa26cd2b8975ac04",
|
||||
"6323ec0401b28c1b06afa76760b478535101ae48c6c9367491087143287d9ff76b9c00f39dc838cdb20d65eab16622dc85143f5845791bf85705cb4f20975bbe",
|
||||
"633e7303eae41420e558e186308510783f5c234e9c639c0e6f5b6d37fca6bab766c5d475b2f330910bac93cef6982124e73a1b6bab1a2e99a2e5d797f8547c6d",
|
||||
"6688b60c24d068e19487c0b88a8b0a256854d8090ebebfa9a462fe49a77b8e9f303aa02042069cb0d6f227932cb48863758d0b57a18d53125ad39953bac543aa",
|
||||
"6745c47d0bd557c3dbce201697e8a2fbaff9ca52744d6007a636a237b82d1167795a0c0e2e5eb71b7460ed16e3fdcdac1dba1b7a2910d5168416e236c93ccb76",
|
||||
"6892c6ba9f66d0d7aa0445139081dc82a76d9ef8c7bb049a8eaf090f76c06cb4f1db05739038d7e04167569bb6d0fea55fc15343f7c77cd5a3e2d4c5ed068290",
|
||||
"689a213fd2d66b9d3634c9165b316e49ec53ac96131be42226d462ef1bc3ba38651e94698fd6e6f5c6d6c834d2b9a6732be54a8d6273c1025511d795326ffd3e",
|
||||
"696e8808717101399ab7ff16382db411adfadcd60c6a525539b0f8f88d84b448662fbca212b175379ff78ce7b2e64aa4b4e96d1820ade8eb2f742295f744db7c",
|
||||
"6bdec6b6f7e6b5a187feb6537101d90cca1043e34d53e347f2f0b14e701585361fc4a4cd81577b6d4588844fc8bdba8af66155d9eb6c2eefd461e23d0b2b87e4",
|
||||
"6e5948f904b3048511677d23d3cc9bd678739b234170302e1556c1bd1db8cee4243bf5e012a1320b4c50f6276e05cc5f620c461f640ac7413c23524f63f4aac3",
|
||||
"7115c00371f891d0094a716083b978948431509a16d5a9598e78ec12712db46d46f1674312cd31339e2d6118cca5f7a3f82ec25dce861a059ee31d832cd6dcda",
|
||||
"71b67ba5ec75978632136441a25426dbd48d4c0a55c1a5fc91f0f952b6bac06ab0d9709f0a7bb5a05393499135b76e4d722c7065fb636a227ba58c7fb86438eb",
|
||||
"72a3d5bde83f60653937232cf4d29218ff5988533855fdbc804d9bea7e94eb14a8afbe36a8f8ee576a3ed2345632d4ad36df52efdd9adbdb60da6f890074c6b6",
|
||||
"73d3685f3e78183724e3362f6c4288d522b54a8d2722197dc5ff5006974c1529dc562c1cbb05f023da4922cfc04340eb83b887c5343041febed1ddc44b22f9ed",
|
||||
"74a679bf6c4a1b5856a25780496812416383f0567afcbd9b411ae9a0abab47d466741bd925b03decd7da586c6ea9589c8f40208ac2a22fa4413d4ea6e1a6f0dd",
|
||||
"7638395f7d47fbf631633e2b899044e82e7ce0e07305114921cb0696551966b09993766782aaad70fa40a0f7362be31940381653c659fb73d3e1fd1fa45c257d",
|
||||
"76383f56d9979837d4b3348f9f28877dd1ba58c1bd0ea839bdabf021428c2edfba46ff25558004c5183a73575eb126d4e0746a40e22ab15154d5d6f238a48ca5",
|
||||
"77cd12af0a3d1d8cb64dd577bd2d50ac057d816694e8bc04089a6adb90e53ad6cbe9fe6aaf52596450e0c8178d8f9b88a545b27adcbc89bddf4d7c4bc4dd31fd",
|
||||
"7834da277192e9434b0c039272ee6b3f1b225d2f975aa175fee762fb0d5f16b1edba2e0dbf11d8aa2bab5984482f703f88f0e9d1786a7687710ea2688f307ccd",
|
||||
"79a830901c1bb0e27663dbe14d13df91d887daf0eabb6d3eee7f09768212afcd9cdde458d13042a9d2aa099f390c79ab94f2c1ca47fe0321f6c18973e437cdd6",
|
||||
"7b7593f44cc6f9f7b21495bca6f3d564f73f36b97ee15d51a783da8141463834022996c55e494800d21304079aefa8a5fe64350c9273e0d36453b097b2dcc5f4",
|
||||
"7c7d917d97412c24b76af336086469a43013d1d6b27298aa82c4e99b3f3b6c5a82014428a6a14b080a834382d9b0f178e405fb10170bcd340957955087698e19",
|
||||
"7d19dccf48114d3ec00c45fe80581300faca042157d6c9458ec439c300d8c7b1190aa70eecf19f8b1d5af8c7291f3da08fc635a7fd6acc7c5b203d1e226589f5",
|
||||
"7d8d6fa22ff724d823b82499686732b7fbf32f7c1f35dd5733ec3b65fa9625cf2d49bba86e6e0132252bea64074f35ff96a77bfa44441aed3fc1765b13cbc526",
|
||||
"7e36be2204fe367a3798e1b2ff988779890591e5c997b1f6025ec8ee1fef3eb19e81b74bb8657874f5a990d5062d6c849621ce363c4a9c2c5a63c0966be6140c",
|
||||
"7ff908cc2a18ec5a80e74fb4a2f12b406f0b7456ac797d35091d618c7ee991baa88edf62200817aa27732b03d9109cbdc6603092822b2e13a575953045b1cd0a",
|
||||
"8154716e77acd0f5e912887facffc7b2c9889891e863a39fcfed1e5637e47328a4a3bf40bbac1e740629d3013304ada88cf24dbf3735a7aa2d4b855f813c8fd1",
|
||||
"8162d651b6211f06f655a69cd7fdd383d6b4287e9ba132b9898ef9ac8687349e777626333d23bed93f9264aae965efb14ed650cb64fd0ad90494aff903eaef11",
|
||||
"8348c81a253096a9def0b472a8499fc03ef8c6c6d3cc9b4a018f142501ebd04c2479008b88895e033eb83978e7d71e52a91a2e324ca869ed8f2724dfdcef269e",
|
||||
"84b101db8d076398c1d624a8b38b22fbddbfa8fcc43ade44619f5a9b6e70daf1c963d6dc09ea039cca94ec56ccfd04a1689e806c970c0bd32cc9e56b73c7bd7b",
|
||||
"853fb95e0f017c203e08312e3ccf45c0419928e08313b1bb0444aa4ff089550546e67fdd3434a22cae3f67603437051e49be5c4d8fc5583b1aa6a1ae36f0a911",
|
||||
"85c9e13ccfc0d67de10281b04257d8ac0c256d2f9415e54148fb59954c0d43f66d3cbea43ea6389f8407a8bda8b1b1becd30e41dfbb3dd9bebbe69816d096fa9",
|
||||
"85ea0b349a8df04283c62efb571d2947e7264b566883e300501086733b08efa42ced215bc47951c8198626f86ca8c0df730cdc35f4d99ffff958599884b68e51",
|
||||
"88673d4f24d039e89c15d9ede6b653e41e42ca8bd7a8cb7e92a4f235e9b56cbad6200f8dc313c644e9a7d8d1dbc2b7e988da93bc0765499701bca6bc86d8fe3d",
|
||||
"88ec4ff5a1b0ffdabfe62d068286c851ee64c428883e56f32af14b59756d5846be9d46e5a777c4c22f2dec9596a1a44ed3aa75f1fb0231923cbd4ab59f1f9c47",
|
||||
"892bf89bd3a008a7d982de0d278349e654c713efacf965e88e46a12398375cd8502711378e378c39c33b2f995f47799760c6e6e05948b93c0d2b9fd427854ffc",
|
||||
"894bd433b4b06514195a604961c871649e108d210a41d5cbebe76f78cb6270b7708d1c59d6cb88807f882bb154be1c9058a753e2b6a95c3f4ac9e27a02036f12",
|
||||
"896256329fbeb5b8116349c31d8a39a7d36d5f970d48558e1db5417d611e240e4dbf473f6e49137f7aa6116394b7deabb0bbec4a014896cdc9484ee91458117d",
|
||||
"897c3401b4a35d8fad5966bf8c4dce6d94837c76e46e8131a8bd70527f6e1d8c9f59053d0a56425d7dee71939280ac3c38df14e976f613cb906d7187d6141297",
|
||||
"89e6f6a865ab743936a9b29d53b67bf4b68660ccbe834d4a11fa9011edb535e3b7b4d7a238c84971d4cb5f06ef7398bfecc4f2b786200fee67d7307f242da565",
|
||||
"8b707d4f8f32c80709d880fb257873915033c7d5bce9589a80ca9437618262c55dbe8eec2e8c82469bc335a84a8f16f89afcc53b8329dced5407a513927efc4c",
|
||||
"8e98d00c5d110856943461cd85305b0a817abb457c2afc8e89edb32e502d0060081c8e667d9fceb63a2f8efbcb6e193e9b0231afcc05ecb2303d7f742f304396",
|
||||
"90bb7c91281bb6625a0700c1ee2f3cee488cb9c1864ccf2e24699c5d957b1b7b686574d11acb37572fdf18a15f272fd44009b6cfce9b6cdf9025dd5002869d30",
|
||||
"911c26cf828319df5123a9cf38641704961a6b894aa6ee2b0d13409996a93d89f4868b91e0eb1efea907a70a14cf3a3bd8935033aeb03bd8555f2dea857a48bd",
|
||||
"92862e616dce7469bafc507ab8fbb47bb6f5ca8b96b05e9fbf39a259d1d4c4cac97b0472f713db2e5fbff0d3e587e7b34bedff80cc2a70c446becf9b488370d9",
|
||||
"92a15e2cbd0c89fbce36b05e3b282255097bb5492fc11f0d2b0a08c4311621a41ec35df201de51523b62189a3b44bb3eb1cbdf64e80f4a543d0d9f9a99f9bd3f",
|
||||
"97679def7032179662646816abc12f74fc693fb02c43675a2d5407e58be6dacd1eb483d1bf46f66c5103de3a649211c29e1127dca473e13b02dcd5e7df719cc0",
|
||||
"97e29f9edfe712b059203de5af236569e2c41fae8cddfc7b486204d6e30c411ec605c757fa5a1a151646092bd5d71de18a5f2d8b6fb74b9a28a7c7226a4f641b",
|
||||
"9b5f9ebc961424b8a6b7def59a86ad6fa6e45fc9ad5fb251c15d4d09202e6d3f63bb37b80faa4fdfe3997182079988d78556a9ebf7db535951a1e3cba0c0f6c9",
|
||||
"9b847b5006ea1b47dc0ec366d09aec4a67aec747c55af554c094994fe8c8625b09cfd5322958c816bea74f725abb3d1403f2e9336007db3b257949401b1fef03",
|
||||
"9cdf4ab91c8ef6148dfd724f2a2c644cc00df44f5eea5035e760ac59ec79078ffaf3d97a9c5a9747c04895a3dc666339f82cd17e40095b9fd055df3ff07da6d6",
|
||||
"9fb127fbe4659174b52ef61778a705cc5a96c8f136445bd28c10ac79398ab9ea291852b627e285e828fe37aa23d05b13cf202f3f0cb4c272aba94dd1806802c9",
|
||||
"9fe6ba948da2f4e4aa0e1b0d3e1aec1f093335f8097d7dd3d6b5217cd539f5c41735ef7a615d8210f2e6b777b7198f151264ade172be7dbbf5d442bf91843e8f",
|
||||
"a1afb2eae49546bf59e6f9a1968287add54dd6e336ec795037090a435f736b6d8ba2076e05e27034979a8caaeafce05fc6d9d5541f4e5a4321e64106dabd1549",
|
||||
"a2b06b54679145e65ce10a8356285efcadbacd41be817d2e0858ddba59e638775b79f76cb9e4ac5859627b67ebf227c55b51cc48e6d0d7ef41c9845d96ded68a",
|
||||
"a2b503bc78bd0b68fcdc3e3b68e3c68cf3da8d2d48d91f09313c7cdc11b43dd4d4de3a8a2c4b526809adf9879427c4818db72cffdbc2f0015a9fa5ade83bd400",
|
||||
"a359c15185b6d2a402dacfb7b3dc2e3ce5fd80a1add892b2dcf8e23bebe57f16680eebf7a851c3a870d3ba9932c4e42bad937c4676931d849c62f021ba812860",
|
||||
"a35dea43a67cbd18b705cf2b28114652686eb409c1ae1e56c04256fd902ba9ab52c7343bb8b162522bf3442da42431246644432c70f819ba8617a723abcce836",
|
||||
"a4b1eb406ff2c349437a5634148365fd0eecad5a264036e3af171d0f6769a7129590a0a3e09592038baa8bc1292af2bbdbfb74e3b1a685844e263532a87baef6",
|
||||
"a4b8fa949865e0aa45147a27f0a034a26e34745d624dcf0603dd25fd1ce279eaf2d073a853d67e6432447d5e06708d71a9cddac0d2918876d2d3498af3ae0892",
|
||||
"a6bc3c6beffd4335228c3b4857365215f0c4bc5197a5b0eca95334af33dba19ebf8d513f6c75359d7cb678b051d96579d73ebdaa5b6906e3b6eab35005bee13e",
|
||||
"aabfdbdc21150ac70f9bb1a34f4d7de570a72ba7e1afd8c08d64c85e00c12e6ca1f2ffe60dbd16a871987bd7aa47182baf57e7f68daaf0bac7fc3b907c8ef4c0",
|
||||
"af506d21ee140905c125e61c19d04599354fe84fe211502c9c766951387f6ff79e80db0658392af173f37ef7c92d7815ac9214e8ba4c6ade3e7a7ef014e5cc08",
|
||||
"b04a1d5062f2921f39074e4f5c00675269195834a0a9c0bcce10b1427bf8a6499bdd7d8c6717f220aa4ec9f590bb04b290673018528a60dd819ce9798b0a33b4",
|
||||
"b0a3c5148905a3e7e18c773684026e4ccd8811c3c62f6fcfc23135686a8db9c2caa6de7b14775e29b7cdeb360ae25ea626381c7689ade892c3fb72f82e2daa89",
|
||||
"b1a0cbb91459433ff6de32b189783a734c2ada4c04d7dd164de449ce79c749d382aff10aa9ed7b4449af3390da51585123ef88719ecf7cfea9c24223023a23dd",
|
||||
"b47a96b489f4dd851c364dee278699905f1ed933ba3a98a6660160463a8decef830bb91ac0a1b4f9b742df2dfbdc9625ec27133a69f6cf3cb81ed298183764e7",
|
||||
"b52e493e5049e86223385546f3407f5924fd75311a0a11af38423b7bb7c02c3f085fd1d9188515c7b43c59fbf168c23126456dba98dc9c0d29b7a3edee159015",
|
||||
"b5419f6ea89dc32431a7671df1ebf934647bba5b27db54235fb1e47d691b70c3160bf8019653d5faad616b169adfea5d8e7077e9820d9294144354133d45ee16",
|
||||
"b570d19edbda421e0975056b5fdf4cefbc3825b840aacaa337567ec1aa151a81633eb645a86c8c1c22b23e7f916c60c20cb115de29b670511fb9413611e8cc3b",
|
||||
"baff09432cffceac6ecd395a8ed5c947fdafe6c30c1c0f3b83c4ddfaa2ca9d57b21876153ac2b82067d7d37bc6789e2f68558f1f26fbcd53fd6a500124f80655",
|
||||
"bc0b544f1c13cc1d0fe15b0eab96e89e6d4dfc8919de1fb757ef97a7d5de9efff5e520def5a8471b75480fd49d410d222ed9332089bd527946c74070e8ad1934",
|
||||
"bc3886ba087d3fd637a4fa85adf33170e23b369c0c6eca422ddb26c73c04ae467e2b95ed73bead19013001af65bf2cf0d686a6e702b458a77068184c8b17dfb5",
|
||||
"bd609b6955a6a35a5580a6e19e173b02fa6d4ed880b6cba8fb5d2fd91309dc753326a824a47ee6148b3d6a01b9b49ce7c1122b1e30b6ea181bd257bbc38c2940",
|
||||
"bet",
|
||||
"bf64f21ff129fae4bf3ff795c39df0a4a6dc40ece1d71747a913dd84af2e4cac4e1b84213e23cb1397b3299f26b1b6302a3cdbd41da8baea2505febd6e1803ce",
|
||||
"bfe8d96ce71f9cce7bd16b5282041c66773405f1a11f4f0c8d3b6e81646f262bdac0cb3ee8f54e13175ba9ed7da38407e8a9aeff20972271f0c62c0b19f8b644",
|
||||
"c088e98f02d33581ac0d79c37a101e4273e0750a5691cffd96a09c38742617dae948cbc4affbff4ece1d611e44ea5539f0597eef33ef39f7f0e3ec2a5edf75eb",
|
||||
"c1bafe50eb70a1b65188fac549c6bbe7f641b672fbe9fd08cb64ed1f176efbedeca88f5c295d508e2dbf9b495fe0040bbbfbc4776af0d6cad6576a997db3e4cc",
|
||||
"c1e166044d7731207ce8b838011eae84814857a8ddb63b8a393d2497bdcd7e96d045aa229a7978533646cf9f9ea99a619943599d47a1558073690601fb486ad5",
|
||||
"c21ee36416076c1929dd93af7e936e371d4fe263662a2deb8fd6b0e5cd5b8cd86437b4afb2faa8813bd7b8689c7f56a63729a1e666684d8303f469faad669e54",
|
||||
"c3f1018eb1f7b5e5c0210deab309d06d3e8e9e15ec7dd41d2dbcf863c39e36955b2034fe44af5a4983285b8fc6c0d92b092f95383f8989c1d75a40a4bcdd3d83",
|
||||
"c4d740361d5f6bdcf408abc029d8adceb35f06c332c46fc290d187d96562992a8d6caa562eaa21643c346d44c9e706cd991ba986e53cfe37b41a0e048d14d6e0",
|
||||
"c5d4c712e06053bc35bc6cef173daaaae7fd47db5ac812b95a2f0f08374432ffeaa2b49a0f10cb60f38405d2459489df0e43fb73b48bdb6caadcb4405915c33e",
|
||||
"c777d3358a0aff067b64f254ac462fa223a1650af20ce2af341de610eebbb55a128a1dc43c91da7a1844848b5920b7dd5c5e0a1e8651d6442a2418709dad8c87",
|
||||
"c96c4e97012d25add2fe69513a5b1f941fc36c837737780c443203c72182b808a129982ebd64aaffb8eda4ba3c8787fd98ca55fd33f060f63917567446417574",
|
||||
"c9774fae6c0a30b456a21005abf026799f370a12fbcbc098e81bac2456955320ec6e712f1d6f9d59a50d615f81c6284785292180364598987a7990ae83c0f0c9",
|
||||
"cb557116fa7b3b6da35024b539795d9e255c111c06edbf0e77ba728dd352353182c96918c649fb9327bbb4fea1bb25affcade9b5069676b191611062941356e7",
|
||||
"cc3b30ba0f733abfe64667838f620c4f542db4665fa68e4d945b75ac0d2c435e6529e6541c4ac8ca18dec753b10e3a5c4614cfbc658dc951ab6cab357e6ef363",
|
||||
"ccd85fb40538f948396a4c2bf381ea591927a7cde9330ecab883cad5bd59db56f0c983362f9d0a8e88a67d3f2bae2182bc8ea94b4e3adc721c782ca5c801e2af",
|
||||
"cd1226e73c8275de15f2edb3744a413277fc76a4ebb7842fb743215c14b405b96c4e64bc8324feafe58937da218a1b0aeb9451d5781672ced1ad68c31eb54ba1",
|
||||
"cdc5df38351edbdf7afdb3aaf0b4f53253cedbf3f43d662548a432f86389505fd6f2f64f51f951355f4fcfc5718a98dd782e1472246556c87f0bbaacebb38cb8",
|
||||
"ce31dc5dfa61834e3ab67925ff5f24baf04b4aee6e35cd8ffa524f87b2e2e094999f85c68cc7a1c0e9b19016d050c1755406d02f7116ef85afa355c65a9a5855",
|
||||
"cefa12e7ac99a5d11df487ab6521837b11165246d1c3cdb2108770532cb1429c2dcba5262a4dbd9a37686bb76ad1c48ddecf473d807c2e552534b24bb78ee30d",
|
||||
"cf5b890eb74b4ac647d011a989a92a413c23c0db580c87057fc5afba2d83dd861f2a8640fb952381d090328d6278dbe56713d516020ce95cfb6d4fecf63b89e7",
|
||||
"cf8c583b1282449a97b72e317e56d5a4d1432e5420148a21ba8fd8bb2a172c7832379f30cd6582bd6674b548deb8517c8915c5c4b423bd3e73903f71b8862380",
|
||||
"d0cd44fcdae652efb0dd428cd1b8f1911e6eb2ca3469a1f2d6f9faf97a9d05e30f28387dfb81bfb4c97eba64187a0c047c85bf06998ccaec58781f3982626bb6",
|
||||
"d15bfc3278de168872744ebec8fc7a07678bd04b7557e89749eeedc7087fe0a36cb8b094e978e979d67feba46c4a2741f0fab18010796b5ff436836a5fc67e88",
|
||||
"d3d64ab67746fcb7b4a37d6b6b80c9d4b11afd9e15d81a60b3fde53e4f99267a63b50cfb2184c7c84c9f0dd4345c0d929160a7df52698a82603c112e0bf8ab8e",
|
||||
"d647b73602a3a0c1b06f282a612c29eefc6a7e372bc8af212a41f481843c23a975b41ca402f06ecb7dc660d4dd22a814f7659b48da7dfd28c02a319032394da1",
|
||||
"d98d182c89b465adb0fdd1cc5c2bcb22b81fcc4eb941977b667de22927ccc9a7876033008118957d803c83afb95595986bcc076e77483dd55dca91ce253ba010",
|
||||
"dafd89491990553f5e22021f96344b3bc92be6a419c919ba78860876f226e51e668dbabcb11cf9500f3bd05582b387907ea007b5e8f37c78fb71ac819b9bc20e",
|
||||
"db6932752693a1b2e7ef9af4adbf6fc8a299f21965ff9ff52b141563a471600df9308a89562af7b664b7fe14da134b4f44beafcf910f8794652e16dc475796b5",
|
||||
"db8eecd1ac9b20918e31f04331e46007f367c1f6365c9c4abb7af70eb1d2ea12174375fb95d1d11c46e03c81976de6d68f70693e1ea7f2096aecf06307a17d29",
|
||||
"dbe9efadf636bdd82f3ac2b3710653421e7cefca01b74012824b73f7368469fc4dd7e788b047920d4b3b7e4a486c732872ca11a75a89d1323337191ac2bc899e",
|
||||
"dca1aa77f919ef1000d91291ba68800340332c299e3c4c6bcabb41fd2305f36db353211d6ac691c37d16889e3c3ffc1efb7c621e8040cb77b7249e264af44768",
|
||||
"dee065b956b99b10db4763759d64c41791af1a7e77f1864f90a2b0847a12633dcf9bc108db7eaf73cc8d0e750f5c37383a56cd77cc2276d3960104c6bebe6346",
|
||||
"df52eb2c24a6c35b977a1d0fab336ab5c21cd84f78f685d5f0bea9ebaa7c078c0ca69717455e29f17bcd9282a1af9cbbe2d3e608c62cecf868419da081e2d810",
|
||||
"dfc8721858bd56b846473eb6123420a2735fc69cd77a92a1d2c623c51eab3ac664d61a890d305c6fe77ec48f2759248744e9d56689f6c22317bbaa316c848fbc",
|
||||
"e249989b0c397ac03583594a3911c9e9222ccce620921170bb39b8ab6fdaf136b164f3c9fcd8b4f750fc469c9cd69f144c2ca2dd918fcb778148fbf9751a869b",
|
||||
"e364856fe22a5c80cc8d13ee445473a0eb7204bad6972fc4c116ea1551b50da43a01577ef0487f2afb7aaee4b4155d61b1ff2b83dc502363929de76af0226818",
|
||||
"e37d85b60af58cc03e9b36e09dee5e8308368f44f91b28455e7f645a13fe29902e7f7d594ccb600e02caf4202a05d15477d4ea5191c7b97038ea06d73ce93c33",
|
||||
"e41b679ec1446821bf0a80fa7003fb90ac66b79d09c00dccf702a1b254f9ea85a68b0643ecd81d999413d5814b06b9998afd9876062067f51a63747533921d08",
|
||||
"e61565e75d632748413d51997cabb00613355f0a94cf6b2f929fdfa351490d2afc9bad72c7fa67595d9d9c7adc9454e8d1b05527991a17258424b14ec4e9a1d5",
|
||||
"e6a64aa839b95caeb74d810677a33b747e23907213719dd9706af7364b4cacf204b09f9b26686a70cd6d416a6b590f87103cc683685529968ea0edd75107f649",
|
||||
"e751d2f83310990aedc7392b54f827afac1873e9f8861e625814a8d1d15776160864742d557796d07a612479b2886287b417273cc9f7718889216c2ec3b3b7ed",
|
||||
"e7847a5814b865bc043600fee7d810b9815da389278fdfdd412114ab8f87b1536f4b63f3f7c3d3eeb097486abc152043eefdae6fd12c2f8743dac1cb668ab136",
|
||||
"e82ba384934ac4780595261c43eeceb3df29a047087870f5da13c7acae782b4b97857b98852ce235428b4bc24aa4ddcdcd7297acf683421201eff1c3fbcab84e",
|
||||
"e9da05b6d590dcf94addabd168c543be41a2275ddf44f6f44db1e3698f0bf7dd67f2e93b66679e0a0d42a2f39f3bc6a389f0e6b362431d0cb197fe46f9dd6606",
|
||||
"ea8456e0667e1cce6273cb333b7e6982f9aa0f260c7c103e04eb0076a73fe3497070b1a8f0c45b097dc3100a30254095a1c63e9514367655e9a378344ed25d1d",
|
||||
"eb415e110eaff48bdbc03b5ab719f64593f222b4a1d872b552e4fc48d338e532d1954f76e94813e44a6cd030425b4076cd7b9bf388b870a31344545d092dfa1a",
|
||||
"eb5078bcb64f9595d6d8589ad60502b2870f16942fbb4cbd2483c817c7fa460faeda90b82bcf531ac96be8c1d6825953ab85ab0bd46ea477615e71e50386ffe0",
|
||||
"ed5c9e654bfb28e9d4131b3805597ee9fa14fe72c6e2a6d503ec2e47faf396bbfa15ce49e6fe83bd97da1d441138545d388a329ae888c1f1ea44fc62996d787f",
|
||||
"edba91511ccf8ab01de2e2cef34c47d8430f8a2f4c62cd66c42ecb62da52d396e909aef7da067eedc58e1eb58a1fc3697939371e6a36c931af5987a50509854b",
|
||||
"ee216d2d13cba1a951445b061771ab0c97eb3c250003e16008debd85fa0317a508f923db79c796dc29de18c83baad5b15651f80db1cf7aee854e6da28853b742",
|
||||
"eli",
|
||||
"eva",
|
||||
"f1812dbb566edaa2ac92121641e5ae504d647bec835a02ed5d7c7f90424d0e8fe202846a599c2f74c49ec9b86181d3d6c50ac0688baa9b4c28608d592becdfdb",
|
||||
"f26a63e5171e2935e13015fbb755f04bff87fb1767ac91aa3481b9fe13b54cc75f772b41dfdc634829dd9b44c7b08798ed114046ef981d454889c41d4f6408d9",
|
||||
"f2f359ea473c07070fd1e50d2fcfe3dc4f624f01678c35920b079660b2d5b9c1743259ae6129992cd3b99ec2cdda94a45e8710888488b196c6cd9c853e86e454",
|
||||
"f35ce011f75fc01d153a94339aad24ae4fd5f181af55916a5ca0153cd5220ed199b98459eb88e9f4f3a4f8fbcf5c272bafdca35ddaca0827c4b480f79e7db1d6",
|
||||
"f4df4a067fec667827901fb55acb16acc4650f24eeaa588af1a103e5009e9166f753c7cd313d0d3dec79abb82a13c43fd2059db5ac0307b78369ca318001c4e7",
|
||||
"f56a47b89ebd2d22f869e2260b55f70d7ae0d499fc3fd4dbcb0e6e507f12513f29c004b9426e428696df0d434e4ad467f143bc620a2f661a54608de9e2c265d5",
|
||||
"f61bdd3abb2d03f07e33bfb0b9fba46069468cefd9eda04e77cdc5c2f13a417716d3e60ca91c39de1a480b72112ef0e6143e927fad45410ee252cfce9034f0b1",
|
||||
"f62196a11f50362b35eb1ed830b03c18bb187e4d07014a3d1b238756fe836f254afa923184170512a0c6d990032b4b1edb25dd2b74f6fc15f6ef6b51b6f82dd4",
|
||||
"f8e4bf2dd4f93dd473b055ebf2dfa6081703014fddca40a0efb6bd5dcb702244a30a2d3edcd6597ea4118c20258da575a0bc69a895356519d8400a5ad3b2bf58",
|
||||
"f980d152d5c14c6e7557f13fe26305ed0105dbb23177d455372e5529a5d3333e203070e87352d985a136f5ce3976a16b97070a4343fb4cb9d0760d9bcd5c7677",
|
||||
"fa8641fb64db60e7299f070f6497678dee0bfdeefcc22a51ca328da34b33fdd6c31b882d97fc32cfcdeee4fcb72b05d7eae43b10b531db161b7e8dcfc2775ebf",
|
||||
"fdde8cdd2fa5689aec75121e3c0778ca8c37238fd6a64706d85a4156d7735c482f1db74cefd023e94587b64a56d4a06e3b7fecf5c85978a4c777c9eaa5c633fd",
|
||||
"jan",
|
||||
"mar",
|
||||
"ona",
|
||||
"pau",
|
||||
"pep",
|
||||
"pol",
|
||||
"teo"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"fa": {
|
||||
"custom": {
|
||||
"glow-tts": {
|
||||
"id": "tts_models/fa/custom/glow-tts"
|
||||
}
|
||||
}
|
||||
},
|
||||
"bn": {
|
||||
"custom": {
|
||||
"vits-male": {
|
||||
"id": "tts_models/bn/custom/vits-male"
|
||||
},
|
||||
"vits-female": {
|
||||
"id": "tts_models/bn/custom/vits-female"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
206
data/st-core-scripts/scripts/extensions/tts/cosyvoice.js
Normal file
206
data/st-core-scripts/scripts/extensions/tts/cosyvoice.js
Normal file
@@ -0,0 +1,206 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { CosyVoiceProvider };
|
||||
|
||||
/**
 * TTS provider that talks to a CosyVoice HTTP server.
 *
 * Voices are listed with `GET {provider_endpoint}/speakers` and audio is
 * requested with `POST {provider_endpoint}/`.
 */
class CosyVoiceProvider {
    //########//
    // Config //
    //########//

    /** Active settings; populated by loadSettings(). */
    settings;
    // NOTE(review): nothing in this class ever sets `ready` to true — confirm whether callers read it.
    ready = false;
    /** Voice objects as returned by the `/speakers` endpoint. */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    modelTypes = {
        CosyVoice: 'CosyVoice',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
    };

    /**
     * Markup for the provider's settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `

        <label for="tts_endpoint">Provider Endpoint:</label>
        <input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Windows users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_For_Windows">CosyVoice_For_Windows</a>(Unofficial).</span><br/>
        <span>Macos Users Use <a target="_blank" href="https://github.com/v3ucn/CosyVoice_for_MacOs">CosyVoice_for_MacOs</a>(Unofficial).</span><br/>
        <br/>

        `;

        return html;
    }

    /**
     * Read the endpoint from the UI, persist it, and notify the backend hook.
     * Used when provider settings are updated from the UI.
     */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#tts_endpoint').val();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Populate the provider state and UI from saved settings.
     * Only keys that exist in `defaultSettings` are accepted; others are ignored.
     * @param {object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that loading saved values never overwrites the defaults.
        this.settings = structuredClone(this.defaultSettings);

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint).on('change', this.onSettingsChange.bind(this));

        await this.checkReady();

        console.info('CosyVoice: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voices.
     * Uses allSettled, so a failing request does not reject this call.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by name, fetching the voice list first if it is empty.
     * @param {string} voiceName Voice name to find
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        // Loose comparison kept on purpose: the `/speakers` schema is not visible
        // here, so the type of `name` cannot be confirmed.
        const match = this.voices.find(v => v.name == voiceName);

        if (!match) {
            // NOTE(review): thrown as a plain string; kept so callers receive the same value as before.
            throw `TTS Voice name ${voiceName} not found`;
        }

        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response carrying the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from `{provider_endpoint}/speakers` and cache it in `this.voices`.
     * @returns {Promise<any>} Parsed JSON response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);
        console.info(response);

        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and
            // interpolating a parsed object would only print "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        this.voices = responseJson;

        return responseJson;
    }

    /**
     * Hook invoked each time a parameter is changed. Currently a no-op.
     * @returns {Promise<void>}
     */
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (sent as `speaker`)
     * @param {string|null} [lang] Currently unused
     * @param {boolean} [forceNoStreaming] When true, never request streaming even if enabled in settings
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const streaming = this.settings.streaming && !forceNoStreaming;

        const params = {
            text: inputText,
            speaker: voiceId,
        };

        if (streaming) {
            params['streaming'] = 1;
        }

        const url = `${this.settings.provider_endpoint}/`;

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(params),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    /**
     * Interface not used; echoes the given id.
     * @param {any} history_item_id History item identifier
     * @returns {Promise<any>} The same identifier
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/* Spacing for direct .tts_block children of the MiniMax settings container. */
.minimax_tts_settings > .tts_block {
    gap: 5px;
    margin: 5px 0;
}

/* Flex row on a dark rounded background: children pushed to both ends, vertically centered. */
.minimax-custom-item {
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 8px;
    background: #333;
    margin: 5px 0;
    border-radius: 4px;
}

/* Stacks its children vertically. */
.minimax-custom-item-info {
    display: flex;
    flex-direction: column;
}

.minimax-custom-item-name {
    font-weight: bold;
}

/* Dimmed, slightly smaller text. */
.minimax-custom-item-details {
    color: #aaa;
    font-size: 0.9em;
}

.minimax-custom-item-remove {
    padding: 4px 8px;
    font-size: 12px;
}

/* Muted italic text. */
.minimax-empty-list {
    color: #888;
    font-style: italic;
}
|
||||
@@ -0,0 +1,11 @@
|
||||
/* Vertical stack with 10px between children. */
#openai-character-instructions {
    display: flex;
    flex-direction: column;
    gap: 10px;
}

/* Each nested .character-instructions element is itself a tighter vertical stack. */
#openai-character-instructions .character-instructions {
    display: flex;
    flex-direction: column;
    gap: 5px;
}
|
||||
270
data/st-core-scripts/scripts/extensions/tts/edge.js
Normal file
270
data/st-core-scripts/scripts/extensions/tts/edge.js
Normal file
@@ -0,0 +1,270 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { getApiUrl } from '../../extensions.js';
|
||||
import { doExtrasFetch, modules } from '../../extensions.js';
|
||||
import { getPreviewString } from './index.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { EdgeTtsProvider };
|
||||
|
||||
/**
 * Identifiers of the backends that can serve Edge TTS requests.
 * Frozen so the shared constant cannot be modified by accident.
 */
const EDGE_TTS_PROVIDER = Object.freeze({
    extras: 'extras',
    plugin: 'plugin',
});
|
||||
|
||||
/**
 * TTS provider for Microsoft Edge voices.
 *
 * Requests are routed through one of two backends chosen by
 * `settings.provider`: the Extras API or the server plugin.
 */
class EdgeTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings; populated by loadSettings(). */
    settings;
    /** Voice objects in the shape produced by fetchTtsVoiceObjects(). */
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        rate: 0,
        provider: EDGE_TTS_PROVIDER.extras,
    };

    /**
     * Markup for the provider's settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        let html = `Microsoft Edge TTS<br>
        <label for="edge_tts_provider">Provider</label>
        <select id="edge_tts_provider">
            <option value="${EDGE_TTS_PROVIDER.extras}">Extras</option>
            <option value="${EDGE_TTS_PROVIDER.plugin}">Plugin</option>
        </select>
        <label for="edge_tts_rate">Rate: <span id="edge_tts_rate_output"></span></label>
        <input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />
        `;
        return html;
    }

    /**
     * Copy the rate and provider from the UI into settings, refresh the rate
     * label, and persist.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
        this.settings.provider = String($('#edge_tts_provider').val());
        saveTtsProviderSettings();
    }

    /**
     * Populate the provider state and UI from saved settings, then run the
     * readiness check.
     * @param {object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key not present in `defaultSettings`
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy so that loading saved values never overwrites the defaults.
        this.settings = structuredClone(this.defaultSettings);

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                // NOTE(review): thrown as a plain string; kept so callers receive the same value as before.
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
        $('#edge_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#edge_tts_provider').val(this.settings.provider || EDGE_TTS_PROVIDER.extras);
        $('#edge_tts_provider').on('change', () => { this.onSettingsChange(); });
        await this.checkReady();

        console.debug('EdgeTTS: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds
     * @returns {Promise<void>}
     */
    async checkReady() {
        await this.throwIfModuleMissing();
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName);

        if (!match) {
            // NOTE(review): thrown as a plain string; kept so callers receive the same value as before.
            throw `TTS Voice name ${voiceName} not found`;
        }

        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from the configured backend, sorted by locale and
     * then by short name.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>} Voice objects
     * @throws {Error} When the backend is unavailable or answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        await this.throwIfModuleMissing();

        const url = this.getVoicesUrl();
        const response = await this.doFetch(url);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the blob URL once playback has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the backend is unavailable or answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        await this.throwIfModuleMissing();

        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const url = this.getGenerateUrl();
        const response = await this.doFetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                'text': inputText,
                'voice': voiceId,
                'rate': Number(this.settings.rate),
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    /**
     * Perform a fetch request using the configured provider.
     * @param {string} url URL string
     * @param {any} options Request options
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When `settings.provider` is not a known backend
     */
    doFetch(url, options) {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            return doExtrasFetch(url, options);
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return fetch(url, options);
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS generation endpoint.
     * @returns {string} URL string
     * @throws {Error} When `settings.provider` is not a known backend
     */
    getGenerateUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/generate';
            return url.toString();
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/generate';
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Get the URL for the TTS voices endpoint.
     * @returns {string} URL string
     * @throws {Error} When `settings.provider` is not a known backend
     */
    getVoicesUrl() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras) {
            const url = new URL(getApiUrl());
            url.pathname = '/api/edge-tts/list';
            return url.toString();
        }

        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin) {
            return '/api/plugins/edge-tts/list';
        }

        throw new Error('Invalid TTS Provider');
    }

    /**
     * Reject when the backend selected in settings is not available.
     * @returns {Promise<void>}
     * @throws {Error} When the Extras module or the server plugin is missing
     */
    async throwIfModuleMissing() {
        if (this.settings.provider === EDGE_TTS_PROVIDER.extras && !modules.includes('edge-tts')) {
            const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
            throw new Error(message);
        }

        // isPluginAvailable() is async: it must be awaited, otherwise the negated
        // Promise is always false and a missing plugin goes unreported.
        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin && !(await this.isPluginAvailable())) {
            const message = 'Edge TTS Server plugin not loaded. Install it from https://github.com/SillyTavern/SillyTavern-EdgeTTS-Plugin and restart the SillyTavern server.';
            throw new Error(message);
        }
    }

    /**
     * Probe the server plugin endpoint.
     * @returns {Promise<boolean>} True when the probe answers with an OK status; false on a non-OK status or a failed request
     */
    async isPluginAvailable() {
        try {
            const result = await fetch('/api/plugins/edge-tts/probe', {
                method: 'POST',
                headers: getRequestHeaders({ omitContentType: true }),
            });
            return result.ok;
        } catch (e) {
            // A failed request is treated the same as "plugin not installed".
            return false;
        }
    }
}
|
||||
|
||||
455
data/st-core-scripts/scripts/extensions/tts/electronhub.js
Normal file
455
data/st-core-scripts/scripts/extensions/tts/electronhub.js
Normal file
@@ -0,0 +1,455 @@
|
||||
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings, initVoiceMap } from './index.js';
|
||||
|
||||
export { ElectronHubTtsProvider };
|
||||
|
||||
/**
 * TTS provider for the Electron Hub speech API. It loads the model list from
 * `/api/openai/electronhub/models` and generates audio through
 * `/api/openai/electronhub/generate-voice`.
 */
class ElectronHubTtsProvider {
    /**
     * Request-body keys that are filled in by the provider itself or have a
     * dedicated control in the settings panel. They are never rendered or sent
     * as "dynamic" schema parameters.
     */
    static RESERVED_PARAMS = Object.freeze([
        'input', 'response_format', 'model', 'speed', 'temperature', 'top_p',
        'instructions', 'speaker_transcript', 'cfg_scale', 'cfg_filter_top_k',
        'speech_rate', 'pitch_adjustment', 'emotional_style',
    ]);

    /** Voice names offered when the selected model does not list its own voices. */
    static FALLBACK_VOICES = Object.freeze([
        'alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer', 'verse',
    ]);

    settings;
    voices = [];
    models = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'tts-1',
        speed: 1,
        temperature: 1,
        top_p: 1,
        // GPT-4o Mini TTS
        instructions: '',
        // Dia
        speaker_transcript: '',
        cfg_filter_top_k: 25,
        cfg_scale: 3,
        // Microsoft TTS
        speech_rate: 0,
        pitch_adjustment: 0,
        emotional_style: '',
    };

    /** Static markup of the provider's settings panel. */
    get settingsHtml() {
        return `
        <div>Electron Hub unified TTS API.</div>
        <div class="flex-container alignItemsCenter">
            <div class="flex1"></div>
            <div id="electronhub_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_electronhub">
                <i class="fa-solid fa-key"></i>
                <span>API Key</span>
            </div>
        </div>
        <div class="flex-container flexGap10 wrap">
            <div class="flex1">
                <label for="electronhub_tts_model">Model</label>
                <select id="electronhub_tts_model" class="text_pole"></select>
            </div>
            <div>
                <label for="electronhub_tts_speed">Speed <span id="electronhub_tts_speed_output"></span></label>
                <input type="range" id="electronhub_tts_speed" value="1" min="0.25" max="4" step="0.05">
            </div>
            <div>
                <label for="electronhub_tts_temperature">Temperature</label>
                <input id="electronhub_tts_temperature" class="text_pole" type="number" min="0" max="2" step="0.1" value="1" />
            </div>
            <div id="electronhub_block_top_p" style="display:none;">
                <label for="electronhub_tts_top_p">Top-p</label>
                <input id="electronhub_tts_top_p" class="text_pole" type="number" min="0" max="1" step="0.01" value="1" />
            </div>
        </div>

        <div id="electronhub_block_instructions" style="display:none;">
            <label for="electronhub_tts_instructions">Instructions (GPT-4o Mini TTS):</label>
            <textarea id="electronhub_tts_instructions" class="textarea_compact autoSetHeight" placeholder="e.g., 'Speak cheerfully and energetically'"></textarea>
        </div>

        <div id="electronhub_block_dia" style="display:none;">
            <label for="electronhub_tts_speaker_transcript">Speaker transcript (Dia):</label>
            <textarea id="electronhub_tts_speaker_transcript" class="textarea_compact autoSetHeight" maxlength="1000"></textarea>
            <label for="electronhub_tts_cfg_scale">CFG scale (1-5):</label>
            <input id="electronhub_tts_cfg_scale" type="number" min="1" max="5" step="1" />
            <label for="electronhub_tts_cfg_topk">CFG filter top_k (15-50):</label>
            <input id="electronhub_tts_cfg_topk" type="number" min="15" max="50" step="1" />
        </div>

        <div id="electronhub_block_msft" style="display:none;">
            <div class="flex-container flexGap10 wrap">
                <div>
                    <label for="electronhub_tts_speech_rate">Speech rate (-100..100)</label>
                    <input id="electronhub_tts_speech_rate" class="text_pole" type="number" min="-100" max="100" step="1" style="width:120px;" />
                </div>
                <div>
                    <label for="electronhub_tts_pitch_adjustment">Pitch adjustment (-100..100)</label>
                    <input id="electronhub_tts_pitch_adjustment" class="text_pole" type="number" min="-100" max="100" step="1" style="width:120px;" />
                </div>
            </div>
            <div class="flex-container flexGap10">
                <div class="flex1">
                    <label for="electronhub_tts_emotional_style">Emotional style</label>
                    <input id="electronhub_tts_emotional_style" class="text_pole" type="text" placeholder="cheerful, sad, angry, gentle..." />
                </div>
            </div>
        </div>

        <div id="electronhub_dynamic_params" class="flex-container flexGap10 wrap" style="display:none;"></div>`;
    }

    constructor() {
        // Kept on the instance so dispose() can remove exactly this listener.
        this.handler = async (/** @type {string} */ key) => {
            if (key !== SECRET_KEYS.ELECTRONHUB) return;
            $('#electronhub_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELECTRONHUB]);
            await this.onRefreshClick();
        };
    }

    /** Unsubscribe the secret-change listener registered by loadSettings(). */
    dispose() {
        for (const event of [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED]) {
            eventSource.removeListener(event, this.handler);
        }
    }

    /**
     * Merge stored settings over the defaults, load the model list, fill the
     * settings controls and wire their change handlers.
     * @param {object} settings Previously saved provider settings (may be empty)
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default Electron Hub TTS settings');
        }

        this.settings = { ...this.defaultSettings, ...settings };

        await this.loadModels();
        this.populateModelSelect();

        // [selector, DOM event that signals a change, initial value]
        const controls = [
            ['#electronhub_tts_model', 'change', this.settings.model],
            ['#electronhub_tts_speed', 'input', this.settings.speed],
            ['#electronhub_tts_temperature', 'input', this.settings.temperature],
            ['#electronhub_tts_top_p', 'input', this.settings.top_p],
            ['#electronhub_tts_instructions', 'input', this.settings.instructions],
            ['#electronhub_tts_speaker_transcript', 'input', this.settings.speaker_transcript],
            ['#electronhub_tts_cfg_scale', 'input', this.settings.cfg_scale],
            ['#electronhub_tts_cfg_topk', 'input', this.settings.cfg_filter_top_k],
            ['#electronhub_tts_speech_rate', 'input', this.settings.speech_rate],
            ['#electronhub_tts_pitch_adjustment', 'input', this.settings.pitch_adjustment],
            ['#electronhub_tts_emotional_style', 'input', this.settings.emotional_style],
        ];
        for (const [selector, eventName, value] of controls) {
            $(selector).val(value).on(eventName, () => { this.onSettingsChange(); });
        }
        $('#electronhub_tts_speed_output').text(this.settings.speed);

        $('#electronhub_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELECTRONHUB]);
        for (const event of [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED]) {
            eventSource.on(event, this.handler);
        }

        await this.checkReady();
        this.updateConditionalBlocks();
        this.renderDynamicParams();
        console.debug('Electron Hub TTS: Settings loaded');
    }

    /**
     * Read every static control back into `this.settings`, refresh the
     * model-dependent UI, persist, and reload voices if the model changed.
     */
    async onSettingsChange() {
        const previousModel = this.settings.model;
        const readText = (selector) => String($(selector).val() || '');
        // A cleared number field yields '', and Number('') is 0, which is outside
        // several of the advertised ranges. Keep the last stored value instead.
        const readNumber = (selector, current) => {
            const raw = $(selector).val();
            if (raw == null || raw === '') return current;
            const parsed = Number(raw);
            return Number.isFinite(parsed) ? parsed : current;
        };

        this.settings.model = String($('#electronhub_tts_model').find(':selected').val() || this.settings.model);
        this.settings.speed = readNumber('#electronhub_tts_speed', this.settings.speed);
        $('#electronhub_tts_speed_output').text(this.settings.speed);
        this.settings.temperature = readNumber('#electronhub_tts_temperature', this.settings.temperature);
        this.settings.top_p = readNumber('#electronhub_tts_top_p', this.settings.top_p);
        this.settings.instructions = readText('#electronhub_tts_instructions');
        this.settings.speaker_transcript = readText('#electronhub_tts_speaker_transcript');
        this.settings.cfg_scale = readNumber('#electronhub_tts_cfg_scale', this.settings.cfg_scale);
        this.settings.cfg_filter_top_k = readNumber('#electronhub_tts_cfg_topk', this.settings.cfg_filter_top_k);
        this.settings.speech_rate = readNumber('#electronhub_tts_speech_rate', this.settings.speech_rate);
        this.settings.pitch_adjustment = readNumber('#electronhub_tts_pitch_adjustment', this.settings.pitch_adjustment);
        this.settings.emotional_style = readText('#electronhub_tts_emotional_style');

        this.updateConditionalBlocks();
        this.renderDynamicParams();
        saveTtsProviderSettings();

        if (previousModel !== this.settings.model) {
            this.voices = await this.fetchTtsVoiceObjects();
            await initVoiceMap();
        }
    }

    /**
     * Fetch the model list and keep only models that expose a speech endpoint.
     * If the stored model is not among them, the first available one is
     * selected and saved. On any failure the list is emptied and a warning logged.
     */
    async loadModels() {
        try {
            const response = await fetch('/api/openai/electronhub/models', {
                method: 'POST',
                headers: getRequestHeaders({ omitContentType: true }),
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }

            /** @type {Array<any>} */
            const data = await response.json();
            const allModels = Array.isArray(data) ? data : [];
            const isSpeechEndpoint = (ep) => typeof ep === 'string'
                && (ep === '/v1/audio/speech' || ep.endsWith('/audio/speech') || ep === 'audio/speech');

            this.models = allModels.filter(m => Array.isArray(m?.endpoints) && m.endpoints.some(isSpeechEndpoint));

            if (this.models.length > 0 && !this.models.some(m => m.id === this.settings.model)) {
                this.settings.model = this.models[0].id;
                saveTtsProviderSettings();
            }
        } catch (err) {
            console.warn('Electron Hub models fetch failed', err);
            this.models = [];
        }
    }

    /** Rebuild the model dropdown, one <optgroup> per vendor. */
    populateModelSelect() {
        const select = $('#electronhub_tts_model');
        select.empty();

        for (const [vendor, models] of this.groupByVendor(this.models).entries()) {
            const optgroup = document.createElement('optgroup');
            optgroup.label = vendor;
            for (const m of models) {
                const opt = document.createElement('option');
                opt.value = m.id;
                opt.text = m.name || m.id;
                optgroup.appendChild(opt);
            }
            select.append(optgroup);
        }

        if (this.models.some(x => x.id === this.settings.model)) {
            select.val(this.settings.model);
        }
    }

    /**
     * Group models by vendor prefix from name before ':'
     * @param {Array<any>} array
     * @returns {Map<string, any[]>}
     */
    groupByVendor(array) {
        const groups = new Map();
        for (const entry of array) {
            const name = String(entry?.name || entry?.id || 'Other');
            const vendor = name.split(':')[0].trim() || 'Other';
            if (!groups.has(vendor)) groups.set(vendor, []);
            groups.get(vendor).push(entry);
        }
        return groups;
    }

    /**
     * Decide which optional parameter groups apply to the selected model.
     * Used for both showing the controls and building the request, so a value
     * entered in a visible control is always sent.
     * @returns {{hasInstructions: boolean, hasDia: boolean, hasMsft: boolean, hasTopP: boolean}}
     */
    getModelCapabilities() {
        const modelId = String(this.settings.model || '');
        const lowerId = modelId.toLowerCase();
        const model = this.models.find(m => m.id === modelId);
        const params = model?.parameters || {};
        const vendorName = String(model?.name || '').split(':')[0].trim().toLowerCase();
        const declares = (...keys) => keys.some(key => key in params);

        return {
            hasInstructions: declares('instructions') || lowerId === 'gpt-4o-mini-tts',
            hasDia: declares('speaker_transcript', 'cfg_scale', 'cfg_filter_top_k') || lowerId.includes('dia'),
            hasMsft: declares('speech_rate', 'pitch_adjustment', 'emotional_style')
                || vendorName === 'microsoft' || lowerId.includes('microsoft-tts'),
            hasTopP: declares('top_p'),
        };
    }

    /**
     * Keys to skip when handling schema-declared ("dynamic") parameters.
     * 'voice' is reserved only when the model lists its own voices.
     * @param {any} model Model object from the models response, if found
     * @returns {Set<string>}
     */
    getReservedParams(model) {
        const reserved = new Set(ElectronHubTtsProvider.RESERVED_PARAMS);
        if (Array.isArray(model?.voices) && model.voices.length > 0) reserved.add('voice');
        return reserved;
    }

    /** Show or hide the optional control groups for the selected model. */
    updateConditionalBlocks() {
        const { hasInstructions, hasDia, hasMsft, hasTopP } = this.getModelCapabilities();
        $('#electronhub_block_instructions').toggle(hasInstructions);
        $('#electronhub_block_dia').toggle(hasDia);
        $('#electronhub_block_msft').toggle(hasMsft);
        $('#electronhub_block_top_p').toggle(hasTopP);
    }

    /**
     * Build UI for additional model parameters dynamically
     */
    renderDynamicParams() {
        const container = $('#electronhub_dynamic_params');
        container.empty();

        const model = this.models.find(m => m.id === this.settings.model);
        const params = model?.parameters || {};
        const reserved = this.getReservedParams(model);
        const entries = Object.entries(params).filter(([key]) => !reserved.has(key));
        container.toggle(entries.length > 0);

        for (const [key, spec] of entries) {
            const title = key.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
            const type = String(spec?.type || 'string');
            const id = `electronhub_dyn_${key.replace(/[^a-zA-Z0-9_-]/g, '_')}`;
            const stored = this.settings[key];
            const save = (value) => {
                this.settings[key] = value;
                saveTtsProviderSettings();
            };
            // Keys and bounds come from the models response, so they are set via
            // .text()/.attr() rather than interpolated into an HTML string.
            const makeLabel = (text) => $('<label>').attr('for', id).text(text);

            if (Array.isArray(spec?.enum) && spec.enum.length) {
                const select = $('<select class="text_pole">').attr('id', id);
                for (const option of spec.enum) select.append(new Option(String(option), String(option)));
                container.append($('<div>').append(makeLabel(title), select));
                select.val(String(stored ?? spec.default ?? spec.enum[0]));
                select.on('change', () => save(String(select.val() || '')));
                continue;
            }

            if (type === 'boolean') {
                const checkbox = $('<input type="checkbox">').attr('id', id);
                const label = $('<label class="checkbox_label">').attr('for', id)
                    .append(checkbox, document.createTextNode(' '), $('<small>').text(title));
                container.append(label);
                checkbox.prop('checked', !!(stored ?? spec.default ?? false));
                checkbox.on('change', () => save(checkbox.is(':checked')));
                continue;
            }

            if (type === 'number' || type === 'integer') {
                const min = spec.minimum ?? undefined;
                const max = spec.maximum ?? undefined;
                const step = type === 'integer' ? 1 : (spec.step ?? 0.01);
                const range = (min != null || max != null) ? ` (${min ?? ''}..${max ?? ''})` : '';
                const input = $('<input type="number" class="text_pole">').attr({ id, step });
                if (min != null) input.attr('min', min);
                if (max != null) input.attr('max', max);
                container.append($('<div>').append(makeLabel(`${title}${range}:`), input));
                const initial = stored ?? spec.default ?? '';
                if (initial !== '') input.val(initial);
                input.on('input', () => {
                    const raw = input.val();
                    // An empty field is stored as '' so the key is skipped when building the request.
                    save(raw === '' ? '' : Number(raw));
                });
                continue;
            }

            // Free-text parameters: multi-line editor for keys that look like long prose.
            const isLong = /instructions|transcript|style|prompt|description/i.test(key);
            const field = isLong
                ? $('<textarea class="textarea_compact autoSetHeight">').attr('id', id)
                : $('<input type="text" class="text_pole">').attr('id', id);
            container.append($('<div>').append(makeLabel(title), field));
            field.val(String(stored ?? spec?.default ?? ''));
            field.on('input', () => save(String(field.val() || '')));
        }
    }

    /** Refresh the cached voice list. */
    async checkReady() {
        this.voices = await this.fetchTtsVoiceObjects();
    }

    /** Reload models and voices, re-render the model-dependent UI, and save. */
    async onRefreshClick() {
        await this.loadModels();
        this.populateModelSelect();
        this.voices = await this.fetchTtsVoiceObjects();
        this.updateConditionalBlocks();
        this.renderDynamicParams();
        saveTtsProviderSettings();
    }

    /**
     * Look up a voice object by name, loading the voice list first if it is empty.
     * @param {string} voiceName Voice name to look up
     * @returns {Promise<object>} Matching voice object
     * @throws {string} If no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice identifier
     * @returns {Promise<Response>} Response of the generation request
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * List voices for the selected model: the model's own `voices` array when
     * present and non-empty, otherwise a fixed fallback list.
     * @returns {Promise<Array<{name: string, voice_id: string, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        const toVoice = (name) => ({ name, voice_id: name, lang: 'en-US' });
        const model = this.models.find(m => m.id === this.settings.model);
        if (model && Array.isArray(model.voices) && model.voices.length) {
            return model.voices.map(toVoice);
        }
        // Fallback to common OpenAI voices
        return ElectronHubTtsProvider.FALLBACK_VOICES.map(toVoice);
    }

    /**
     * Generate the preview sentence with the given voice and play it.
     * @param {string} voiceId Voice identifier
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        // Release the blob URL once playback finishes.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.play();
    }

    /**
     * Build the request body from the settings and POST it to the generation route.
     * @param {string} inputText Text to synthesize
     * @param {string} voiceId Voice identifier
     * @returns {Promise<Response>} The successful response
     * @throws {Error} If the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating Electron Hub TTS for voice_id ${voiceId}`);

        const settings = this.settings;
        const body = {
            input: inputText,
            voice: voiceId,
            speed: settings.speed,
            temperature: settings.temperature,
            model: settings.model,
        };

        // Same capability check that drives the visibility of the control
        // groups, so whatever the user can edit is actually sent.
        const { hasInstructions, hasDia, hasMsft } = this.getModelCapabilities();
        if (hasInstructions && settings.instructions?.trim()) {
            body.instructions = settings.instructions.trim();
        }
        if (hasDia) {
            if (settings.speaker_transcript?.trim()) body.speaker_transcript = settings.speaker_transcript.trim();
            if (Number.isFinite(settings.cfg_scale)) body.cfg_scale = Number(settings.cfg_scale);
            if (Number.isFinite(settings.cfg_filter_top_k)) body.cfg_filter_top_k = Number(settings.cfg_filter_top_k);
        }
        if (hasMsft) {
            if (Number.isFinite(settings.speech_rate)) body.speech_rate = Number(settings.speech_rate);
            if (Number.isFinite(settings.pitch_adjustment)) body.pitch_adjustment = Number(settings.pitch_adjustment);
            if ((settings.emotional_style || '').trim()) body.emotional_style = String(settings.emotional_style).trim();
        }
        if (Number.isFinite(settings.top_p)) {
            body.top_p = Number(settings.top_p);
        }

        // add dynamic params based on schema
        const modelObj = this.models.find(m => m.id === settings.model);
        const reserved = this.getReservedParams(modelObj);
        for (const key of Object.keys(modelObj?.parameters || {})) {
            if (reserved.has(key)) continue;
            const val = settings[key];
            if (val === undefined || val === '') continue;
            body[key] = val;
        }

        const response = await fetch('/api/openai/electronhub/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify(body),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
||||
436
data/st-core-scripts/scripts/extensions/tts/elevenlabs.js
Normal file
436
data/st-core-scripts/scripts/extensions/tts/elevenlabs.js
Normal file
@@ -0,0 +1,436 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
import { event_types, eventSource, getRequestHeaders } from '/script.js';
|
||||
import { SECRET_KEYS, secret_state, writeSecret } from '/scripts/secrets.js';
|
||||
import { getBase64Async } from '/scripts/utils.js';
|
||||
export { ElevenLabsTtsProvider };
|
||||
|
||||
class ElevenLabsTtsProvider {
|
||||
settings;
|
||||
voices = [];
|
||||
separator = ' ... ... ... ';
|
||||
|
||||
defaultSettings = {
|
||||
stability: 0.75,
|
||||
similarity_boost: 0.75,
|
||||
style_exaggeration: 0.00,
|
||||
speaker_boost: true,
|
||||
speed: 1.0,
|
||||
model: 'eleven_turbo_v2_5',
|
||||
voiceMap: {},
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<div class="elevenlabs_tts_settings">
|
||||
<div class="flex-container alignItemsBaseline">
|
||||
<h4 for="elevenlabs_tts_key" class="flex1 margin0">
|
||||
<a href="https://elevenlabs.io/app/developers/api-keys" target="_blank">ElevenLabs TTS Key</a>
|
||||
</h4>
|
||||
<div id="elevenlabs_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_elevenlabs">
|
||||
<i class="fa-solid fa-key"></i>
|
||||
<span>Click to set</span>
|
||||
</div>
|
||||
</div>
|
||||
<label for="elevenlabs_tts_model">Model</label>
|
||||
<select id="elevenlabs_tts_model" class="text_pole">
|
||||
<option value="eleven_v3">Eleven v3</option>
|
||||
<option value="eleven_ttv_v3">Eleven ttv v3</option>
|
||||
<option value="eleven_multilingual_v2">Multilingual v2</option>
|
||||
<option value="eleven_flash_v2_5">Eleven Flash v2.5</option>
|
||||
<option value="eleven_turbo_v2_5">Turbo v2.5</option>
|
||||
<option value="eleven_multilingual_ttv_v2">Multilingual ttv v2</option>
|
||||
<option value="eleven_monolingual_v1">English v1 (Old)</option>
|
||||
<option value="eleven_multilingual_v1">Multilingual v1 (Old)</option>
|
||||
<option value="eleven_turbo_v2">Turbo v2 (Old)</option>
|
||||
</select>
|
||||
<label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
|
||||
<input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.01" />
|
||||
<label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
|
||||
<input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.01" />
|
||||
<label for="elevenlabs_tts_speed">Speed: <span id="elevenlabs_tts_speed_output"></span></label>
|
||||
<input id="elevenlabs_tts_speed" type="range" value="${this.defaultSettings.speed}" min="0.7" max="1.2" step="0.01" />
|
||||
<div id="elevenlabs_tts_v2_options" style="display: none;">
|
||||
<label for="elevenlabs_tts_style_exaggeration">Style Exaggeration: <span id="elevenlabs_tts_style_exaggeration_output"></span></label>
|
||||
<input id="elevenlabs_tts_style_exaggeration" type="range" value="${this.defaultSettings.style_exaggeration}" min="0" max="1" step="0.01" />
|
||||
<label for="elevenlabs_tts_speaker_boost">Speaker Boost:</label>
|
||||
<input id="elevenlabs_tts_speaker_boost" style="display: inline-grid" type="checkbox" />
|
||||
</div>
|
||||
<hr>
|
||||
<div id="elevenlabs_tts_voice_cloning">
|
||||
<span>Instant Voice Cloning</span><br>
|
||||
<input id="elevenlabs_tts_voice_cloning_name" type="text" class="text_pole" placeholder="Voice Name"/>
|
||||
<input id="elevenlabs_tts_voice_cloning_description" type="text" class="text_pole" placeholder="Voice Description"/>
|
||||
<input id="elevenlabs_tts_voice_cloning_labels" type="text" class="text_pole" placeholder="Labels"/>
|
||||
<div class="menu_button menu_button_icon" id="upload_audio_file">
|
||||
<i class="fa-solid fa-file-import"></i>
|
||||
<span>Upload Audio Files</span>
|
||||
</div>
|
||||
<input id="elevenlabs_tts_audio_files" type="file" name="audio_files" accept="audio/*" style="display: none;" multiple>
|
||||
<div id="elevenlabs_tts_selected_files_list"></div>
|
||||
<input id="elevenlabs_tts_clone_voice_button" class="menu_button menu_button_icon" type="button" value="Clone Voice">
|
||||
</div>
|
||||
<hr>
|
||||
</div>
|
||||
`;
|
||||
return html;
|
||||
}
|
||||
|
||||
constructor() {
|
||||
this.handler = async function (/** @type {string} */ key) {
|
||||
if (key !== SECRET_KEYS.ELEVENLABS) return;
|
||||
$('#elevenlabs_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELEVENLABS]);
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}.bind(this);
|
||||
}
|
||||
|
||||
dispose() {
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.removeListener(event, this.handler);
|
||||
});
|
||||
}
|
||||
|
||||
shouldInvolveExtendedSettings() {
|
||||
// Models that support extended settings (style_exaggeration, speaker_boost)
|
||||
const modelsWithExtendedSettings = [
|
||||
'eleven_v3',
|
||||
'eleven_ttv_v3',
|
||||
'eleven_multilingual_v2',
|
||||
'eleven_multilingual_ttv_v2',
|
||||
];
|
||||
return modelsWithExtendedSettings.includes(this.settings.model);
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Update dynamically
|
||||
this.settings.stability = $('#elevenlabs_tts_stability').val();
|
||||
this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val();
|
||||
this.settings.style_exaggeration = $('#elevenlabs_tts_style_exaggeration').val();
|
||||
this.settings.speaker_boost = $('#elevenlabs_tts_speaker_boost').is(':checked');
|
||||
this.settings.speed = $('#elevenlabs_tts_speed').val();
|
||||
this.settings.model = $('#elevenlabs_tts_model').find(':selected').val();
|
||||
$('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
|
||||
$('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
|
||||
$('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
|
||||
$('#elevenlabs_tts_speed_output').text(this.settings.speed + 'x');
|
||||
$('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Pupulate Provider UI given input settings
|
||||
if (Object.keys(settings).length == 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
// Migrate old settings
|
||||
if (settings['multilingual'] !== undefined) {
|
||||
settings.model = settings.multilingual ? 'eleven_multilingual_v1' : 'eleven_monolingual_v1';
|
||||
delete settings['multilingual'];
|
||||
}
|
||||
|
||||
if (Object.hasOwn(settings, 'apiKey')) {
|
||||
if (settings.apiKey && !secret_state[SECRET_KEYS.ELEVENLABS]){
|
||||
await writeSecret(SECRET_KEYS.ELEVENLABS, settings.apiKey);
|
||||
}
|
||||
delete settings['apiKey'];
|
||||
}
|
||||
|
||||
$('#elevenlabs_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.ELEVENLABS]);
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.on(event, this.handler);
|
||||
});
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw `Invalid setting passed to TTS Provider: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
$('#elevenlabs_tts_stability').val(this.settings.stability);
|
||||
$('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost);
|
||||
$('#elevenlabs_tts_style_exaggeration').val(this.settings.style_exaggeration);
|
||||
$('#elevenlabs_tts_speaker_boost').prop('checked', this.settings.speaker_boost);
|
||||
$('#elevenlabs_tts_speed').val(this.settings.speed);
|
||||
$('#elevenlabs_tts_model').val(this.settings.model);
|
||||
$('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_style_exaggeration').on('input', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_speaker_boost').on('change', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_speed').on('input', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this));
|
||||
$('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%');
|
||||
$('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
|
||||
$('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
|
||||
$('#elevenlabs_tts_speed_output').text(this.settings.speed + 'x');
|
||||
$('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
|
||||
try {
|
||||
await this.checkReady();
|
||||
console.debug('ElevenLabs: Settings loaded');
|
||||
} catch {
|
||||
console.debug('ElevenLabs: Settings loaded, but not ready');
|
||||
}
|
||||
|
||||
this.setupVoiceCloningMenu();
|
||||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
setupVoiceCloningMenu() {
|
||||
const audioFilesInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_audio_files'));
|
||||
const selectedFilesListElement = document.getElementById('elevenlabs_tts_selected_files_list');
|
||||
const cloneVoiceButton = document.getElementById('elevenlabs_tts_clone_voice_button');
|
||||
const uploadAudioFileButton = document.getElementById('upload_audio_file');
|
||||
const voiceCloningNameInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_name'));
|
||||
const voiceCloningDescriptionInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_description'));
|
||||
const voiceCloningLabelsInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_voice_cloning_labels'));
|
||||
|
||||
const updateCloneVoiceButtonVisibility = () => {
|
||||
cloneVoiceButton.style.display = audioFilesInput.files.length > 0 ? 'inline-block' : 'none';
|
||||
};
|
||||
|
||||
const clearSelectedFiles = () => {
|
||||
audioFilesInput.value = '';
|
||||
selectedFilesListElement.innerHTML = '';
|
||||
updateCloneVoiceButtonVisibility();
|
||||
};
|
||||
|
||||
uploadAudioFileButton.addEventListener('click', () => {
|
||||
audioFilesInput.click();
|
||||
});
|
||||
|
||||
audioFilesInput.addEventListener('change', () => {
|
||||
selectedFilesListElement.innerHTML = '';
|
||||
for (const file of audioFilesInput.files) {
|
||||
const listItem = document.createElement('div');
|
||||
listItem.textContent = file.name;
|
||||
selectedFilesListElement.appendChild(listItem);
|
||||
}
|
||||
updateCloneVoiceButtonVisibility();
|
||||
});
|
||||
|
||||
cloneVoiceButton.addEventListener('click', async () => {
|
||||
const voiceName = voiceCloningNameInput.value.trim();
|
||||
const voiceDescription = voiceCloningDescriptionInput.value.trim();
|
||||
const voiceLabels = voiceCloningLabelsInput.value.trim();
|
||||
|
||||
if (!voiceName) {
|
||||
toastr.error('Please provide a name for the cloned voice.');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.addVoice(voiceName, voiceDescription, voiceLabels);
|
||||
toastr.success('Voice cloned successfully. Hit reload to see the new voice in the voice listing.');
|
||||
clearSelectedFiles();
|
||||
voiceCloningNameInput.value = '';
|
||||
voiceCloningDescriptionInput.value = '';
|
||||
voiceCloningLabelsInput.value = '';
|
||||
} catch (error) {
|
||||
toastr.error(`Failed to clone voice: ${error.message}`);
|
||||
}
|
||||
});
|
||||
|
||||
updateCloneVoiceButtonVisibility();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get voice object by name
|
||||
* @param {string} voiceName Voice name to look up
|
||||
* @returns {Promise<Object>} Voice object
|
||||
*/
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.voices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
const match = this.voices.filter(
|
||||
elevenVoice => elevenVoice.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found in ElevenLabs account`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate TTS audio
|
||||
* @param {string} text Text to synthesize
|
||||
* @param {string} voiceId Voice ID to use for synthesis
|
||||
* @returns {Promise<Response>} Response object containing audio data
|
||||
*/
|
||||
async generateTts(text, voiceId) {
|
||||
const historyId = await this.findTtsGenerationInHistory(text, voiceId);
|
||||
|
||||
if (historyId) {
|
||||
console.debug(`Found existing TTS generation with id ${historyId}`);
|
||||
return await this.fetchTtsFromHistory(historyId);
|
||||
} else {
|
||||
console.debug('No existing TTS generation found, requesting new generation');
|
||||
return await this.fetchTtsGeneration(text, voiceId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find existing TTS generation in history
|
||||
* @param {string} message Message text used for TTS generation
|
||||
* @param {string} voiceId Voice ID used for TTS generation
|
||||
* @returns {Promise<string>} History item ID if found, empty string otherwise
|
||||
*/
|
||||
async findTtsGenerationInHistory(message, voiceId) {
|
||||
const ttsHistory = await this.fetchTtsHistory();
|
||||
for (const history of ttsHistory) {
|
||||
const text = history.text;
|
||||
const itemId = history.history_item_id;
|
||||
if (message === text && history.voice_id == voiceId) {
|
||||
console.info(`Existing TTS history item ${itemId} found: ${text} `);
|
||||
return itemId;
|
||||
}
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
async fetchTtsVoiceObjects() {
|
||||
const response = await fetch('/api/speech/elevenlabs/voices', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders({ omitContentType: true }),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
const responseJson = await response.json();
|
||||
return responseJson.voices;
|
||||
}
|
||||
|
||||
async fetchTtsVoiceSettings() {
|
||||
const response = await fetch('/api/speech/elevenlabs/voice-settings', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders({ omitContentType: true }),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
return response.json();
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch new TTS generation from ElevenLabs API
|
||||
* @param {string} text Text to synthesize
|
||||
* @param {string} voiceId Voice ID to use for synthesis
|
||||
* @returns {Promise<Response>} Response object containing audio data
|
||||
*/
|
||||
async fetchTtsGeneration(text, voiceId) {
|
||||
let model = this.settings.model ?? 'eleven_monolingual_v1';
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}, model ${model}`);
|
||||
const request = {
|
||||
model_id: model,
|
||||
text: text,
|
||||
voice_settings: {
|
||||
stability: Number(this.settings.stability),
|
||||
similarity_boost: Number(this.settings.similarity_boost),
|
||||
speed: Number(this.settings.speed),
|
||||
},
|
||||
};
|
||||
if (this.shouldInvolveExtendedSettings()) {
|
||||
request.voice_settings.style = Number(this.settings.style_exaggeration);
|
||||
request.voice_settings.use_speaker_boost = Boolean(this.settings.speaker_boost);
|
||||
}
|
||||
const response = await fetch('/api/speech/elevenlabs/synthesize', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
voiceId: voiceId,
|
||||
request: request,
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
toastr.error(response.statusText, 'TTS Generation Failed');
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch existing TTS audio from history
|
||||
* @param {string} historyItemId History item ID to fetch audio for
|
||||
* @returns {Promise<Response>} Response object containing audio data
|
||||
*/
|
||||
async fetchTtsFromHistory(historyItemId) {
|
||||
console.info(`Fetched existing TTS with history_item_id ${historyItemId}`);
|
||||
const response = await fetch('/api/speech/elevenlabs/history-audio', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
historyItemId: historyItemId,
|
||||
}),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch TTS generation history
|
||||
* @returns {Promise<Array>} Array of TTS history items
|
||||
*/
|
||||
async fetchTtsHistory() {
|
||||
const response = await fetch('/api/speech/elevenlabs/history', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders({ omitContentType: true }),
|
||||
});
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
const responseJson = await response.json();
|
||||
return responseJson.history;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new voice via ElevenLabs API
|
||||
* @param {string} name Voice name
|
||||
* @param {string} description Voice description
|
||||
* @param {string} labels Voice labels
|
||||
* @returns {Promise<Object>} Newly created voice object
|
||||
*/
|
||||
async addVoice(name, description, labels) {
|
||||
const audioFilesInput = /** @type {HTMLInputElement} */ (document.getElementById('elevenlabs_tts_audio_files'));
|
||||
if (!(audioFilesInput instanceof HTMLInputElement) || audioFilesInput.files.length === 0) {
|
||||
throw new Error('No audio files selected for voice cloning.');
|
||||
}
|
||||
|
||||
const data = {
|
||||
name: name,
|
||||
description: description,
|
||||
labels: labels,
|
||||
files: [],
|
||||
};
|
||||
|
||||
for (const file of audioFilesInput.files) {
|
||||
const base64Data = await getBase64Async(file);
|
||||
data.files.push(base64Data);
|
||||
}
|
||||
|
||||
const response = await fetch('/api/speech/elevenlabs/voices/add', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify(data),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}. See server console for details.`);
|
||||
}
|
||||
|
||||
return await response.json();
|
||||
}
|
||||
}
|
||||
195
data/st-core-scripts/scripts/extensions/tts/google-native.js
Normal file
195
data/st-core-scripts/scripts/extensions/tts/google-native.js
Normal file
@@ -0,0 +1,195 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { oai_settings } from '../../openai.js';
|
||||
import { isValidUrl } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
|
||||
/**
 * TTS provider that talks to the server's native Google TTS endpoints
 * (`/api/google/list-native-voices` and `/api/google/generate-native-tts`).
 */
export class GoogleNativeTtsProvider {
    /** Active settings; assigned by loadSettings(). */
    settings;
    /** Cached voice objects, filled by fetchTtsVoiceObjects(). */
    voices = [];
    separator = ' . ';
    /** Audio element used to play voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'gemini-2.5-flash-preview-tts',
        apiType: 'makersuite',
    };

    /** HTML for this provider's settings controls. */
    get settingsHtml() {
        return `
            <small>Hint: Save an API key in the Google AI Studio/Vertex AI connection settings</small>
            <div id="google-native-tts-settings">
                <div>
                    <label for="google-tts-api-type">API Type:</label>
                    <select id="google-tts-api-type">
                        <option value="makersuite">Google AI Studio (MakerSuite)</option>
                        <option value="vertexai" disabled>Google Vertex AI (unsupported)</option>
                    </select>
                </div>
                <div>
                    <label for="google-tts-model">Model:</label>
                    <select id="google-tts-model">
                        <option value="gemini-2.5-flash-preview-tts">Gemini 2.5 Flash Preview TTS</option>
                        <option value="gemini-2.5-pro-preview-tts">Gemini 2.5 Pro Preview TTS</option>
                    </select>
                </div>
            </div>`;
    }

    /**
     * Merge the saved settings over the defaults, reflect them in the UI and
     * run a readiness check. A failed readiness check is logged, not thrown.
     * @param {object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length === 0) {
            console.info('Using default Google TTS Provider settings');
        }

        this.settings = { ...this.defaultSettings, ...settings };

        $('#google-tts-api-type').val(this.settings.apiType);
        $('#google-tts-model').val(this.settings.model);

        $('#google-tts-api-type, #google-tts-model').on('change', () => this.onSettingsChange());

        try {
            await this.checkReady();
            console.debug('Google TTS: Settings loaded');
        } catch (err) {
            console.warn('Google TTS: Settings loaded, but not ready.', err.message);
        }
    }

    /** Copy the UI values into the settings, drop the voice cache and persist. */
    onSettingsChange() {
        this.settings.apiType = $('#google-tts-api-type').val();
        this.settings.model = $('#google-tts-model').val();

        this.voices = []; // Reset voices cache so it re-fetches
        saveTtsProviderSettings();
    }

    /**
     * Readiness check: succeeds when the voice list can be fetched.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** @returns {Promise<void>} */
    async onRefreshClick() {
        await this.checkReady();
    }

    /**
     * Find a voice by its name or its voice ID.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Matching voice object
     * @throws {Error} If no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName || voice.voice_id === voiceName);

        if (!match) {
            // Must be an Error: previewTtsVoice() reports failures through `error.message`.
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice to synthesize with
     * @returns {Promise<Response>} Successful response from the generation endpoint
     */
    async generateTts(text, voiceId) {
        return await this.fetchNativeTtsGeneration(text, voiceId);
    }

    /**
     * Fetch the voice list from the server and cache it in `this.voices`.
     * @returns {Promise<Array>} Voice objects
     * @throws {Error} On a non-OK response or when the response has no `voices` array
     */
    async fetchTtsVoiceObjects() {
        try {
            const response = await fetch('/api/google/list-native-voices', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({}),
            });

            if (!response.ok) {
                let errorMessage = `HTTP ${response.status}: ${response.statusText}`;

                try {
                    const errorJson = await response.json();
                    if (errorJson.error) {
                        errorMessage = errorJson.error;
                    }
                } catch (parseError) {
                    // Response isn't valid JSON, use the HTTP error message
                    console.debug('Error response is not JSON:', parseError.message);
                }

                throw new Error(errorMessage);
            }

            const responseJson = await response.json();

            if (!responseJson.voices || !Array.isArray(responseJson.voices)) {
                throw new Error('Invalid response format: voices array not found');
            }

            this.voices = responseJson.voices;
            console.info(`Google TTS: Loaded ${this.voices.length} voices`);

            return this.voices;
        } catch (error) {
            console.error('Failed to fetch Google TTS voices:', error);
            throw error;
        }
    }

    /**
     * Synthesize a short preview sentence with the given voice and play it.
     * Failures are logged and shown as an error toast instead of being thrown.
     * @param {string} id Voice name or voice ID
     * @returns {Promise<void>}
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        let url;
        try {
            const voice = await this.getVoice(id);
            const text = getPreviewString(voice.lang || 'en-US');

            // fetchNativeTtsGeneration() throws on a non-OK response, so no status check is needed here.
            const response = await this.fetchNativeTtsGeneration(text, id);

            const audioBlob = await response.blob();
            url = URL.createObjectURL(audioBlob);
            const playbackUrl = url;
            // Register the cleanup before starting playback so it cannot be missed.
            this.audioElement.onended = () => URL.revokeObjectURL(playbackUrl);
            this.audioElement.src = playbackUrl;
            // Await so that a rejected play() lands in the catch block below.
            await this.audioElement.play();
        } catch (error) {
            if (url) {
                // Playback never started; release the blob URL right away.
                URL.revokeObjectURL(url);
            }
            console.error('TTS Preview Error:', error);
            toastr.error(`Could not generate preview: ${error?.message ?? error}`);
        }
    }

    /**
     * Request speech synthesis from the server's native Google TTS endpoint.
     * The reverse proxy values from `oai_settings` are only sent when the
     * configured proxy URL passes isValidUrl().
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice to synthesize with
     * @returns {Promise<Response>} Successful response
     * @throws {Error} On a non-OK response; uses the server's `error` field when the body is JSON
     */
    async fetchNativeTtsGeneration(text, voiceId) {
        console.info(`Generating native Google TTS for voice_id ${voiceId}`);
        const useReverseProxy = oai_settings.reverse_proxy && isValidUrl(oai_settings.reverse_proxy);

        const response = await fetch('/api/google/generate-native-tts', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                model: this.settings.model,
                api: this.settings.apiType,
                reverse_proxy: useReverseProxy ? oai_settings.reverse_proxy : '',
                proxy_password: useReverseProxy ? oai_settings.proxy_password : '',
                vertexai_auth_mode: oai_settings.vertexai_auth_mode,
                vertexai_region: oai_settings.vertexai_region,
                vertexai_express_project_id: oai_settings.vertexai_express_project_id,
            }),
        });

        if (!response.ok) {
            let errorMessage = `HTTP ${response.status}: ${response.statusText}`;
            try {
                const errorJson = await response.json();
                if (errorJson.error) {
                    errorMessage = errorJson.error;
                }
            } catch {
                // Not a JSON response, do nothing and keep the original http error
            }
            throw new Error(errorMessage);
        }
        return response;
    }
}
|
||||
140
data/st-core-scripts/scripts/extensions/tts/google-translate.js
Normal file
140
data/st-core-scripts/scripts/extensions/tts/google-translate.js
Normal file
@@ -0,0 +1,140 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
export { GoogleTranslateTtsProvider };
|
||||
|
||||
/**
 * TTS provider that talks to the server's Google Translate voice endpoints
 * (`/api/google/list-voices` and `/api/google/generate-voice`).
 */
class GoogleTranslateTtsProvider {
    /** Active settings; assigned by loadSettings(). */
    settings;
    /** Cached voice objects, see fetchTtsVoiceObjects(). */
    voices = [];
    separator = ' . ';
    /** Audio element used to play voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /** This provider has no settings UI of its own. */
    get settingsHtml() {
        return '';
    }

    /** Drop the voice cache and persist the provider settings. */
    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Apply saved settings on top of the defaults and run a readiness check.
     * A failed readiness check is logged, not thrown.
     * @param {object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {Error} If `settings` contains a key that is not in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy: assigning defaultSettings itself would let the
        // loop below overwrite the defaults.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
        }

        try {
            await this.checkReady();
            console.debug('Google Translate TTS: Settings loaded');
        } catch {
            console.debug('Google Translate TTS: Settings loaded, but not ready');
        }
    }

    /**
     * Perform a simple readiness check by trying to fetch the voice list.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** @returns {Promise<void>} */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a voice by its name or its voice ID.
     * @param {string} voiceName Voice name or voice ID
     * @returns {Promise<object>} Matching voice object
     * @throws {Error} If no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName || voice.voice_id === voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to synthesize with
     * @returns {Promise<Response>} Successful response from the generation endpoint
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice map from the server and convert it into voice objects
     * sorted by display name. Each map key becomes both `voice_id` and `lang`,
     * and each map value becomes `name`.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     * @throws {Error} On a non-OK response
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/google/list-voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({}),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const voiceMap = await response.json();
        return Object.entries(voiceMap)
            .sort(([, nameA], [, nameB]) => nameA.localeCompare(nameB))
            .map(([id, name]) => ({ name, voice_id: id, preview_url: false, lang: id }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     * @throws {Error} If the voice is unknown, generation fails or playback cannot start
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration() throws on a non-OK response, so no status check is needed here.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        // Register the cleanup before starting playback so it cannot be missed.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.src = url;
        try {
            // Await so a rejected play() reaches the caller instead of being unhandled.
            await this.audioElement.play();
        } catch (error) {
            URL.revokeObjectURL(url);
            throw error;
        }
    }

    /**
     * Request speech synthesis from the server. The text is passed through
     * splitRecursive(text, 200) before it is sent.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID to synthesize with
     * @returns {Promise<Response>} Successful response
     * @throws {Error} On a non-OK response (an error toast is shown first)
     */
    async fetchTtsGeneration(text, voiceId) {
        const response = await fetch('/api/google/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: splitRecursive(text, 200),
                voice: voiceId,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
||||
223
data/st-core-scripts/scripts/extensions/tts/gpt-sovits-v2.js
Normal file
223
data/st-core-scripts/scripts/extensions/tts/gpt-sovits-v2.js
Normal file
@@ -0,0 +1,223 @@
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { GptSovitsV2Provider };
|
||||
|
||||
/**
 * TTS provider for a GPT-SoVITS-V2 server reached directly at
 * `settings.provider_endpoint`.
 */
class GptSovitsV2Provider {
    //########//
    // Config //
    //########//

    /** Active settings; assigned by loadSettings(). */
    settings;
    ready = false;
    /** Cached speaker list, filled by fetchTtsVoiceObjects(). */
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:9880',
        format: 'wav',
        lang: 'auto',
        streaming: false,
        text_lang: 'zh',
        prompt_lang: 'zh',
    };

    /** HTML for this provider's settings controls, pre-filled with the default values. */
    get settingsHtml() {
        return `

        <label for="tts_endpoint">Provider Endpoint:</label>
        <input id="tts_endpoint" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/v3ucn/GPT-SoVITS-V2">GPT-SoVITS-V2</a>(Unofficial).</span><br/>
        <label for="text_lang">Text Lang(Inference text language):</label>
        <input id="text_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.text_lang}"/>
        <label for="prompt_lang">Prompt Lang(Reference audio text language):</label>
        <input id="prompt_lang" type="text" class="text_pole" maxlength="250" height="300" value="${this.defaultSettings.prompt_lang}"/>
        <br/>

        `;
    }

    /** Copy the UI values into the settings and persist them. */
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#tts_endpoint').val();
        this.settings.text_lang = $('#text_lang').val();
        this.settings.prompt_lang = $('#prompt_lang').val();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Apply saved settings on top of the defaults, reflect them in the UI and
     * run the readiness check. Keys that are not in defaultSettings are ignored.
     * @param {object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy: assigning defaultSettings itself would let the
        // loop below overwrite the defaults.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#tts_endpoint').val(this.settings.provider_endpoint).on('change', this.onSettingsChange.bind(this));
        $('#text_lang').val(this.settings.text_lang).on('change', this.onSettingsChange.bind(this));
        $('#prompt_lang').val(this.settings.prompt_lang).on('change', this.onSettingsChange.bind(this));

        await this.checkReady();

        console.info('ITS: Settings loaded');
    }

    /**
     * Try to fetch the speaker list and apply the TTS settings.
     * Uses Promise.allSettled, so a failure of either step does not reject.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    /** @returns {Promise<void>} */
    async onRefreshClick() {
        return await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Find a speaker by name.
     * @param {string} voiceName Speaker name
     * @returns {Promise<object>} Matching entry of the speaker list
     * @throws {Error} If no speaker has that name
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(v => v.name === voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID, see fetchTtsGeneration()
     * @returns {Promise<Response>} Successful response from the provider endpoint
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `<provider_endpoint>/speakers` and cache it in `this.voices`.
     * @returns {Promise<any>} Parsed JSON body of the response
     * @throws {Error} On a non-OK response
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/speakers`);

        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and
            // interpolating a parsed object would only print "[object Object]".
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        this.voices = responseJson;
        return responseJson;
    }

    /**
     * Hook called whenever a parameter changes. Currently does nothing.
     * @returns {Promise<void>}
     */
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * The voice ID doubles as the reference audio file name
     * (`./参考音频/<voiceId>.wav`) and, with any `[...]` segments removed,
     * as the prompt text sent along with the request.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @param {string|null} [lang] Currently unused
     * @param {boolean} [forceNoStreaming] Currently unused
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} On a non-OK response (an error toast is shown first)
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // Strip "[...]" segments from the voice ID to get the prompt text.
        const promptText = voiceId.replace(/\[.*?\]/gu, '');

        const params = {
            text: inputText,
            prompt_text: promptText,
            ref_audio_path: './参考音频/' + voiceId + '.wav',
            text_lang: this.settings.text_lang,
            prompt_lang: this.settings.prompt_lang,
            text_split_method: 'cut5',
            batch_size: 1,
            media_type: 'ogg',
            streaming_mode: 'true',
        };

        const url = `${this.settings.provider_endpoint}/`;

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(params), // Convert parameter objects to JSON strings
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Interface not used; resolves with the ID it was given.
     * @param {string} history_item_id History item ID
     * @returns {Promise<string>} The same ID
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}
|
||||
267
data/st-core-scripts/scripts/extensions/tts/gsvi.js
Normal file
267
data/st-core-scripts/scripts/extensions/tts/gsvi.js
Normal file
@@ -0,0 +1,267 @@
|
||||
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { GSVITtsProvider };
|
||||
|
||||
class GSVITtsProvider {
|
||||
//########//
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
settings;
|
||||
ready = false;
|
||||
separator = '. ';
|
||||
|
||||
characterList = {};
|
||||
voices = [];
|
||||
/**
|
||||
* Perform any text processing before passing to TTS engine.
|
||||
* @param {string} text Input text
|
||||
* @returns {string} Processed text
|
||||
*/
|
||||
processText(text) {
|
||||
text = text.replace('<br>', '\n'); // Replace <br> with newline
|
||||
return text;
|
||||
}
|
||||
|
||||
languageLabels = {
|
||||
'Multilingual': '多语种混合',
|
||||
'Chinese': '中文',
|
||||
'English': '英文',
|
||||
'Japanese': '日文',
|
||||
'Chinese-English': '中英混合',
|
||||
'Japanese-English': '日英混合',
|
||||
};
|
||||
defaultSettings = {
|
||||
provider_endpoint: 'http://127.0.0.1:5000',
|
||||
|
||||
language: '多语种混合',
|
||||
|
||||
cha_name: '',
|
||||
character_emotion: 'default',
|
||||
|
||||
speed: 1,
|
||||
|
||||
top_k: 6,
|
||||
top_p: 0.85,
|
||||
temperature: 0.75,
|
||||
batch_size: 10,
|
||||
|
||||
stream: false,
|
||||
stream_chunk_size: 100,
|
||||
};
|
||||
|
||||
// Added new methods to obtain characters and emotions
|
||||
async fetchCharacterList() {
|
||||
const response = await fetch(this.settings.provider_endpoint + '/character_list');
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
const characterList = await response.json();
|
||||
this.characterList = characterList;
|
||||
this.voices = Object.keys(characterList);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<label for="gsvi_api_language">Text Language</label>
|
||||
<select id="gsvi_api_language">`;
|
||||
|
||||
for (let language in this.languageLabels) {
|
||||
if (this.languageLabels[language] == this.settings?.language) {
|
||||
html += `<option value="${this.languageLabels[language]}" selected="selected">${language}</option>`;
|
||||
continue;
|
||||
}
|
||||
|
||||
html += `<option value="${this.languageLabels[language]}">${language}</option>`;
|
||||
}
|
||||
|
||||
html += `
|
||||
</select>
|
||||
<label>GSVI Settings:</label><br/>
|
||||
<label for="gsvi_tts_endpoint">Provider Endpoint:</label>
|
||||
<input id="gsvi_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
|
||||
|
||||
<label for="gsvi_speed">Speed: <span id="gsvi_tts_speed_output">${this.defaultSettings.speed}</span></label>
|
||||
<input id="gsvi_speed" type="range" value="${this.defaultSettings.speed}" min="0.5" max="2" step="0.01" />
|
||||
|
||||
<label for="gsvi_top_k">Top K: <span id="gsvi_top_k_output">${this.defaultSettings.top_k}</span></label>
|
||||
<input id="gsvi_top_k" type="range" value="${this.defaultSettings.top_k}" min="0" max="100" step="1" />
|
||||
|
||||
<label for="gsvi_top_p">Top P: <span id="gsvi_top_p_output">${this.defaultSettings.top_p}</span></label>
|
||||
<input id="gsvi_top_p" type="range" value="${this.defaultSettings.top_p}" min="0" max="1" step="0.01" />
|
||||
|
||||
<label for="gsvi_temperature">Temperature: <span id="gsvi_tts_temperature_output">${this.defaultSettings.temperature}</span></label>
|
||||
<input id="gsvi_temperature" type="range" value="${this.defaultSettings.temperature}" min="0.01" max="1" step="0.01" />
|
||||
|
||||
<label for="gsvi_batch_size">Batch Size: <span id="gsvi_batch_size_output">${this.defaultSettings.batch_size}</span></label>
|
||||
<input id="gsvi_batch_size" type="range" value="${this.defaultSettings.batch_size}" min="1" max="35" step="1" />
|
||||
|
||||
<label for="gsvi_tts_streaming" class="checkbox_label">
|
||||
<input id="gsvi_tts_streaming" type="checkbox" ${this.defaultSettings.stream ? 'checked' : ''}/>
|
||||
<span>Streaming</span>
|
||||
</label>
|
||||
|
||||
<label for="gsvi_stream_chunk_size">Stream Chunk Size: <span id="gsvi_stream_chunk_size_output">${this.defaultSettings.stream_chunk_size}</span></label>
|
||||
<input id="gsvi_stream_chunk_size" type="range" value="${this.defaultSettings.stream_chunk_size}" min="100" max="400" step="1" />
|
||||
<p>
|
||||
For more information, visit the
|
||||
<a href="https://github.com/X-T-E-R/GPT-SoVITS-Inference" target="_blank">GSVI project page</a>.
|
||||
</p>
|
||||
`;
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Update provider settings based on input fields
|
||||
this.settings.provider_endpoint = $('#gsvi_tts_endpoint').val();
|
||||
this.settings.language = $('#gsvi_api_language').val();
|
||||
|
||||
|
||||
// Update the rest of TTS settings based on input fields
|
||||
this.settings.speed = parseFloat($('#gsvi_speed').val());
|
||||
this.settings.temperature = parseFloat($('#gsvi_temperature').val());
|
||||
this.settings.top_k = parseInt($('#gsvi_top_k').val(), 10);
|
||||
this.settings.top_p = parseFloat($('#gsvi_top_p').val());
|
||||
this.settings.batch_size = parseInt($('#gsvi_batch_size').val(), 10);
|
||||
this.settings.stream = $('#gsvi_tts_streaming').is(':checked');
|
||||
this.settings.stream_chunk_size = parseInt($('#gsvi_stream_chunk_size').val(), 10);
|
||||
|
||||
// Update UI to reflect changes
|
||||
|
||||
$('#gsvi_tts_speed_output').text(this.settings.speed);
|
||||
$('#gsvi_tts_temperature_output').text(this.settings.temperature);
|
||||
$('#gsvi_top_k_output').text(this.settings.top_k);
|
||||
$('#gsvi_top_p_output').text(this.settings.top_p);
|
||||
$('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
|
||||
$('#gsvi_batch_size_output').text(this.settings.batch_size);
|
||||
|
||||
|
||||
|
||||
|
||||
// Persist settings changes
|
||||
saveTtsProviderSettings();
|
||||
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length === 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = { ...this.defaultSettings, ...settings };
|
||||
|
||||
// Fetch character and emotion list
|
||||
// Set initial values from the settings
|
||||
$('#gsvi_tts_endpoint').val(this.settings.provider_endpoint);
|
||||
$('#gsvi_api_language').val(this.settings.language);
|
||||
|
||||
$('#gsvi_speed').val(this.settings.speed);
|
||||
$('#gsvi_temperature').val(this.settings.temperature);
|
||||
$('#gsvi_top_k').val(this.settings.top_k);
|
||||
$('#gsvi_top_p').val(this.settings.top_p);
|
||||
$('#gsvi_batch_size').val(this.settings.batch_size);
|
||||
$('#gsvi_tts_streaming').prop('checked', this.settings.stream);
|
||||
$('#gsvi_stream_chunk_size').val(this.settings.stream_chunk_size);
|
||||
|
||||
// Update UI to reflect initial settings
|
||||
$('#gsvi_tts_speed_output').text(this.settings.speed);
|
||||
$('#gsvi_tts_temperature_output').text(this.settings.temperature);
|
||||
$('#gsvi_top_k_output').text(this.settings.top_k);
|
||||
$('#gsvi_top_p_output').text(this.settings.top_p);
|
||||
$('#gsvi_stream_chunk_size_output').text(this.settings.stream_chunk_size);
|
||||
|
||||
// Register event listeners to update settings on user interaction
|
||||
// (Similar to before, ensure event listeners for character and emotion selection are included)
|
||||
// Register input/change event listeners to update settings on user interaction
|
||||
$('#gsvi_tts_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_api_language').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#gsvi_speed').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_temperature').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_top_k').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_top_p').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_batch_size').on('input', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_tts_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
$('#gsvi_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
await this.checkReady();
|
||||
console.debug('GSVI: Settings loaded');
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
async checkReady() {
|
||||
await Promise.allSettled([this.fetchCharacterList()]);
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
return;
|
||||
}
|
||||
|
||||
//#################//
|
||||
// TTS Interfaces //
|
||||
//#################//
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.fetchCharacterList();
|
||||
}
|
||||
if (!this.voices.includes(voiceName)) {
|
||||
throw `TTS Voice name ${voiceName} not found`;
|
||||
}
|
||||
return { name: voiceName, voice_id: voiceName, preview_url: false, lang: 'zh-CN' };
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
return response;
|
||||
}
|
||||
|
||||
//###########//
|
||||
// API CALLS //
|
||||
//###########//
|
||||
async fetchTtsVoiceObjects() {
|
||||
if (this.voices.length == 0) {
|
||||
await this.fetchCharacterList();
|
||||
}
|
||||
console.log(this.voices);
|
||||
const voices = this.voices.map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'zh-CN' }));
|
||||
return voices;
|
||||
}
|
||||
|
||||
|
||||
async fetchTtsGeneration(inputText, voiceId) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
|
||||
const params = new URLSearchParams();
|
||||
params.append('text', inputText);
|
||||
params.append('cha_name', voiceId);
|
||||
params.append('text_language', this.settings.language);
|
||||
params.append('batch_size', this.settings.batch_size.toString());
|
||||
params.append('speed', this.settings.speed.toString());
|
||||
params.append('top_k', this.settings.top_k.toString());
|
||||
params.append('top_p', this.settings.top_p.toString());
|
||||
params.append('temperature', this.settings.temperature.toString());
|
||||
params.append('stream', this.settings.stream.toString());
|
||||
|
||||
|
||||
return `${this.settings.provider_endpoint}/tts?${params.toString()}`;
|
||||
|
||||
}
|
||||
|
||||
// Interface not used by GSVI TTS
|
||||
async fetchTtsFromHistory(history_item_id) {
|
||||
return Promise.resolve(history_item_id);
|
||||
}
|
||||
|
||||
}
|
||||
1521
data/st-core-scripts/scripts/extensions/tts/index.js
Normal file
1521
data/st-core-scripts/scripts/extensions/tts/index.js
Normal file
File diff suppressed because it is too large
Load Diff
113
data/st-core-scripts/scripts/extensions/tts/kokoro-worker.js
Normal file
113
data/st-core-scripts/scripts/extensions/tts/kokoro-worker.js
Normal file
@@ -0,0 +1,113 @@
|
||||
// kokoro-worker.js

/**
 * Kokoro engine instance; stays null until an 'initialize' request succeeds.
 * @type {import('./lib/kokoro.web.js').KokoroTTS}
 */
let tts = null;

/**
 * Whether the engine finished initializing.
 * @type {boolean}
 */
let ready = false;

/**
 * Voice ids reported by the engine after initialization.
 * @type {string[]}
 */
let voices = [];

/**
 * Dispatches requests from the main thread.
 *
 * Supported actions and their replies:
 *  - 'initialize'  -> { action: 'initialized', success, voices } or { ..., success: false, error }
 *  - 'generateTts' -> { action: 'generatedTts', success, blobUrl | error, requestId }
 *  - 'checkReady'  -> { action: 'readyStatus', ready }
 * Any other action is ignored.
 */
self.onmessage = async function (event) {
    const { action, data } = event.data;

    if (action === 'initialize') {
        try {
            const success = await initializeTts(data);
            self.postMessage({ action: 'initialized', success, voices });
        } catch (error) {
            self.postMessage({ action: 'initialized', success: false, error: error.message });
        }
        return;
    }

    if (action === 'generateTts') {
        try {
            const audioBlob = await generateTts(data.text, data.voice, data.speakingRate);
            // The audio is handed over as an object URL rather than as the blob itself.
            const blobUrl = URL.createObjectURL(audioBlob);
            self.postMessage({
                action: 'generatedTts',
                success: true,
                blobUrl,
                requestId: data.requestId,
            });
        } catch (error) {
            self.postMessage({
                action: 'generatedTts',
                success: false,
                error: error.message,
                requestId: data.requestId,
            });
        }
        return;
    }

    if (action === 'checkReady') {
        self.postMessage({ action: 'readyStatus', ready });
    }
};
|
||||
|
||||
/**
 * Loads the Kokoro library and creates the engine from the given settings.
 * On success the module-level `tts`, `voices` and `ready` are updated.
 *
 * @param {{modelId: string, dtype: string, device: string}} settings Model selection.
 * @returns {Promise<boolean>} Always `true` on success.
 * @throws Re-throws whatever made initialization fail, after logging it and clearing `ready`.
 */
async function initializeTts(settings) {
    try {
        const kokoroModule = await import('./lib/kokoro.web.js');
        const { modelId, dtype, device } = settings;

        console.log('Worker: Initializing Kokoro TTS with settings:', { modelId, dtype, device });

        // Create TTS instance
        tts = await kokoroModule.KokoroTTS.from_pretrained(modelId, { dtype, device });

        // Remember the voices the engine offers
        voices = Object.keys(tts.voices);

        // Refuse an instance that cannot synthesize
        const canGenerate = typeof tts.generate === 'function';
        if (!canGenerate) {
            throw new Error('TTS instance does not have generate method');
        }

        console.log('Worker: TTS initialized successfully');
        ready = true;
        return true;
    } catch (error) {
        console.error('Worker: Kokoro TTS initialization failed:', error);
        ready = false;
        throw error;
    }
}
|
||||
|
||||
/**
 * Synthesizes `text` with the initialized engine.
 *
 * @param {string} text Text to speak; must contain non-whitespace characters.
 * @param {string} voiceId Voice passed to the engine as `voice`.
 * @param {number} [speakingRate] Speed multiplier; falsy values fall back to 1.0.
 * @returns {Promise<*>} Result of `audio.toBlob()` for the generated audio.
 * @throws {Error} 'TTS engine not initialized' or 'Empty text'; engine errors are logged and re-thrown.
 */
async function generateTts(text, voiceId, speakingRate) {
    const engineAvailable = ready && tts;
    if (!engineAvailable) {
        throw new Error('TTS engine not initialized');
    }

    if (text.trim().length === 0) {
        throw new Error('Empty text');
    }

    const options = {
        voice: voiceId,
        speed: speakingRate || 1.0,
    };

    try {
        const audio = await tts.generate(text, options);
        return audio.toBlob();
    } catch (error) {
        console.error('Worker: TTS generation failed:', error);
        throw error;
    }
}
|
||||
352
data/st-core-scripts/scripts/extensions/tts/kokoro.js
Normal file
352
data/st-core-scripts/scripts/extensions/tts/kokoro.js
Normal file
@@ -0,0 +1,352 @@
|
||||
import { debounce_timeout } from '../../constants.js';
|
||||
import { debounceAsync, splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
/**
 * Text-to-speech provider that runs the Kokoro model inside a dedicated module worker
 * (`kokoro-worker.js`) and talks to it through `postMessage`.
 *
 * Requests waiting for a worker reply are kept in `pendingRequests`, keyed by a numeric
 * request id, or by the string 'initialization' for the start-up handshake.
 */
export class KokoroTtsProvider {
    constructor() {
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        this.ready = false;
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        this.worker = null;
        this.separator = ' ... ... ... ';
        this.pendingRequests = new Map();
        this.nextRequestId = 1;

        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // TILDE!
        text = text.replace(/~/g, '.');
        return text;
    }

    /**
     * Merges saved settings into the defaults (only keys that are present) and binds
     * the settings controls to `onSettingsChange`.
     * @param {object} settings Saved provider settings
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (settings.modelId !== undefined) this.settings.modelId = settings.modelId;
        if (settings.dtype !== undefined) this.settings.dtype = settings.dtype;
        if (settings.device !== undefined) this.settings.device = settings.device;
        if (settings.voiceMap !== undefined) this.settings.voiceMap = settings.voiceMap;
        if (settings.defaultVoice !== undefined) this.settings.defaultVoice = settings.defaultVoice;
        if (settings.speakingRate !== undefined) this.settings.speakingRate = settings.speakingRate;

        $('#kokoro_model_id').val(this.settings.modelId).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_dtype').val(this.settings.dtype).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_device').val(this.settings.device).on('change', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', this.onSettingsChange.bind(this));
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }

    /**
     * Rejects and forgets every request that is still waiting for a worker reply.
     * Used when the worker is replaced or disposed, because a terminated worker never answers.
     * @param {Error} reason Rejection reason handed to each waiting caller
     */
    rejectPendingRequests(reason) {
        const abandoned = [...this.pendingRequests.values()];
        this.pendingRequests.clear();
        for (const request of abandoned) {
            request.reject(reason);
        }
    }

    /**
     * (Re)creates the worker and asks it to load the model described by the current settings.
     * Any previous worker is terminated and its outstanding requests are rejected.
     * @returns {Promise<boolean>} Resolves with the worker's success flag; rejects when
     *     initialization fails, times out (600 s) or is superseded by a newer call.
     */
    initializeWorker() {
        return new Promise((resolve, reject) => {
            try {
                // Terminate the existing worker if it exists
                if (this.worker) {
                    this.worker.terminate();
                    $('#kokoro_status_text').text('Initializing...').removeAttr('style');
                }

                // Replies for these requests can no longer arrive. This also clears the
                // timeout of a superseded initialization, so it cannot fire later.
                this.rejectPendingRequests(new Error('Kokoro worker was restarted'));

                // Create a new worker
                const worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
                this.worker = worker;
                const isCurrent = () => this.worker === worker;

                // Only the current worker may change provider state
                worker.onmessage = (event) => {
                    if (isCurrent()) {
                        this.handleWorkerMessage(event);
                    }
                };

                // Fail fast when the worker script cannot be loaded or crashes during start-up,
                // instead of waiting for the initialization timeout.
                worker.onerror = (event) => {
                    console.error('Kokoro worker error:', event);
                    const initRequest = this.pendingRequests.get('initialization');
                    if (isCurrent() && initRequest) {
                        this.pendingRequests.delete('initialization');
                        initRequest.reject(new Error(event?.message || 'Worker failed to start'));
                    }
                };

                // Initialize the worker with the current settings
                worker.postMessage({
                    action: 'initialize',
                    data: {
                        modelId: this.settings.modelId,
                        dtype: this.settings.dtype,
                        device: this.settings.device,
                    },
                });

                // Create a promise that will settle when initialization completes
                const initPromise = new Promise((initResolve, initReject) => {
                    const entry = {
                        resolve: (result) => {
                            clearTimeout(timeoutId);
                            initResolve(result);
                        },
                        reject: (error) => {
                            clearTimeout(timeoutId);
                            initReject(error);
                        },
                    };
                    const timeoutId = setTimeout(() => {
                        if (this.pendingRequests.get('initialization') === entry) {
                            this.pendingRequests.delete('initialization');
                        }
                        initReject(new Error('Worker initialization timed out'));
                    }, 600000); // 600 second timeout

                    this.pendingRequests.set('initialization', entry);
                });

                // Settle the outer promise; a superseded attempt must not touch provider state
                initPromise.then(success => {
                    if (isCurrent()) {
                        this.ready = success;
                        this.updateStatusDisplay();
                    }
                    resolve(success);
                }).catch(error => {
                    if (isCurrent()) {
                        console.error('Worker initialization failed:', error);
                        this.ready = false;
                        this.updateStatusDisplay();
                    }
                    reject(error);
                });
            } catch (error) {
                console.error('Failed to create worker:', error);
                this.ready = false;
                this.updateStatusDisplay();
                reject(error);
            }
        });
    }

    /**
     * Routes a worker reply to the request that is waiting for it.
     * @param {MessageEvent} event Message posted by `kokoro-worker.js`
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;

        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (initRequest) {
                    if (success) {
                        initRequest.resolve(true);
                    } else {
                        initRequest.reject(new Error(error || 'Initialization failed'));
                    }
                    this.pendingRequests.delete('initialization');
                }
            } break;
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (request) {
                    if (success) {
                        // The worker sends an object URL; read the audio back and wrap it in a Response
                        fetch(blobUrl).then(response => response.blob()).then(audioBlob => {
                            // Clean up the blob URL
                            URL.revokeObjectURL(blobUrl);

                            request.resolve(new Response(audioBlob, {
                                headers: {
                                    'Content-Type': 'audio/wav',
                                },
                            }));
                        }).catch(error => {
                            request.reject(new Error('Failed to fetch TTS audio blob: ' + error));
                        });
                    } else {
                        request.reject(new Error(error || 'TTS generation failed'));
                    }
                    this.pendingRequests.delete(requestId);
                }
            } break;
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }

    /** Shows 'Ready' (green) or 'Failed' (red) in the status line, depending on `this.ready`. */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }

    /**
     * Starts the worker when there is none; otherwise asks the existing worker for its status.
     * @returns {Promise<boolean>} With an existing worker this is the last known readiness:
     *     the worker's reply arrives later and updates `this.ready` asynchronously.
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }

        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }

    /** Restarts the worker. */
    async onRefreshClick() {
        return await this.initializeWorker();
    }

    /** Settings markup, pre-filled from the current settings. */
    get settingsHtml() {
        return `
            <div class="kokoro_tts_settings">
                <label for="kokoro_model_id">Model ID:</label>
                <input id="kokoro_model_id" type="text" class="text_pole" value="${this.settings.modelId}" />

                <label for="kokoro_dtype">Data Type:</label>
                <select id="kokoro_dtype" class="text_pole">
                    <option value="q8" ${this.settings.dtype === 'q8' ? 'selected' : ''}>q8 (Recommended)</option>
                    <option value="fp32" ${this.settings.dtype === 'fp32' ? 'selected' : ''}>fp32 (High Precision)</option>
                    <option value="fp16" ${this.settings.dtype === 'fp16' ? 'selected' : ''}>fp16</option>
                    <option value="q4" ${this.settings.dtype === 'q4' ? 'selected' : ''}>q4 (Low Memory)</option>
                    <option value="q4f16" ${this.settings.dtype === 'q4f16' ? 'selected' : ''}>q4f16</option>
                </select>

                <label for="kokoro_device">Device:</label>
                <select id="kokoro_device" class="text_pole">
                    <option value="wasm" ${this.settings.device === 'wasm' ? 'selected' : ''}>WebAssembly (CPU)</option>
                    <option value="webgpu" ${this.settings.device === 'webgpu' ? 'selected' : ''}>WebGPU (GPU Acceleration)</option>
                </select>

                <label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${this.settings.speakingRate}x</span></label>
                <input id="kokoro_speaking_rate" type="range" value="${this.settings.speakingRate}" min="0.5" max="2.0" step="0.1" />

                <hr>
                <div>
                    Status: <span id="kokoro_status_text">Initializing...</span>
                </div>
            </div>
        `;
    }

    /** Reads the settings controls, schedules a worker restart and persists the settings. */
    async onSettingsChange() {
        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());

        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');

        // Reinitialize TTS engine with debounce. Nobody awaits this call, so observe the
        // outcome here; real failures are already logged and shown by initializeWorker().
        Promise.resolve(this.initTtsDebounced()).catch(error => {
            console.debug('Kokoro TTS re-initialization did not complete:', error);
        });
        saveTtsProviderSettings();
    }

    /**
     * Lists the built-in voices as voice objects.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }
        return this.voices.map(voice => ({
            name: voice,
            voice_id: voice,
            preview_url: null,
            lang: voice.startsWith('b') ? 'en-GB' : 'en-US',
        }));
    }

    /**
     * Generates and plays a preview sentence for the given voice, one audio chunk after another.
     * A chunk that cannot be played is logged and skipped instead of blocking forever.
     * @param {string} voiceId Voice ID
     * @returns {Promise<void>}
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }

        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        for await (const response of this.generateTts(previewText, voiceId)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise(resolve => {
                    const audioElement = new Audio();
                    const stopWaiting = (error) => {
                        console.error('Kokoro TTS preview playback failed:', error);
                        resolve();
                    };
                    audioElement.onended = () => resolve();
                    audioElement.onerror = () => stopWaiting(audioElement.error);
                    audioElement.src = url;
                    // play() rejects e.g. when the browser blocks playback
                    Promise.resolve(audioElement.play()).catch(stopWaiting);
                });
            } finally {
                // Release the object URL even when playback did not finish normally
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * @param {string} voiceId Voice ID
     * @returns {string} The id itself; voices have no separate display name
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }

    /**
     * Builds the voice object for a voice name, falling back to the default voice
     * when the name is not one of the built-in voices.
     * @param {string} voiceName Requested voice
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}}
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * The text is split into chunks of at most 400 characters; one response is yielded per chunk.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    async* generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            await this.initializeWorker();
        }

        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }

        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }

        const voice = this.getVoice(voiceId);

        const chunkSize = 400;
        const chunks = splitRecursive(text, chunkSize, ['\n\n', '\n', '.', '?', '!', ',', ' ', '']);

        for (const chunk of chunks) {
            // The worker rejects blank text; skip such chunks so they cannot abort the utterance
            if (chunk.trim().length === 0) {
                continue;
            }

            // A fresh id per chunk keeps every worker reply tied to exactly one request
            const requestId = this.nextRequestId++;

            yield await new Promise((resolve, reject) => {
                // Store the promise callbacks
                this.pendingRequests.set(requestId, { resolve, reject });

                // Send the request to the worker
                this.worker.postMessage({
                    action: 'generateTts',
                    data: {
                        text: chunk,
                        voice: voice.voice_id,
                        speakingRate: this.settings.speakingRate || 1.0,
                        requestId,
                    },
                });
            });
        }
    }

    /** Stops the worker and rejects every request that was still waiting for it. */
    dispose() {
        // Clean up the worker when the provider is disposed
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }
        this.rejectPendingRequests(new Error('Kokoro TTS provider was disposed'));
    }
}
|
||||
@@ -0,0 +1,8 @@
|
||||
# kokoro-js
|
||||
|
||||
* Author: hexgrad
|
||||
* NPM: <https://www.npmjs.com/package/kokoro-js>
|
||||
* Version: 1.2.0
|
||||
* License: Apache-2.0
|
||||
|
||||
Last updated: 2025-03-10
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,73 @@
|
||||
/**
 * AudioWorklet processor that plays a stream of 16-bit PCM chunks.
 *
 * Messages accepted on `port`:
 *  - `{ pcmData }`: raw bytes of 16-bit samples; a trailing odd byte is kept and
 *    joined with the next chunk.
 *  - `{ volume }`: output gain, clamped to be non-negative.
 *
 * Samples are queued in a ring buffer (`readIndex === writeIndex` means empty) that
 * grows when a chunk would not fit.
 */
class PCMProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        this.buffer = new Float32Array(24000 * 30); // Pre-allocate buffer for ~30 seconds at 24kHz
        this.writeIndex = 0;
        this.readIndex = 0;
        this.pendingBytes = new Uint8Array(0); // Buffer for incomplete samples
        this.volume = 1.0; // Default volume (1.0 = 100%, 0.5 = 50%, etc.)
        this.port.onmessage = (event) => {
            if (event.data.pcmData) {
                this.enqueuePcm(new Uint8Array(event.data.pcmData));
            } else if (event.data.volume !== undefined) {
                // Set volume (0.0 to 1.0, can go higher for amplification)
                this.volume = Math.max(0, event.data.volume);
            }
        };
    }

    /**
     * Appends the complete 16-bit samples found in `newData` (plus any byte left over
     * from the previous chunk) to the ring buffer.
     * @param {Uint8Array} newData Newly received PCM bytes
     */
    enqueuePcm(newData) {
        // Combine any pending bytes with new data
        const combined = new Uint8Array(this.pendingBytes.length + newData.length);
        combined.set(this.pendingBytes);
        combined.set(newData, this.pendingBytes.length);

        // Calculate how many complete 16-bit samples we have
        const completeSamples = Math.floor(combined.length / 2);
        const bytesToProcess = completeSamples * 2;

        if (completeSamples > 0) {
            const samples = new Int16Array(combined.buffer, 0, completeSamples);

            // Make room first, so the writes below can never run over unread samples
            this.ensureCapacity(completeSamples);

            for (const sample of samples) {
                this.buffer[this.writeIndex] = sample / 32768.0; // Convert 16-bit to float
                this.writeIndex = (this.writeIndex + 1) % this.buffer.length;
            }
        }

        // Store any remaining incomplete byte (empty when the length was even)
        this.pendingBytes = combined.slice(bytesToProcess);
    }

    /**
     * Grows the ring buffer (doubling) until `incoming` more samples fit, moving the
     * unread samples to the start of the new buffer in playback order.
     * @param {number} incoming Number of samples about to be written
     */
    ensureCapacity(incoming) {
        const capacity = this.buffer.length;
        const buffered = (this.writeIndex - this.readIndex + capacity) % capacity;

        // One slot always stays free, otherwise a full buffer would look empty
        const required = buffered + incoming + 1;
        if (required <= capacity) {
            return;
        }

        let newCapacity = capacity * 2;
        while (newCapacity < required) {
            newCapacity *= 2;
        }

        const grown = new Float32Array(newCapacity);
        for (let i = 0; i < buffered; i++) {
            grown[i] = this.buffer[(this.readIndex + i) % capacity];
        }
        this.buffer = grown;
        this.readIndex = 0;
        this.writeIndex = buffered;
    }

    /**
     * Fills the first channel of the first output with queued samples scaled by the volume.
     * Frames for which nothing is queued are left untouched.
     * @returns {boolean} Always true
     */
    process(inputs, outputs, parameters) {
        const output = outputs[0];
        if (output.length > 0 && this.readIndex !== this.writeIndex) {
            const channelData = output[0];
            for (let i = 0; i < channelData.length && this.readIndex !== this.writeIndex; i++) {
                channelData[i] = this.buffer[this.readIndex] * this.volume;
                this.readIndex = (this.readIndex + 1) % this.buffer.length;
            }
        }
        return true;
    }
}

registerProcessor('pcm-processor', PCMProcessor);
|
||||
15
data/st-core-scripts/scripts/extensions/tts/manifest.json
Normal file
15
data/st-core-scripts/scripts/extensions/tts/manifest.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"display_name": "TTS",
|
||||
"loading_order": 10,
|
||||
"requires": [],
|
||||
"optional": [
|
||||
"silero-tts",
|
||||
"edge-tts",
|
||||
"coqui-tts"
|
||||
],
|
||||
"js": "index.js",
|
||||
"css": "style.css",
|
||||
"author": "Ouoertheo#7264",
|
||||
"version": "1.0.0",
|
||||
"homePage": "None"
|
||||
}
|
||||
963
data/st-core-scripts/scripts/extensions/tts/minimax.js
Normal file
963
data/st-core-scripts/scripts/extensions/tts/minimax.js
Normal file
@@ -0,0 +1,963 @@
|
||||
import { getPreviewString, initVoiceMap, saveTtsProviderSettings } from './index.js';
|
||||
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getBase64Async } from '../../utils.js';
|
||||
|
||||
export { MiniMaxTtsProvider };
|
||||
|
||||
class MiniMaxTtsProvider {
|
||||
//########//
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
settings;
|
||||
voices = [];
|
||||
separator = ' . ';
|
||||
audioElement = document.createElement('audio');
|
||||
|
||||
defaultSettings = {
|
||||
apiHost: 'https://api.minimax.io',
|
||||
model: 'speech-02-hd',
|
||||
voiceMap: {},
|
||||
speed: { default: 1.0, min: 0.5, max: 2.0, step: 0.1 },
|
||||
volume: { default: 1.0, min: 0.0, max: 10.0, step: 0.1 },
|
||||
pitch: { default: 0, min: -12, max: 12, step: 1 },
|
||||
audioSampleRate: 32000,
|
||||
bitrate: 128000,
|
||||
format: 'mp3',
|
||||
customModels: [],
|
||||
customVoices: [],
|
||||
customVoiceId: '',
|
||||
};
|
||||
|
||||
// MiniMax API doesn't provide a method to list user's cloned voices
|
||||
// so users need to manually input their custom cloned voice IDs
|
||||
static defaultVoices = [
|
||||
{ name: 'Unrestrained Young Man', voice_id: 'Chinese (Mandarin)_Unrestrained_Young_Man', lang: 'zh-CN', preview_url: null },
|
||||
];
|
||||
|
||||
// default models (by MiniMax doc)
|
||||
static defaultModels = [
|
||||
{ id: 'speech-02-hd', name: 'Speech-02-HD (High Quality)' },
|
||||
{ id: 'speech-02-turbo', name: 'Speech-02-Turbo (Fast)' },
|
||||
{ id: 'speech-01', name: 'Speech-01 (Legacy)' },
|
||||
{ id: 'speech-01-240228', name: 'Speech-01-240228 (Legacy)' },
|
||||
];
|
||||
|
||||
availableModels = [];
|
||||
availableVoices = [];
|
||||
|
||||
get settingsHtml() {
|
||||
return `
|
||||
<div class="minimax_tts_settings">
|
||||
<div class="tts_block justifyCenter">
|
||||
<div id="api_key_minimax" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_minimax">
|
||||
<i class="fa-solid fa-key"></i>
|
||||
<span>Click to set API Key</span>
|
||||
</div>
|
||||
<div id="minimax_group_id" class="menu_button menu_button_icon manage-api-keys" data-key="minimax_group_id">
|
||||
<i class="fa-solid fa-key"></i>
|
||||
<span>Click to set Group ID</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_api_host">API Host</label>
|
||||
<select id="minimax_tts_api_host" class="text_pole">
|
||||
<option value="https://api.minimax.io">Official (api.minimax.io)</option>
|
||||
<option value="https://api.minimaxi.chat">Global (api.minimaxi.chat)</option>
|
||||
<option value="https://api.minimax.chat">Mainland China (api.minimax.chat)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_model">Model</label>
|
||||
<select id="minimax_tts_model" class="text_pole">
|
||||
<option value="speech-02-hd">Speech-02-HD (High Quality)</option>
|
||||
<option value="speech-02-turbo">Speech-02-Turbo (Fast)</option>
|
||||
<option value="speech-01">Speech-01 (Legacy)</option>
|
||||
<option value="speech-01-240228">Speech-01-240228 (Legacy)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<input id="minimax_connect" class="menu_button" type="button" value="Connect" />
|
||||
<input id="minimax_refresh" class="menu_button" type="button" value="Refresh" />
|
||||
</div>
|
||||
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_speed">Speed: <span id="minimax_tts_speed_output"></span></label>
|
||||
<input id="minimax_tts_speed" type="range" value="${this.defaultSettings.speed.default}" min="${this.defaultSettings.speed.min}" max="${this.defaultSettings.speed.max}" step="${this.defaultSettings.speed.step}" />
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_volume">Volume: <span id="minimax_tts_volume_output"></span></label>
|
||||
<input id="minimax_tts_volume" type="range" value="${this.defaultSettings.volume.default}" min="${this.defaultSettings.volume.min}" max="${this.defaultSettings.volume.max}" step="${this.defaultSettings.volume.step}" />
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_pitch">Pitch: <span id="minimax_tts_pitch_output"></span></label>
|
||||
<input id="minimax_tts_pitch" type="range" value="${this.defaultSettings.pitch.default}" min="${this.defaultSettings.pitch.min}" max="${this.defaultSettings.pitch.max}" step="${this.defaultSettings.pitch.step}" />
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_format">Audio Format</label>
|
||||
<select id="minimax_tts_format" class="text_pole">
|
||||
<option value="mp3">MP3</option>
|
||||
<option value="wav">WAV</option>
|
||||
<option value="flac">FLAC</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<div class="tts_block">
|
||||
<label for="minimax_tts_custom_voice_id">Custom Voice ID (for 'customVoice' option)</label>
|
||||
<input id="minimax_tts_custom_voice_id" type="text" class="text_pole" placeholder="Enter custom voice ID from MiniMax platform"/>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<div id="minimax_custom_voice_cloning" class="tts_block flexFlowColumn">
|
||||
<h4>Custom Voice Management</h4>
|
||||
<div class="tts_block wide100p">
|
||||
<input id="minimax_custom_voice_name" type="text" class="text_pole" placeholder="Voice Name"/>
|
||||
</div>
|
||||
<div class="tts_block wide100p">
|
||||
<input id="minimax_custom_voice_id" type="text" class="text_pole" placeholder="Voice ID (from MiniMax platform)"/>
|
||||
</div>
|
||||
<div class="tts_block wide100p">
|
||||
<select id="minimax_custom_voice_lang" class="text_pole">
|
||||
<option value="auto">Auto Detect</option>
|
||||
<option value="Chinese">Chinese (中文)</option>
|
||||
<option value="Chinese,Yue">Chinese, Yue (粤语)</option>
|
||||
<option value="English">English</option>
|
||||
<option value="Arabic">Arabic (العربية)</option>
|
||||
<option value="Russian">Russian (Русский)</option>
|
||||
<option value="Spanish">Spanish (Español)</option>
|
||||
<option value="French">French (Français)</option>
|
||||
<option value="Portuguese">Portuguese (Português)</option>
|
||||
<option value="German">German (Deutsch)</option>
|
||||
<option value="Turkish">Turkish (Türkçe)</option>
|
||||
<option value="Dutch">Dutch (Nederlands)</option>
|
||||
<option value="Ukrainian">Ukrainian (Українська)</option>
|
||||
<option value="Vietnamese">Vietnamese (Tiếng Việt)</option>
|
||||
<option value="Indonesian">Indonesian (Bahasa Indonesia)</option>
|
||||
<option value="Japanese">Japanese (日本語)</option>
|
||||
<option value="Italian">Italian (Italiano)</option>
|
||||
<option value="Korean">Korean (한국어)</option>
|
||||
<option value="Thai">Thai (ไทย)</option>
|
||||
<option value="Polish">Polish (Polski)</option>
|
||||
<option value="Romanian">Romanian (Română)</option>
|
||||
<option value="Greek">Greek (Ελληνικά)</option>
|
||||
<option value="Czech">Czech (Čeština)</option>
|
||||
<option value="Finnish">Finnish (Suomi)</option>
|
||||
<option value="Hindi">Hindi (हिन्दी)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<input id="minimax_add_custom_voice" class="menu_button" type="button" value="Add Custom Voice">
|
||||
</div>
|
||||
<div id="minimax_custom_voices_list" style="margin-top: 10px;"></div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<div id="minimax_custom_model_management" class="tts_block flexFlowColumn">
|
||||
<h4>Custom Model Management</h4>
|
||||
<div class="tts_block wide100p">
|
||||
<input id="minimax_custom_model_id" type="text" class="text_pole" placeholder="Model ID"/>
|
||||
</div>
|
||||
<div class="tts_block wide100p">
|
||||
<input id="minimax_custom_model_name" type="text" class="text_pole" placeholder="Model Name"/>
|
||||
</div>
|
||||
<div class="tts_block">
|
||||
<input id="minimax_add_custom_model" class="menu_button" type="button" value="Add Custom Model">
|
||||
</div>
|
||||
<div id="minimax_custom_models_list" style="margin-top: 10px;"></div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
constructor() {
|
||||
this.handler = async function (/** @type {string} */ key) {
|
||||
if (![SECRET_KEYS.MINIMAX, SECRET_KEYS.MINIMAX_GROUP_ID].includes(key)) return;
|
||||
$('#api_key_minimax').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX]);
|
||||
$('#minimax_group_id').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]);
|
||||
await this.onRefreshClick();
|
||||
}.bind(this);
|
||||
}
|
||||
|
||||
dispose() {
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.removeListener(event, this.handler);
|
||||
});
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
this.settings.apiHost = $('#minimax_tts_api_host').val();
|
||||
this.settings.speed = parseFloat($('#minimax_tts_speed').val().toString());
|
||||
this.settings.volume = parseFloat($('#minimax_tts_volume').val().toString());
|
||||
this.settings.pitch = parseInt($('#minimax_tts_pitch').val().toString());
|
||||
this.settings.model = $('#minimax_tts_model').find(':selected').val();
|
||||
this.settings.format = $('#minimax_tts_format').find(':selected').val();
|
||||
this.settings.customVoiceId = $('#minimax_tts_custom_voice_id').val();
|
||||
|
||||
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
|
||||
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
|
||||
$('#minimax_tts_pitch_output').text(this.settings.pitch);
|
||||
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
addCustomModel() {
|
||||
const modelId = $('#minimax_custom_model_id').val().toString().trim();
|
||||
const modelName = $('#minimax_custom_model_name').val().toString().trim();
|
||||
|
||||
if (!modelId || !modelName) {
|
||||
toastr.error('Please enter model ID and name');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if already exists in custom models
|
||||
if (this.settings.customModels.find(m => m.id === modelId)) {
|
||||
toastr.error('Model ID already exists in custom models');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if conflicts with default models
|
||||
if (MiniMaxTtsProvider.defaultModels.find(m => m.id === modelId)) {
|
||||
toastr.error('Model ID conflicts with default model. Please use a different model ID.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if conflicts with default model names
|
||||
if (MiniMaxTtsProvider.defaultModels.find(m => m.name === modelName)) {
|
||||
toastr.error('Model name conflicts with default model. Please use a different model name.');
|
||||
return;
|
||||
}
|
||||
|
||||
this.settings.customModels.push({ id: modelId, name: modelName });
|
||||
$('#minimax_custom_model_id').val('');
|
||||
$('#minimax_custom_model_name').val('');
|
||||
|
||||
this.updateCustomModelsDisplay();
|
||||
this.updateModelSelect(this.getAllModels());
|
||||
saveTtsProviderSettings();
|
||||
toastr.success('Model added successfully');
|
||||
}
|
||||
|
||||
removeCustomModel(modelId) {
|
||||
this.settings.customModels = this.settings.customModels.filter(m => m.id !== modelId);
|
||||
this.updateCustomModelsDisplay();
|
||||
this.updateModelSelect(this.getAllModels());
|
||||
saveTtsProviderSettings();
|
||||
|
||||
toastr.success('Model removed successfully');
|
||||
}
|
||||
|
||||
addCustomVoice() {
|
||||
const voiceName = $('#minimax_custom_voice_name').val().toString().trim();
|
||||
const voiceId = $('#minimax_custom_voice_id').val().toString().trim();
|
||||
const voiceLang = $('#minimax_custom_voice_lang').val().toString().trim();
|
||||
|
||||
if (!voiceName || !voiceId) {
|
||||
toastr.error('Please enter voice name and ID');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if already exists in custom voices
|
||||
if (this.settings.customVoices.find(v => v.voice_id === voiceId)) {
|
||||
toastr.error('Voice ID already exists in custom voices');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if conflicts with default voices
|
||||
if (MiniMaxTtsProvider.defaultVoices.find(v => v.voice_id === voiceId)) {
|
||||
toastr.error('Voice ID conflicts with default voice. Please use a different voice ID.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if conflicts with default voice names
|
||||
if (MiniMaxTtsProvider.defaultVoices.find(v => v.name === voiceName)) {
|
||||
toastr.error('Voice name conflicts with default voice. Please use a different voice name.');
|
||||
return;
|
||||
}
|
||||
|
||||
// Convert display name to standard language code before saving
|
||||
const standardLangCode = this.convertDisplayNameToLanguageCode(voiceLang);
|
||||
|
||||
this.settings.customVoices.push({
|
||||
name: voiceName,
|
||||
voice_id: voiceId,
|
||||
lang: standardLangCode,
|
||||
preview_url: null,
|
||||
});
|
||||
|
||||
$('#minimax_custom_voice_name').val('');
|
||||
$('#minimax_custom_voice_id').val('');
|
||||
$('#minimax_custom_voice_lang').val('auto');
|
||||
|
||||
this.updateCustomVoicesDisplay();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
saveTtsProviderSettings();
|
||||
toastr.success('Voice added successfully');
|
||||
}
|
||||
|
||||
// Remove custom voice
|
||||
removeCustomVoice(voiceId) {
|
||||
this.settings.customVoices = this.settings.customVoices.filter(v => v.voice_id !== voiceId);
|
||||
this.updateCustomVoicesDisplay();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
saveTtsProviderSettings();
|
||||
toastr.success('Voice removed successfully');
|
||||
}
|
||||
|
||||
// Helper function to escape HTML
|
||||
escapeHtml(text) {
|
||||
const div = document.createElement('div');
|
||||
div.textContent = text;
|
||||
return div.innerHTML;
|
||||
}
|
||||
|
||||
// Update custom models display
|
||||
updateCustomModelsDisplay() {
|
||||
const container = $('#minimax_custom_models_list');
|
||||
container.empty();
|
||||
|
||||
if (this.settings.customModels.length === 0) {
|
||||
container.append('<div class="minimax-empty-list">No custom models added</div>');
|
||||
return;
|
||||
}
|
||||
|
||||
this.settings.customModels.forEach(model => {
|
||||
const modelDiv = $('<div></div>').addClass('minimax-custom-item');
|
||||
|
||||
const modelInfo = $('<div></div>').addClass('minimax-custom-item-info');
|
||||
const modelName = $('<div></div>').addClass('minimax-custom-item-name').text(model.name);
|
||||
const modelId = $('<div></div>').addClass('minimax-custom-item-details').text(`(${model.id})`);
|
||||
modelInfo.append(modelName).append(modelId);
|
||||
|
||||
const removeBtn = $('<button></button>')
|
||||
.addClass('menu_button minimax-custom-item-remove')
|
||||
.text('Remove')
|
||||
.on('click', () => {
|
||||
try {
|
||||
this.removeCustomModel(model.id);
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error removing custom model:', error);
|
||||
toastr.error(`Failed to remove custom model: ${error.message}`);
|
||||
}
|
||||
});
|
||||
|
||||
modelDiv.append(modelInfo).append(removeBtn);
|
||||
container.append(modelDiv);
|
||||
});
|
||||
}
|
||||
|
||||
// Update custom voices display
|
||||
updateCustomVoicesDisplay() {
|
||||
const container = $('#minimax_custom_voices_list');
|
||||
container.empty();
|
||||
|
||||
if (this.settings.customVoices.length === 0) {
|
||||
container.append('<div class="minimax-empty-list">No custom voices added</div>');
|
||||
return;
|
||||
}
|
||||
|
||||
this.settings.customVoices.forEach(voice => {
|
||||
const voiceDiv = $('<div></div>').addClass('minimax-custom-item');
|
||||
|
||||
const voiceInfo = $('<div></div>').addClass('minimax-custom-item-info');
|
||||
const voiceName = $('<div></div>').addClass('minimax-custom-item-name').text(voice.name);
|
||||
const voiceDetails = $('<div></div>').addClass('minimax-custom-item-details').text(`(${voice.voice_id}) - ${voice.lang}`);
|
||||
voiceInfo.append(voiceName).append(voiceDetails);
|
||||
|
||||
const removeBtn = $('<button></button>')
|
||||
.addClass('menu_button minimax-custom-item-remove')
|
||||
.text('Remove')
|
||||
.on('click', () => {
|
||||
try {
|
||||
this.removeCustomVoice(voice.voice_id);
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error removing custom voice:', error);
|
||||
toastr.error(`Failed to remove custom voice: ${error.message}`);
|
||||
}
|
||||
});
|
||||
|
||||
voiceDiv.append(voiceInfo).append(removeBtn);
|
||||
container.append(voiceDiv);
|
||||
});
|
||||
}
|
||||
|
||||
// Get all models (default + custom)
|
||||
getAllModels() {
|
||||
return [...MiniMaxTtsProvider.defaultModels, ...this.settings.customModels];
|
||||
}
|
||||
|
||||
// Get all voices (default + custom)
|
||||
getAllVoices() {
|
||||
return [...MiniMaxTtsProvider.defaultVoices, ...this.settings.customVoices];
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert display names to standard language codes
|
||||
* @param {string} displayName Language display name
|
||||
* @returns {string} Standard language code
|
||||
*/
|
||||
convertDisplayNameToLanguageCode(displayName) {
|
||||
const displayNameToCode = {
|
||||
'Chinese': 'zh-CN',
|
||||
'Chinese,Yue': 'zh-TW',
|
||||
'English': 'en-US',
|
||||
'Japanese': 'ja-JP',
|
||||
'Korean': 'ko-KR',
|
||||
'French': 'fr-FR',
|
||||
'German': 'de-DE',
|
||||
'Spanish': 'es-ES',
|
||||
'Portuguese': 'pt-BR',
|
||||
'Italian': 'it-IT',
|
||||
'Arabic': 'ar-SA',
|
||||
'Russian': 'ru-RU',
|
||||
'Turkish': 'tr-TR',
|
||||
'Dutch': 'nl-NL',
|
||||
'Ukrainian': 'uk-UA',
|
||||
'Vietnamese': 'vi-VN',
|
||||
'Indonesian': 'id-ID',
|
||||
'Thai': 'th-TH',
|
||||
'Polish': 'pl-PL',
|
||||
'Romanian': 'ro-RO',
|
||||
'Greek': 'el-GR',
|
||||
'Czech': 'cs-CZ',
|
||||
'Finnish': 'fi-FI',
|
||||
'Hindi': 'hi-IN',
|
||||
};
|
||||
|
||||
return displayNameToCode[displayName] || displayName;
|
||||
}
|
||||
|
||||
updateModelSelect(models) {
|
||||
const modelSelect = $('#minimax_tts_model');
|
||||
const currentValue = modelSelect.val();
|
||||
|
||||
// Clear existing options
|
||||
modelSelect.empty();
|
||||
|
||||
// Add all models
|
||||
models.forEach(model => {
|
||||
const option = $('<option></option>');
|
||||
option.val(model.id);
|
||||
option.text(model.name);
|
||||
modelSelect.append(option);
|
||||
});
|
||||
|
||||
// Restore previous selection if it still exists
|
||||
if (currentValue && models.find(m => m.id === currentValue)) {
|
||||
modelSelect.val(currentValue);
|
||||
}
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length === 0) {
|
||||
console.info('Using default MiniMax TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = { ...this.defaultSettings };
|
||||
|
||||
// Flatten the settings fields with default/min/max definitions so the actual values are used
|
||||
this.settings = Object.fromEntries(
|
||||
Object.entries(this.defaultSettings).map(([key, value]) => {
|
||||
if (value && typeof value === 'object' && 'default' in value) {
|
||||
return [key, value.default];
|
||||
}
|
||||
return [key, value];
|
||||
}),
|
||||
);
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
console.warn(`Invalid setting passed to MiniMax TTS Provider: ${key}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure custom configuration arrays exist
|
||||
if (!this.settings.customModels) this.settings.customModels = [];
|
||||
if (!this.settings.customVoices) this.settings.customVoices = [];
|
||||
|
||||
// # Migrate settings
|
||||
// Pitch value changed from float to int. If it's a float, let's try to extrapolate it to the new range
|
||||
if (!Number.isInteger(this.settings.pitch)) {
|
||||
const oldPitch = parseFloat(this.settings.pitch);
|
||||
if (!isNaN(oldPitch)) {
|
||||
// map old [0.5..1.0] to [-12..0], and [1.0..2.0] to [0..12] (old default was 1.0, new default is 0)
|
||||
const newPitch = (oldPitch < 1.0) ? (oldPitch - 1.0) * 24 : (oldPitch - 1.0) * 12;
|
||||
this.settings.pitch = Math.max(-12, Math.min(12, Math.round(newPitch)));
|
||||
console.info(`MiniMax TTS: Migrated pitch from ${oldPitch} to ${this.settings.pitch}`);
|
||||
} else {
|
||||
this.settings.pitch = 0;
|
||||
console.info(`MiniMax TTS: Migration reset pitch to default ${this.settings.pitch}`);
|
||||
}
|
||||
}
|
||||
|
||||
$('#minimax_tts_api_host').val(this.settings.apiHost || 'https://api.minimax.io');
|
||||
$('#minimax_tts_model').val(this.settings.model);
|
||||
$('#minimax_tts_speed').val(this.settings.speed);
|
||||
$('#minimax_tts_volume').val(this.settings.volume);
|
||||
$('#minimax_tts_pitch').val(this.settings.pitch);
|
||||
$('#minimax_tts_format').val(this.settings.format);
|
||||
$('#minimax_tts_custom_voice_id').val(this.settings.customVoiceId);
|
||||
|
||||
$('#minimax_connect').on('click', () => {
|
||||
try {
|
||||
this.onConnectClick();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error in connect click handler:', error);
|
||||
toastr.error(`Connection failed: ${error.message}`);
|
||||
}
|
||||
});
|
||||
$('#minimax_refresh').on('click', () => {
|
||||
try {
|
||||
this.onRefreshClick();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error in refresh click handler:', error);
|
||||
toastr.error(`Refresh failed: ${error.message}`);
|
||||
}
|
||||
});
|
||||
$('#minimax_tts_api_host').on('change', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_speed').on('input', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_volume').on('input', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_pitch').on('input', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_model').on('change', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_format').on('change', this.onSettingsChange.bind(this));
|
||||
$('#minimax_tts_custom_voice_id').on('input', this.onSettingsChange.bind(this));
|
||||
|
||||
// Custom model and voice event listeners
|
||||
$('#minimax_add_custom_model').on('click', () => {
|
||||
try {
|
||||
this.addCustomModel();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error adding custom model:', error);
|
||||
toastr.error(`Failed to add custom model: ${error.message}`);
|
||||
}
|
||||
});
|
||||
$('#minimax_add_custom_voice').on('click', () => {
|
||||
try {
|
||||
this.addCustomVoice();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error adding custom voice:', error);
|
||||
toastr.error(`Failed to add custom voice: ${error.message}`);
|
||||
}
|
||||
});
|
||||
|
||||
// Keyboard event listeners
|
||||
const ENTER_KEY = 13;
|
||||
$('#minimax_custom_model_id, #minimax_custom_model_name').on('keypress', (e) => {
|
||||
if (e.which === ENTER_KEY) {
|
||||
try {
|
||||
this.addCustomModel();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error adding custom model via keyboard:', error);
|
||||
toastr.error(`Failed to add custom model: ${error.message}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
$('#minimax_custom_voice_name, #minimax_custom_voice_id').on('keypress', (e) => {
|
||||
if (e.which === ENTER_KEY) {
|
||||
try {
|
||||
this.addCustomVoice();
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Error adding custom voice via keyboard:', error);
|
||||
toastr.error(`Failed to add custom voice: ${error.message}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
$('#minimax_tts_speed_output').text(this.settings.speed.toFixed(1));
|
||||
$('#minimax_tts_volume_output').text(this.settings.volume.toFixed(1));
|
||||
$('#minimax_tts_pitch_output').text(this.settings.pitch);
|
||||
|
||||
// Initialize custom configuration display
|
||||
this.updateCustomModelsDisplay();
|
||||
this.updateCustomVoicesDisplay();
|
||||
|
||||
// Update model selector to include custom models
|
||||
this.updateModelSelect(this.getAllModels());
|
||||
|
||||
// Initialize voice map for character voice assignment
|
||||
try {
|
||||
await initVoiceMap();
|
||||
} catch (error) {
|
||||
console.debug('MiniMax: Voice map initialization failed, but continuing');
|
||||
}
|
||||
|
||||
$('#api_key_minimax').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX]);
|
||||
$('#minimax_group_id').toggleClass('success', !!secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]);
|
||||
[event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
|
||||
eventSource.on(event, this.handler);
|
||||
});
|
||||
|
||||
// Only check ready status when API credentials are available
|
||||
if (secret_state[SECRET_KEYS.MINIMAX] && secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
|
||||
try {
|
||||
await this.checkReady();
|
||||
console.debug('MiniMax TTS: Settings loaded and ready');
|
||||
} catch (error) {
|
||||
console.debug('MiniMax TTS: Settings loaded, but not ready:', error);
|
||||
}
|
||||
} else {
|
||||
console.debug('MiniMax TTS: Settings loaded, waiting for API credentials');
|
||||
}
|
||||
}
|
||||
|
||||
// Perform a simple readiness check
|
||||
async checkReady() {
|
||||
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
|
||||
const error = new Error('API Key and Group ID are required');
|
||||
console.error('MiniMax TTS checkReady error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
// Try to fetch available models and voices, but don't block connection on failure
|
||||
try {
|
||||
await this.updateModelsAndVoices();
|
||||
} catch (error) {
|
||||
console.warn('MiniMax TTS: Failed to fetch models/voices during ready check, will use all available:', error);
|
||||
// Even if API call fails, set all available values to ensure basic functionality
|
||||
this.availableModels = this.getAllModels();
|
||||
this.availableVoices = this.getAllVoices();
|
||||
}
|
||||
|
||||
// Ensure at least voices are available
|
||||
if (!this.availableVoices || this.availableVoices.length === 0) {
|
||||
this.availableVoices = this.getAllVoices();
|
||||
}
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
try {
|
||||
await this.updateModelsAndVoices();
|
||||
await initVoiceMap(); // Update voice map after refresh
|
||||
toastr.success('MiniMax TTS: Models and voices refreshed successfully');
|
||||
} catch (error) {
|
||||
toastr.error(`MiniMax TTS: Failed to refresh - ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
async onConnectClick() {
|
||||
try {
|
||||
await this.checkReady();
|
||||
await initVoiceMap(); // Update voice map after connection
|
||||
toastr.success('MiniMax TTS: Connected successfully');
|
||||
saveTtsProviderSettings();
|
||||
} catch (error) {
|
||||
toastr.error(`MiniMax TTS: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (!voiceName) {
|
||||
const error = new Error('TTS Voice name not provided');
|
||||
console.error('MiniMax TTS getVoice error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// If no available voices, try to fetch them
|
||||
if (!this.availableVoices || this.availableVoices.length === 0) {
|
||||
this.availableVoices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
// Ensure at least voices are available
|
||||
if (!this.availableVoices || this.availableVoices.length === 0) {
|
||||
this.availableVoices = this.getAllVoices();
|
||||
}
|
||||
|
||||
const voice = this.availableVoices.find(voice =>
|
||||
voice.voice_id === voiceName || voice.name === voiceName,
|
||||
);
|
||||
|
||||
if (!voice) {
|
||||
const error = new Error(`TTS Voice not found: ${voiceName}`);
|
||||
console.error('MiniMax TTS getVoice error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
return voice;
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
// If voiceId is 'customVoice', use the custom voice ID from settings
|
||||
if (voiceId === 'customVoice') {
|
||||
const customVoiceId = this.settings.customVoiceId;
|
||||
if (!customVoiceId || customVoiceId.trim() === '') {
|
||||
const error = new Error('Please enter custom voice ID in settings first');
|
||||
console.error('MiniMax TTS generateTts error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
voiceId = customVoiceId.trim();
|
||||
}
|
||||
|
||||
// Get the voice object to determine language
|
||||
let language = null;
|
||||
try {
|
||||
const voice = await this.getVoice(voiceId);
|
||||
if (voice && voice.lang) {
|
||||
language = this.mapLanguageToMiniMaxFormat(voice.lang);
|
||||
console.debug(`MiniMax TTS: Using voice language ${voice.lang}, API language: ${language}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.debug('MiniMax TTS: Could not determine voice language, using default');
|
||||
}
|
||||
|
||||
return await this.fetchTtsGeneration(text, voiceId, language);
|
||||
}
|
||||
|
||||
async fetchTtsVoiceObjects() {
|
||||
try {
|
||||
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
|
||||
console.warn('MiniMax TTS: API Key and Group ID required for fetching voices');
|
||||
console.warn('Using all available voices (default + custom). Please check your API credentials');
|
||||
return this.getAllVoices();
|
||||
}
|
||||
|
||||
// MiniMax API doesn't provide a voices listing endpoint
|
||||
// Using all available voices (default + custom)
|
||||
console.info('MiniMax TTS: Using all available voices (default + custom)');
|
||||
return this.getAllVoices();
|
||||
} catch (error) {
|
||||
console.error('Error fetching MiniMax voices:', error);
|
||||
console.warn('Using all available voices (default + custom). Please check your API credentials');
|
||||
return this.getAllVoices();
|
||||
}
|
||||
}
|
||||
|
||||
async fetchTtsModels() {
|
||||
// MiniMax API doesn't provide a models listing endpoint
|
||||
// Using all available models (default + custom)
|
||||
console.info('MiniMax TTS: Using all available models (default + custom)');
|
||||
this.availableModels = this.getAllModels();
|
||||
return this.getAllModels();
|
||||
}
|
||||
|
||||
async updateModelsAndVoices() {
|
||||
try {
|
||||
// Get models list
|
||||
this.availableModels = await this.fetchTtsModels();
|
||||
console.info(`MiniMax TTS: Loaded ${this.availableModels.length} models`);
|
||||
|
||||
// Get voices list (now fetched from API)
|
||||
this.availableVoices = await this.fetchTtsVoiceObjects();
|
||||
console.info(`MiniMax TTS: Loaded ${this.availableVoices.length} voices`);
|
||||
|
||||
// Update model dropdown
|
||||
this.updateModelSelect(this.availableModels);
|
||||
|
||||
return {
|
||||
models: this.availableModels,
|
||||
voices: this.availableVoices,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS: Failed to update models and voices:', error);
|
||||
// Set all available values to ensure basic functionality
|
||||
this.availableModels = this.getAllModels();
|
||||
this.availableVoices = this.getAllVoices();
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// Get correct MIME type
|
||||
getAudioMimeType(format) {
|
||||
const mimeTypes = {
|
||||
'mp3': 'audio/mpeg',
|
||||
'wav': 'audio/wav',
|
||||
'pcm': 'audio/pcm',
|
||||
'flac': 'audio/flac',
|
||||
'aac': 'audio/aac',
|
||||
};
|
||||
return mimeTypes[format] || 'audio/mpeg';
|
||||
}
|
||||
|
||||
async fetchTtsGeneration(inputText, voiceId, language = null) {
|
||||
console.info(`Generating new MiniMax TTS for voice_id ${voiceId}`);
|
||||
|
||||
if (!secret_state[SECRET_KEYS.MINIMAX] || !secret_state[SECRET_KEYS.MINIMAX_GROUP_ID]) {
|
||||
const error = new Error('API Key and Group ID are required');
|
||||
console.error('MiniMax TTS fetchTtsGeneration error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
/** @param {number} number @param {number} lower @param {number} upper @returns {number} */
|
||||
const clamp = (number, lower, upper) => Math.min(Math.max(number, lower), upper);
|
||||
|
||||
const requestBody = {
|
||||
text: inputText,
|
||||
voiceId: voiceId,
|
||||
apiHost: this.settings.apiHost,
|
||||
model: this.settings.model || this.defaultSettings.model,
|
||||
speed: clamp(Number(this.settings.speed) || this.defaultSettings.speed.default, this.defaultSettings.speed.min, this.defaultSettings.speed.max),
|
||||
volume: clamp(Number(this.settings.volume) || this.defaultSettings.volume.default, this.defaultSettings.volume.min, this.defaultSettings.volume.max),
|
||||
pitch: clamp(Math.round(Number(this.settings.pitch)) || this.defaultSettings.pitch.default, this.defaultSettings.pitch.min, this.defaultSettings.pitch.max),
|
||||
audioSampleRate: Number(this.settings.audioSampleRate) || this.defaultSettings.audioSampleRate,
|
||||
bitrate: Number(this.settings.bitrate) || this.defaultSettings.bitrate,
|
||||
format: this.settings.format || this.defaultSettings.format,
|
||||
language: language,
|
||||
};
|
||||
|
||||
console.debug('MiniMax TTS Request:', {
|
||||
body: { ...requestBody, voiceId: '[REDACTED]' },
|
||||
});
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/minimax/generate-voice', {
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
let errorMessage = `HTTP ${response.status}`;
|
||||
|
||||
try {
|
||||
// Try to parse JSON error response from backend
|
||||
const errorData = await response.json();
|
||||
console.error('MiniMax TTS backend error:', errorData);
|
||||
errorMessage = errorData.error || errorMessage;
|
||||
} catch (jsonError) {
|
||||
// If not JSON, try to read text
|
||||
try {
|
||||
const errorText = await response.text();
|
||||
console.error('MiniMax TTS backend error (Text):', errorText);
|
||||
errorMessage = errorText || errorMessage;
|
||||
} catch (textError) {
|
||||
console.error('MiniMax TTS: Failed to read error response:', textError);
|
||||
}
|
||||
}
|
||||
|
||||
toastr.error(`${errorMessage}`, 'MiniMax TTS Generation Failed');
|
||||
const error = new Error(errorMessage);
|
||||
console.error('MiniMax TTS fetchTtsGeneration error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Backend handles all the complex processing and returns audio data directly
|
||||
console.debug('MiniMax TTS: Audio response received from backend');
|
||||
return response;
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error in MiniMax TTS generation:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Map language codes to MiniMax API supported language format
|
||||
* @param {string} lang Language code or display name
|
||||
* @returns {string} MiniMax API language format
|
||||
*/
|
||||
mapLanguageToMiniMaxFormat(lang) {
|
||||
// Convert display name to language code if needed
|
||||
const languageCode = this.convertDisplayNameToLanguageCode(lang);
|
||||
|
||||
// Then map language codes to MiniMax API format
|
||||
const languageMap = {
|
||||
'zh-CN': 'zh_CN',
|
||||
'zh-TW': 'zh_TW',
|
||||
'en-US': 'en_US',
|
||||
'en-GB': 'en_GB',
|
||||
'en-AU': 'en_AU',
|
||||
'en-IN': 'en_IN',
|
||||
'ja-JP': 'ja_JP',
|
||||
'ko-KR': 'ko_KR',
|
||||
'fr-FR': 'fr_FR',
|
||||
'de-DE': 'de_DE',
|
||||
'es-ES': 'es_ES',
|
||||
'pt-BR': 'pt_BR',
|
||||
'it-IT': 'it_IT',
|
||||
'ar-SA': 'ar_SA',
|
||||
'ru-RU': 'ru_RU',
|
||||
'tr-TR': 'tr_TR',
|
||||
'nl-NL': 'nl_NL',
|
||||
'uk-UA': 'uk_UA',
|
||||
'vi-VN': 'vi_VN',
|
||||
'id-ID': 'id_ID',
|
||||
'th-TH': 'th_TH',
|
||||
'pl-PL': 'pl_PL',
|
||||
'ro-RO': 'ro_RO',
|
||||
'el-GR': 'el_GR',
|
||||
'cs-CZ': 'cs_CZ',
|
||||
'fi-FI': 'fi_FI',
|
||||
'hi-IN': 'hi_IN',
|
||||
};
|
||||
|
||||
// Return mapped language or default to auto
|
||||
return languageMap[languageCode] || 'auto';
|
||||
}
|
||||
|
||||
/**
|
||||
* Preview TTS for a given voice ID.
|
||||
* @param {string} voiceId Voice ID
|
||||
*/
|
||||
async previewTtsVoice(voiceId) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.currentTime = 0;
|
||||
|
||||
try {
|
||||
const voice = await this.getVoice(voiceId);
|
||||
// Get preview text based on voice language, defaulting to en-US
|
||||
const previewLang = voice.lang || 'en-US';
|
||||
const text = getPreviewString(previewLang);
|
||||
|
||||
// Map the language to MiniMax API format for the request
|
||||
const apiLang = this.mapLanguageToMiniMaxFormat(previewLang);
|
||||
console.debug(`MiniMax TTS: Using preview language ${previewLang}, API language: ${apiLang}`);
|
||||
|
||||
const response = await this.fetchTtsGeneration(text, voiceId, apiLang);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
const error = new Error(`HTTP ${response.status}: ${errorText}`);
|
||||
console.error('MiniMax TTS previewTtsVoice error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
const audio = await response.blob();
|
||||
console.debug(`MiniMax TTS: Audio blob size: ${audio.size}, type: ${audio.type}`);
|
||||
|
||||
// Use the same method as other TTS providers - convert to base64 data URL
|
||||
const srcUrl = await getBase64Async(audio);
|
||||
console.debug('MiniMax TTS: Base64 data URL created');
|
||||
|
||||
// Clean up previous event listener to prevent memory leaks
|
||||
this.audioElement.onended = null;
|
||||
this.audioElement.onerror = null;
|
||||
|
||||
this.audioElement.src = srcUrl;
|
||||
this.audioElement.volume = Math.min(this.settings.volume || 1.0, 1.0); // HTML audio element max is 1.0
|
||||
|
||||
// Add error handler for audio element
|
||||
this.audioElement.onerror = (e) => {
|
||||
console.error('MiniMax TTS: Audio element error:', e);
|
||||
console.error('MiniMax TTS: Audio element error details:', {
|
||||
error: this.audioElement.error,
|
||||
networkState: this.audioElement.networkState,
|
||||
readyState: this.audioElement.readyState,
|
||||
src: this.audioElement.src,
|
||||
});
|
||||
|
||||
toastr.error('Audio playback failed. The audio format may not be supported by your browser.');
|
||||
};
|
||||
|
||||
try {
|
||||
await this.audioElement.play();
|
||||
console.debug('MiniMax TTS: Audio playback started successfully');
|
||||
} catch (playError) {
|
||||
console.error('MiniMax TTS: Play error:', playError);
|
||||
throw new Error(`Audio playback failed: ${playError.message}`);
|
||||
}
|
||||
|
||||
this.audioElement.onended = () => {
|
||||
this.audioElement.onended = null;
|
||||
this.audioElement.onerror = null;
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error('MiniMax TTS Preview Error:', error);
|
||||
toastr.error(`Could not generate preview: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
215
data/st-core-scripts/scripts/extensions/tts/novel.js
Normal file
215
data/st-core-scripts/scripts/extensions/tts/novel.js
Normal file
@@ -0,0 +1,215 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
|
||||
import { splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
import { initVoiceMap } from './index.js';
|
||||
|
||||
export { NovelTtsProvider };
|
||||
|
||||
class NovelTtsProvider {
|
||||
//########//
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
settings;
|
||||
voices = [];
|
||||
separator = ' . ';
|
||||
audioElement = document.createElement('audio');
|
||||
|
||||
defaultSettings = {
|
||||
voiceMap: {},
|
||||
customVoices: [],
|
||||
};
|
||||
|
||||
/**
|
||||
* Perform any text processing before passing to TTS engine.
|
||||
* @param {string} text Input text
|
||||
* @returns {string} Processed text
|
||||
*/
|
||||
processText(text) {
|
||||
// Novel reads tilde as a word. Replace with full stop
|
||||
text = text.replace(/~/g, '.');
|
||||
// Novel reads asterisk as a word. Remove it
|
||||
text = text.replace(/\*/g, '');
|
||||
return text;
|
||||
}
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<div class="novel_tts_hints">
|
||||
<div>Use NovelAI's TTS engine.</div>
|
||||
<div>
|
||||
The default Voice IDs are only examples. Add custom voices and Novel will create a new random voice for it.
|
||||
Feel free to try different options!
|
||||
</div>
|
||||
<i>Hint: Save an API key in the NovelAI API settings to use it here.</i>
|
||||
</div>
|
||||
<label for="tts-novel-custom-voices-add">Custom Voices</label>
|
||||
<div class="tts_custom_voices">
|
||||
<select id="tts-novel-custom-voices-select"><select>
|
||||
<i id="tts-novel-custom-voices-add" class="tts-button fa-solid fa-plus fa-xl success" title="Add"></i>
|
||||
<i id="tts-novel-custom-voices-delete" class="tts-button fa-solid fa-xmark fa-xl failure" title="Delete"></i>
|
||||
</div>
|
||||
`;
|
||||
return html;
|
||||
}
|
||||
|
||||
|
||||
// Add a new Novel custom voice to provider
|
||||
async addCustomVoice() {
|
||||
const voiceName = await callGenericPopup('Custom Voice name:', POPUP_TYPE.INPUT);
|
||||
this.settings.customVoices.push(voiceName);
|
||||
this.populateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
// Delete selected custom voice from provider
|
||||
deleteCustomVoice() {
|
||||
const selected = $('#tts-novel-custom-voices-select').find(':selected').val();
|
||||
const voiceIndex = this.settings.customVoices.indexOf(selected);
|
||||
|
||||
if (voiceIndex !== -1) {
|
||||
this.settings.customVoices.splice(voiceIndex, 1);
|
||||
}
|
||||
this.populateCustomVoices();
|
||||
initVoiceMap(); // Update TTS extension voiceMap
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
// Create the UI dropdown list of voices in provider
|
||||
populateCustomVoices() {
|
||||
let voiceSelect = $('#tts-novel-custom-voices-select');
|
||||
voiceSelect.empty();
|
||||
this.settings.customVoices.forEach(voice => {
|
||||
voiceSelect.append(`<option>${voice}</option>`);
|
||||
});
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length == 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
$('#tts-novel-custom-voices-add').on('click', () => (this.addCustomVoice()));
|
||||
$('#tts-novel-custom-voices-delete').on('click', () => (this.deleteCustomVoice()));
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw `Invalid setting passed to TTS Provider: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
this.populateCustomVoices();
|
||||
await this.checkReady();
|
||||
console.debug('NovelTTS: Settings loaded');
|
||||
}
|
||||
|
||||
// Perform a simple readiness check by trying to fetch voiceIds
|
||||
// Doesnt really do much for Novel, not seeing a good way to test this at the moment.
|
||||
async checkReady() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
return;
|
||||
}
|
||||
|
||||
//#################//
|
||||
// TTS Interfaces //
|
||||
//#################//
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (!voiceName) {
|
||||
throw 'TTS Voice name not provided';
|
||||
}
|
||||
|
||||
return { name: voiceName, voice_id: voiceName, lang: 'en-US', preview_url: false };
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate TTS audio for the given text using the specified voice.
|
||||
* @param {string} text Text to generate
|
||||
* @param {string} voiceId Voice ID
|
||||
* @returns {AsyncGenerator<Response>} Audio response generator
|
||||
*/
|
||||
generateTts(text, voiceId) {
|
||||
return this.fetchTtsGeneration(text, voiceId);
|
||||
}
|
||||
|
||||
//###########//
|
||||
// API CALLS //
|
||||
//###########//
|
||||
async fetchTtsVoiceObjects() {
|
||||
let voices = [
|
||||
{ name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Claea', voice_id: 'Claea', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Lim', voice_id: 'Lim', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Aurae', voice_id: 'Aurae', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Naia', voice_id: 'Naia', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Aulon', voice_id: 'Aulon', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Elei', voice_id: 'Elei', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Ogma', voice_id: 'Ogma', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Raid', voice_id: 'Raid', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Pega', voice_id: 'Pega', lang: 'en-US', preview_url: false },
|
||||
{ name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false },
|
||||
];
|
||||
|
||||
// Add in custom voices to the map
|
||||
let addVoices = this.settings.customVoices.map(voice =>
|
||||
({ name: voice, voice_id: voice, lang: 'en-US', preview_url: false }),
|
||||
);
|
||||
voices = voices.concat(addVoices);
|
||||
|
||||
return voices;
|
||||
}
|
||||
|
||||
|
||||
async previewTtsVoice(id) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.currentTime = 0;
|
||||
|
||||
const text = getPreviewString('en-US');
|
||||
for await (const response of this.generateTts(text, id)) {
|
||||
const audio = await response.blob();
|
||||
const url = URL.createObjectURL(audio);
|
||||
await new Promise(resolve => {
|
||||
const audioElement = new Audio();
|
||||
audioElement.src = url;
|
||||
audioElement.play();
|
||||
audioElement.onended = () => resolve();
|
||||
});
|
||||
URL.revokeObjectURL(url);
|
||||
}
|
||||
}
|
||||
|
||||
async* fetchTtsGeneration(inputText, voiceId) {
|
||||
const MAX_LENGTH = 1000;
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
const chunks = splitRecursive(inputText, MAX_LENGTH);
|
||||
for (const chunk of chunks) {
|
||||
const response = await fetch('/api/novelai/generate-voice',
|
||||
{
|
||||
method: 'POST',
|
||||
headers: getRequestHeaders(),
|
||||
body: JSON.stringify({
|
||||
'text': chunk,
|
||||
'voice': voiceId,
|
||||
}),
|
||||
},
|
||||
);
|
||||
if (!response.ok) {
|
||||
toastr.error(response.statusText, 'TTS Generation Failed');
|
||||
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
yield response;
|
||||
}
|
||||
}
|
||||
}
|
||||
181
data/st-core-scripts/scripts/extensions/tts/openai-compatible.js
Normal file
181
data/st-core-scripts/scripts/extensions/tts/openai-compatible.js
Normal file
@@ -0,0 +1,181 @@
|
||||
import { event_types, eventSource, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, secret_state } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { OpenAICompatibleTtsProvider };
|
||||
|
||||
/**
 * TTS provider for servers exposing an OpenAI-compatible speech endpoint.
 * Generation requests are posted to the `/api/openai/custom/generate-voice` route
 * together with the configured endpoint, model, voice and speed.
 */
class OpenAICompatibleTtsProvider {
    // Active settings; assigned by loadSettings()
    settings;
    // Cached voice objects derived from settings.available_voices
    voices = [];
    separator = ' . ';

    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        model: 'tts-1',
        speed: 1,
        available_voices: ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
        provider_endpoint: 'http://127.0.0.1:8000/v1/audio/speech',
    };

    /**
     * Markup for the provider settings panel, pre-filled with the default values.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        let html = `
        <label for="openai_compatible_tts_endpoint">Provider Endpoint:</label>
        <div class="flex-container alignItemsCenter">
            <div class="flex1">
                <input id="openai_compatible_tts_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
            </div>
            <div id="openai_compatible_tts_key" class="menu_button menu_button_icon manage-api-keys" data-key="api_key_custom_openai_tts">
                <i class="fa-solid fa-key"></i>
                <span>API Key</span>
            </div>
        </div>
        <label for="openai_compatible_model">Model:</label>
        <input id="openai_compatible_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
        <label for="openai_compatible_tts_voices">Available Voices (comma separated):</label>
        <input id="openai_compatible_tts_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
        <label for="openai_compatible_tts_speed">Speed: <span id="openai_compatible_tts_speed_output"></span></label>
        <input type="range" id="openai_compatible_tts_speed" value="1" min="0.25" max="4" step="0.05">`;
        return html;
    }

    constructor() {
        // Bound once so the same function reference can be registered and later removed
        this.handler = async function (/** @type {string} */ key) {
            if (key !== SECRET_KEYS.CUSTOM_OPENAI_TTS) return;
            $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
            await this.onRefreshClick();
        }.bind(this);
    }

    /**
     * Detach the secret-change listeners registered by loadSettings().
     */
    dispose() {
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            eventSource.removeListener(event, this.handler);
        });
    }

    /**
     * Apply saved settings, fill the settings UI and subscribe to secret changes.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When a key that is not part of defaultSettings is passed
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // Work on a copy so loaded or edited values never overwrite the defaults.
        this.settings = structuredClone(this.defaultSettings);

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#openai_compatible_tts_endpoint').val(this.settings.provider_endpoint);
        $('#openai_compatible_tts_endpoint').on('input', () => { this.onSettingsChange(); });

        // Show the loaded model, not the default one
        $('#openai_compatible_model').val(this.settings.model);
        $('#openai_compatible_model').on('input', () => { this.onSettingsChange(); });

        $('#openai_compatible_tts_voices').val(this.settings.available_voices.join());
        $('#openai_compatible_tts_voices').on('input', () => { this.onSettingsChange(); });

        $('#openai_compatible_tts_speed').val(this.settings.speed);
        $('#openai_compatible_tts_speed').on('input', () => {
            this.onSettingsChange();
        });

        $('#openai_compatible_tts_speed_output').text(this.settings.speed);

        $('#openai_compatible_tts_key').toggleClass('success', !!secret_state[SECRET_KEYS.CUSTOM_OPENAI_TTS]);
        [event_types.SECRET_WRITTEN, event_types.SECRET_DELETED, event_types.SECRET_ROTATED].forEach(event => {
            // Remove first so a repeated load does not register the handler twice
            eventSource.removeListener(event, this.handler);
            eventSource.on(event, this.handler);
        });

        await this.checkReady();

        console.debug('OpenAI Compatible TTS: Settings loaded');
    }

    /**
     * Read the settings inputs back into this.settings and persist them.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.provider_endpoint = String($('#openai_compatible_tts_endpoint').val());
        this.settings.model = String($('#openai_compatible_model').val());
        // Tolerate spaces around commas and skip empty entries (e.g. a trailing comma)
        this.settings.available_voices = String($('#openai_compatible_tts_voices').val())
            .split(',')
            .map(voice => voice.trim())
            .filter(voice => voice.length > 0);
        this.settings.speed = Number($('#openai_compatible_tts_speed').val());
        $('#openai_compatible_tts_speed_output').text(this.settings.speed);
        // Drop the cached voice objects so getVoice() picks up the edited list
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Readiness check: rebuilds the cached voice list from the current settings.
     */
    async checkReady() {
        this.voices = await this.fetchTtsVoiceObjects();
    }

    // Refresh button / secret change handler: nothing to re-initialize
    async onRefreshClick() {
        return;
    }

    /**
     * Look up a voice object by its name.
     * @param {string} voiceName Voice name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(oaicVoice => oaicVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Audio response
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    /**
     * Build voice objects from the configured voice names.
     * @returns {Promise<object[]>} Voice objects (name, voice_id, lang)
     */
    async fetchTtsVoiceObjects() {
        return this.settings.available_voices.map(v => {
            return { name: v, voice_id: v, lang: 'en-US' };
        });
    }

    /**
     * Generate and play a short preview for the given voice.
     * @param {string} voiceId Voice ID
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        // fetchTtsGeneration throws on a non-OK status, so the response is usable here
        const response = await this.fetchTtsGeneration(text, voiceId);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        const releaseUrl = () => URL.revokeObjectURL(url);
        // Register cleanup before playback starts and also release the URL on failure
        this.audioElement.onended = releaseUrl;
        this.audioElement.onerror = releaseUrl;
        this.audioElement.src = url;
        this.audioElement.play();
    }

    /**
     * Request audio for the given text from the custom generate-voice route.
     * @param {string} inputText Text to generate
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful audio response
     * @throws {Error} When the request returns a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await fetch('/api/openai/custom/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                provider_endpoint: this.settings.provider_endpoint,
                model: this.settings.model,
                input: inputText,
                voice: voiceId,
                response_format: 'mp3',
                speed: this.settings.speed,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
||||
253
data/st-core-scripts/scripts/extensions/tts/openai.js
Normal file
253
data/st-core-scripts/scripts/extensions/tts/openai.js
Normal file
@@ -0,0 +1,253 @@
|
||||
import { getRequestHeaders, substituteParams } from '../../../script.js';
|
||||
import { saveTtsProviderSettings, sanitizeId } from './index.js';
|
||||
|
||||
export { OpenAITtsProvider };
|
||||
|
||||
/**
 * TTS provider backed by the `/api/openai/generate-voice` route.
 * For the `gpt-4o-mini-tts` model it also offers per-character voice instructions,
 * kept in sync with the voice map UI through a MutationObserver.
 */
class OpenAITtsProvider {
    static voices = [
        { name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' },
        { name: 'Ash', voice_id: 'ash', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/ash.wav' },
        { name: 'Coral', voice_id: 'coral', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/coral.wav' },
        { name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' },
        { name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' },
        { name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' },
        { name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' },
        { name: 'Sage', voice_id: 'sage', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/sage.wav' },
        { name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' },
    ];

    // Active settings; assigned by loadSettings()
    settings;
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        customVoices: [],
        model: 'tts-1',
        speed: 1,
        characterInstructions: {},
    };

    /**
     * Markup for the provider settings panel (model picker and speed slider).
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        // The model <select> is now properly closed; the previous markup opened a
        // second <select> instead of closing the first one.
        let html = `
        <div>Use OpenAI's TTS engine.</div>
        <small>Hint: Save an API key in the OpenAI API settings to use it here.</small>
        <div>
            <label for="openai-tts-model">Model:</label>
            <select id="openai-tts-model">
                <optgroup label="Latest">
                    <option value="tts-1">tts-1</option>
                    <option value="tts-1-hd">tts-1-hd</option>
                    <option value="gpt-4o-mini-tts">gpt-4o-mini-tts</option>
                </optgroup>
                <optgroup label="Snapshots">
                    <option value="tts-1-1106">tts-1-1106</option>
                    <option value="tts-1-hd-1106">tts-1-hd-1106</option>
                </optgroup>
            </select>
        </div>
        <div>
            <label for="openai-tts-speed">Speed: <span id="openai-tts-speed-output"></span></label>
            <input type="range" id="openai-tts-speed" value="1" min="0.25" max="4" step="0.05">
        </div>`;
        return html;
    }

    /**
     * Release resources held by the provider: stops observing the voice map.
     */
    dispose() {
        this.voiceMapObserver?.disconnect();
        this.voiceMapObserver = null;
    }

    /**
     * Apply saved settings, fill the settings UI and start watching the voice map.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When a key that is not part of defaultSettings is passed
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // Work on a copy so loaded or edited values never overwrite the defaults.
        this.settings = structuredClone(this.defaultSettings);

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#openai-tts-model').val(this.settings.model);
        $('#openai-tts-model').on('change', () => {
            this.onSettingsChange();
        });

        $('#openai-tts-speed').val(this.settings.speed);
        $('#openai-tts-speed').on('input', () => {
            this.onSettingsChange();
        });

        $('#openai-tts-speed-output').text(this.settings.speed);

        await this.checkReady();
        // Initialize UI state based on current model (gpt-4o-mini-tts or other)
        this.updateInstructionsUI();
        // Look for voice map changes
        this.setupVoiceMapObserver();

        console.debug('OpenAI TTS: Settings loaded');
    }

    /**
     * (Re)create the observer that refreshes the per-character instruction inputs
     * whenever the voice map block changes. Any previous observer is disconnected first.
     */
    setupVoiceMapObserver() {
        if (this.voiceMapObserver) {
            this.voiceMapObserver.disconnect();
            this.voiceMapObserver = null;
        }

        const targetNode = document.getElementById('tts_voicemap_block');
        if (!targetNode) return;

        const observer = new MutationObserver(() => {
            if (this.settings.model === 'gpt-4o-mini-tts') {
                this.populateCharacterInstructions();
            }
        });

        observer.observe(targetNode, { childList: true, subtree: true });
        this.voiceMapObserver = observer;
    }

    /**
     * Read the settings inputs back into this.settings, refresh the UI and persist.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.model = String($('#openai-tts-model').find(':selected').val());
        this.settings.speed = Number($('#openai-tts-speed').val());
        $('#openai-tts-speed-output').text(this.settings.speed);
        this.updateInstructionsUI();
        saveTtsProviderSettings();
    }

    /**
     * Show the instructions section only for the gpt-4o-mini-tts model and keep the
     * voice map observer alive exactly while that section is in use.
     */
    updateInstructionsUI() {
        if (this.settings.model === 'gpt-4o-mini-tts') {
            this.createInstructionsContainer();
            $('#openai-instructions-container').show();
            this.populateCharacterInstructions();
            // The observer is dropped when another model is selected; re-arm it here,
            // otherwise switching back would leave the instruction list stale.
            if (!this.voiceMapObserver) {
                this.setupVoiceMapObserver();
            }
        } else {
            $('#openai-instructions-container').hide();
            this.voiceMapObserver?.disconnect();
            this.voiceMapObserver = null;
        }
    }

    /**
     * Insert the (initially hidden) instructions container after the speed slider,
     * unless it already exists.
     */
    createInstructionsContainer() {
        if ($('#openai-instructions-container').length === 0) {
            const containerHtml = `
                <div id="openai-instructions-container" style="display: none;">
                    <span>Voice Instructions (GPT-4o Mini TTS)</span><br>
                    <small>Customize how each character speaks</small>
                    <div id="openai-character-instructions"></div>
                </div>
            `;
            $('#openai-tts-speed').parent().after(containerHtml);
        }
    }

    /**
     * Rebuild one labelled textarea per character currently listed in the voice map,
     * pre-filled with the saved instructions for that character.
     */
    populateCharacterInstructions() {
        const currentCharacters = $('.tts_voicemap_block_char span').map((i, el) => $(el).text()).get();

        $('#openai-character-instructions').empty();

        for (const char of currentCharacters) {
            // These two voice map entries get no instruction field
            if (char === 'SillyTavern System' || char === '[Default Voice]') continue;

            const sanitizedName = sanitizeId(char);
            const savedInstructions = this.settings.characterInstructions?.[char] || '';

            const instructionBlock = document.createElement('div');
            const label = document.createElement('label');
            const textArea = document.createElement('textarea');
            instructionBlock.appendChild(label);
            instructionBlock.appendChild(textArea);
            instructionBlock.className = 'character-instructions';
            label.setAttribute('for', `openai_char_${sanitizedName}`);
            label.innerText = `${char}:`;
            textArea.id = `openai_char_${sanitizedName}`;
            textArea.placeholder = 'e.g., "Speak cheerfully and energetically"';
            textArea.className = 'textarea_compact autoSetHeight';
            textArea.value = savedInstructions;
            textArea.addEventListener('input', () => {
                this.saveCharacterInstructions(char, textArea.value);
            });

            $('#openai-character-instructions').append(instructionBlock);
        }
    }

    /**
     * Store the instructions for one character and persist the settings.
     * @param {string} characterName Character name as shown in the voice map
     * @param {string} instructions Instruction text
     */
    saveCharacterInstructions(characterName, instructions) {
        if (!this.settings.characterInstructions) {
            this.settings.characterInstructions = {};
        }
        this.settings.characterInstructions[characterName] = instructions;
        saveTtsProviderSettings();
    }

    // Readiness check: the voice list is static, so this always succeeds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    // Refresh button handler: nothing to re-initialize
    async onRefreshClick() {
        return;
    }

    /**
     * Look up one of the static voices by id or display name.
     * @param {string} voiceName Voice id or name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no name is given or no voice matches
     */
    async getVoice(voiceName) {
        if (!voiceName) {
            throw 'TTS Voice name not provided';
        }

        const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName);

        if (!voice) {
            throw `TTS Voice not found: ${voiceName}`;
        }

        return voice;
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @param {string|null} characterName Character whose saved instructions should be applied, if any
     * @returns {Promise<Response>} Audio response
     */
    async generateTts(text, voiceId, characterName = null) {
        const response = await this.fetchTtsGeneration(text, voiceId, characterName);
        return response;
    }

    /**
     * @returns {Promise<object[]>} The static list of voices
     */
    async fetchTtsVoiceObjects() {
        return OpenAITtsProvider.voices;
    }

    // No custom preview: every voice object carries its own preview_url
    async previewTtsVoice(_) {
        return;
    }

    /**
     * Request audio from the generate-voice route.
     * Character instructions are attached only for the gpt-4o-mini-tts model and
     * only when non-blank instructions are saved for the character.
     * @param {string} inputText Text to generate
     * @param {string} voiceId Voice ID
     * @param {string|null} characterName Character name used to look up instructions
     * @returns {Promise<Response>} Successful audio response
     * @throws {Error} When the request returns a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId, characterName = null) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const requestBody = {
            'text': inputText,
            'voice': voiceId,
            'model': this.settings.model,
            'speed': this.settings.speed,
        };

        if (this.settings.model === 'gpt-4o-mini-tts' && characterName) {
            const instructions = this.settings.characterInstructions?.[characterName];
            if (instructions && instructions.trim()) {
                requestBody.instructions = substituteParams(instructions);
            }
        }

        const response = await fetch('/api/openai/generate-voice', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify(requestBody),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
||||
151
data/st-core-scripts/scripts/extensions/tts/pollinations.js
Normal file
151
data/st-core-scripts/scripts/extensions/tts/pollinations.js
Normal file
@@ -0,0 +1,151 @@
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
import { splitRecursive } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
/**
 * TTS provider backed by the `/api/speech/pollinations/*` routes.
 * The voice list is fetched from the voices route; audio is generated chunk by chunk.
 */
export class PollinationsTtsProvider {
    // Active settings; assigned by loadSettings()
    settings;
    // Cached voice objects; refreshed by checkReady() and lazily by getVoice()
    voices = [];
    separator = ' . ';
    audioElement = document.createElement('audio');

    defaultSettings = {
        // TODO: Make this configurable
        model: 'openai-audio',
        voiceMap: {},
    };

    // This provider has no settings UI
    get settingsHtml() {
        return '';
    }

    /**
     * Invalidate the cached voices and persist the settings.
     */
    onSettingsChange() {
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Apply saved settings and try to load the voice list.
     * A failing readiness check is logged and does not reject.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When a key that is not part of defaultSettings is passed
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // Work on a copy so loaded or edited values never overwrite the defaults.
        this.settings = structuredClone(this.defaultSettings);

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        try {
            await this.checkReady();
            console.debug('Pollinations TTS: Settings loaded');
        } catch {
            console.debug('Pollinations TTS: Settings loaded, but not ready');
        }
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // The result is kept so a refresh updates the cache and getVoice() need not fetch again.
    async checkReady() {
        this.voices = await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice object by name or id, fetching the voice list if none is cached.
     * @param {string} voiceName Voice name or id
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice matches
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(voice => voice.name == voiceName || voice.voice_id == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS audio for the given text using the specified voice.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Audio response generator
     */
    generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice names for the configured model and wrap them as voice objects,
     * sorted by name.
     * @returns {Promise<object[]>} Voice objects (name, voice_id, preview_url, lang)
     * @throws {Error} When the request fails or the response is not a list
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch('/api/speech/pollinations/voices', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({ model: this.settings.model }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        if (!Array.isArray(responseJson)) {
            throw new Error('Unexpected voices response: expected a list of voice names');
        }
        return responseJson
            .sort()
            .map(x => ({ name: x, voice_id: x, preview_url: false, lang: 'en-US' }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>} Resolves once every chunk has finished playing
     * @throws {Error} When generation or playback of a chunk fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        for await (const response of this.generateTts(text, id)) {
            const audio = await response.blob();
            const url = URL.createObjectURL(audio);
            try {
                await new Promise((resolve, reject) => {
                    const audioElement = new Audio();
                    // Attach handlers before starting playback so no event is missed
                    audioElement.onended = () => resolve();
                    // Without these two paths a failed playback would leave the promise pending forever
                    audioElement.onerror = () => reject(new Error('TTS preview playback failed'));
                    audioElement.src = url;
                    Promise.resolve(audioElement.play()).catch(reject);
                });
            } finally {
                // Release the blob URL whether playback finished or failed
                URL.revokeObjectURL(url);
            }
        }
    }

    /**
     * Request audio from the generate route, one request per text chunk of at most
     * MAX_LENGTH characters. Each chunk is prefixed with an instruction line asking
     * for the text to be spoken verbatim.
     * @param {string} text Text to generate
     * @param {string} voiceId Voice ID
     * @returns {AsyncGenerator<Response>} Successful audio responses
     * @throws {Error} When a request returns a non-OK status
     */
    async* fetchTtsGeneration(text, voiceId) {
        const MAX_LENGTH = 1000;
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const chunks = splitRecursive(text, MAX_LENGTH);
        for (const chunk of chunks) {
            const response = await fetch('/api/speech/pollinations/generate', {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    model: this.settings.model,
                    text: 'Say exactly this and nothing else:' + '\n' + chunk,
                    voice: voiceId,
                }),
            });

            if (!response.ok) {
                toastr.error(response.statusText, 'TTS Generation Failed');
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }

            yield response;
        }
    }
}
|
||||
81
data/st-core-scripts/scripts/extensions/tts/readme.md
Normal file
81
data/st-core-scripts/scripts/extensions/tts/readme.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# Provider Requirements.
|
||||
Because I don't know how, or if you can, and/or maybe I am just too lazy to implement interfaces in JS, here are the requirements of a provider that the extension needs to operate.
|
||||
|
||||
### class YourTtsProvider
|
||||
#### Required
|
||||
Exported for use in extension index.js, and added to providers list in index.js
|
||||
1. generateTts(text, voiceId)
|
||||
2. fetchTtsVoiceObjects()
|
||||
3. onRefreshClick()
|
||||
4. checkReady()
|
||||
5. loadSettings(settingsObject)
|
||||
6. settings field
|
||||
7. settingsHtml field
|
||||
|
||||
#### Optional
|
||||
1. previewTtsVoice()
|
||||
2. separator field
|
||||
3. processText(text)
|
||||
4. dispose()
|
||||
|
||||
# Requirement Descriptions
|
||||
### generateTts(text, voiceId)
|
||||
Must return `audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']`
|
||||
Must take text to be rendered and the voiceId to identify the voice to be used
|
||||
|
||||
### fetchTtsVoiceObjects()
|
||||
Required.
|
||||
Used by the TTS extension to get a list of voice objects from the provider.
|
||||
Must return a list of voice objects representing the available voices.
|
||||
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
|
||||
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
|
||||
3. preview_url: a URL to a local audio file that will be used to sample voices
|
||||
4. lang: OPTIONAL language string
|
||||
|
||||
### getVoice(voiceName)
|
||||
Required.
|
||||
Must return a single voice object matching the provided voiceName. The voice object must have the following at least:
|
||||
1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
|
||||
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
|
||||
3. preview_url: a URL to a local audio file that will be used to sample voices
|
||||
4. lang: OPTIONAL language indicator
|
||||
|
||||
### onRefreshClick()
|
||||
Required.
|
||||
Users click this button to reconnect/reinit the selected provider.
|
||||
Responds to the user clicking the refresh button, which is intended to re-initialize the Provider into a working state, like retrying connections or checking if everything is loaded.
|
||||
|
||||
### checkReady()
|
||||
Required.
|
||||
Return without error to let TTS extension know that the provider is ready.
|
||||
Return an error to block the main TTS extension from initializing the provider and UI. The error will be put in the TTS extension UI directly.
|
||||
|
||||
### loadSettings(settingsObject)
|
||||
Required.
|
||||
Handle the input settings from the TTS extension on provider load.
|
||||
Put code in here to load your provider settings.
|
||||
|
||||
### settings field
|
||||
Required, used for storing any provider state that needs to be saved.
|
||||
Anything stored in this field is automatically persisted under extension_settings[providerName] by the main extension in `saveTtsProviderSettings()`, as well as loaded when the provider is selected in `loadTtsProvider(provider)`.
|
||||
TTS extension doesn't expect any specific contents.
|
||||
|
||||
### settingsHtml field
|
||||
Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
|
||||
|
||||
### previewTtsVoice()
|
||||
Optional.
|
||||
Function to handle playing previews of voice samples if no direct preview_url is available in fetchTtsVoiceObjects() response
|
||||
|
||||
### separator field
|
||||
Optional.
|
||||
Used when narrate quoted text is enabled.
|
||||
Defines the string of characters used to introduce separation between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...`
|
||||
|
||||
### processText(text)
|
||||
Optional.
|
||||
A function applied to the input text before passing it to the TTS generator. Can be async.
|
||||
|
||||
### dispose()
|
||||
Optional.
|
||||
Function to handle cleanup of provider resources when the provider is switched.
|
||||
344
data/st-core-scripts/scripts/extensions/tts/sbvits2.js
Normal file
344
data/st-core-scripts/scripts/extensions/tts/sbvits2.js
Normal file
@@ -0,0 +1,344 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { SBVits2TtsProvider };
|
||||
|
||||
/**
 * TTS provider backed by a Style-Bert-VITS2 API server.
 *
 * Voices are discovered from `GET {endpoint}/models/info` and audio is requested from
 * `POST {endpoint}/voice`, with all generation parameters passed in the query string.
 */
class SBVits2TtsProvider {
    //########//
    // Config //
    //########//

    /** Active provider settings; assigned by loadSettings(). */
    settings;
    ready = false;
    /** Voice objects cached by fetchTtsVoiceObjects(). */
    voices = [];
    separator = '. ';
    /** Audio element used for voice previews. */
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // backup for auto_split: newline runs become a <br> marker that
        // fetchTtsGeneration() turns back into '\n' before sending.
        return text.replace(/\n+/g, '<br>');
    }

    /** Display label -> language key expected by the API. */
    languageLabels = {
        'Chinese': 'ZH',
        'English': 'EN',
        'Japanese': 'JP',
    };

    /** API language key -> locale code used to pick the preview sentence. */
    langKey2LangCode = {
        'ZH': 'zh-CN',
        'EN': 'en-US',
        'JP': 'ja-JP',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:5000',
        sdp_ratio: 0.2,
        noise: 0.6,
        noisew: 0.8,
        length: 1,
        language: 'JP',
        auto_split: true,
        split_interval: 0.5,
        assist_text: '',
        assist_text_weight: 1,
        style: 'Neutral',
        style_weight: 1,
        reference_audio_path: '',
    };

    /**
     * Settings backed by an `<input type="range">` with id `sbvits_<key>` and a value
     * read-out element with id `sbvits_<key>_output`.
     */
    rangeSettingKeys = [
        'sdp_ratio',
        'noise',
        'noisew',
        'length',
        'split_interval',
        'assist_text_weight',
        'style_weight',
    ];

    /**
     * Markup for the provider settings panel.
     * Inputs are rendered with the default values; loadSettings() overwrites them afterwards.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        const languageOptions = Object.entries(this.languageLabels)
            .map(([label, code]) => {
                const selected = code === this.settings?.language ? ' selected="selected"' : '';
                return `<option value="${code}"${selected}>${label}</option>`;
            })
            .join('');

        return `
        <label for="sbvits_api_language">Language</label>
        <select id="sbvits_api_language">${languageOptions}
        </select>
        <label>SBVits2 Settings:</label><br/>
        <label for="sbvits_tts_endpoint">Provider Endpoint:</label>
        <input id="sbvits_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/litagin02/Style-Bert-VITS2">Style-Bert-VITS2 API Server</a>.</span><br/>

        <label for="sbvits_sdp_ratio">sdp_ratio: <span id="sbvits_sdp_ratio_output">${this.defaultSettings.sdp_ratio}</span></label>
        <input id="sbvits_sdp_ratio" type="range" value="${this.defaultSettings.sdp_ratio}" min="0.0" max="1" step="0.01" />

        <label for="sbvits_noise">noise: <span id="sbvits_noise_output">${this.defaultSettings.noise}</span></label>
        <input id="sbvits_noise" type="range" value="${this.defaultSettings.noise}" min="0.1" max="2" step="0.01" />

        <label for="sbvits_noisew">noisew: <span id="sbvits_noisew_output">${this.defaultSettings.noisew}</span></label>
        <input id="sbvits_noisew" type="range" value="${this.defaultSettings.noisew}" min="0.1" max="2" step="0.01" />

        <label for="sbvits_length">length: <span id="sbvits_length_output">${this.defaultSettings.length}</span></label>
        <input id="sbvits_length" type="range" value="${this.defaultSettings.length}" min="0.0" max="5" step="0.01" />

        <label for="sbvits_auto_split" class="checkbox_label">
            <input id="sbvits_auto_split" type="checkbox" ${this.defaultSettings.auto_split ? 'checked' : ''} />
            Enable Text Splitting
        </label>

        <label for="sbvits_split_interval">split_interval: <span id="sbvits_split_interval_output">${this.defaultSettings.split_interval}</span></label>
        <input id="sbvits_split_interval" type="range" value="${this.defaultSettings.split_interval}" min="0.0" max="5" step="0.01" />

        <label for="sbvits_assist_text">assist_text:</label>
        <input id="sbvits_assist_text" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.assist_text}"/>

        <label for="sbvits_assist_text_weight">assist_text_weight: <span id="sbvits_assist_text_weight_output">${this.defaultSettings.assist_text_weight}</span></label>
        <input id="sbvits_assist_text_weight" type="range" value="${this.defaultSettings.assist_text_weight}" min="0.0" max="1" step="0.01" />

        <label for="sbvits_style_weight">style_weight: <span id="sbvits_style_weight_output">${this.defaultSettings.style_weight}</span></label>
        <input id="sbvits_style_weight" type="range" value="${this.defaultSettings.style_weight}" min="0.0" max="20" step="0.01" />

        <label for="sbvits_reference_audio_path">reference_audio_path:</label>
        <input id="sbvits_reference_audio_path" type="text" class="text_pole" maxlength="512" value="${this.defaultSettings.reference_audio_path}"/>
        `;
    }

    /**
     * Copy the current UI values into this.settings, refresh the slider read-outs
     * and persist the settings. Used when provider settings are updated from the UI.
     */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#sbvits_tts_endpoint').val();
        this.settings.language = $('#sbvits_api_language').val();
        this.settings.assist_text = $('#sbvits_assist_text').val();
        this.settings.reference_audio_path = $('#sbvits_reference_audio_path').val();
        this.settings.auto_split = $('#sbvits_auto_split').is(':checked');

        // Each slider feeds its setting and mirrors the value in its read-out span
        for (const key of this.rangeSettingKeys) {
            this.settings[key] = $(`#sbvits_${key}`).val();
            $(`#sbvits_${key}_output`).text(this.settings[key]);
        }

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Apply saved settings, populate the provider UI and register the UI listeners.
     * @param {Object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // NOTE: this.settings is the same object as this.defaultSettings, so loaded
        // values overwrite the defaults of this instance.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#sbvits_tts_endpoint').val(this.settings.provider_endpoint);
        $('#sbvits_api_language').val(this.settings.language);
        $('#sbvits_assist_text').val(this.settings.assist_text);
        $('#sbvits_reference_audio_path').val(this.settings.reference_audio_path);
        $('#sbvits_auto_split').prop('checked', this.settings.auto_split);
        for (const key of this.rangeSettingKeys) {
            $(`#sbvits_${key}`).val(this.settings[key]);
            $(`#sbvits_${key}_output`).text(this.settings[key]);
        }

        // Register input/change event listeners to update settings on user interaction
        const notifyChange = () => { this.onSettingsChange(); };
        for (const id of ['sbvits_tts_endpoint', 'sbvits_assist_text', 'sbvits_reference_audio_path']) {
            $(`#${id}`).on('input', notifyChange);
        }
        $('#sbvits_api_language').on('change', notifyChange);
        $('#sbvits_auto_split').on('change', notifyChange);
        for (const key of this.rangeSettingKeys) {
            $(`#sbvits_${key}`).on('change', notifyChange);
        }

        await this.checkReady();

        console.info('SBVits2: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds.
     * Uses allSettled, so a failing request does not reject this call.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice has the given name
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name == voiceName);
        if (!match) {
            // A plain string is thrown here (not an Error); kept as-is for existing callers
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response holding the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the model list and expand it into one voice per (model, speaker, style).
     * The result is also stored in this.voices.
     * @returns {Promise<Object[]>} Voice objects `{ name, voice_id, preview_url }`
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/models/info`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and a
            // parsed object would be rendered as "[object Object]" in the message.
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const models = await response.json();
        const voices = Object.entries(models).flatMap(([modelId, config]) =>
            Object.entries(config.spk2id).flatMap(([speaker, speakerId]) =>
                Object.keys(config.style2id).map(style => ({
                    name: `${speaker} (${style})`,
                    voice_id: `${modelId}-${speakerId}-${style}`,
                    preview_url: false,
                })),
            ),
        );

        this.voices = voices; // Assign to the class property
        return voices; // Also return this list
    }

    // Each time a parameter is changed, we change the configuration
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_id-speaker_id-style)
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        // The style is everything after the second dash, so style names may contain '-'
        const [model_id, speaker_id, ...rest] = voiceId.split('-');
        const style = rest.join('-');

        const params = new URLSearchParams();
        // restore for auto_split
        params.append('text', inputText.replaceAll('<br>', '\n'));
        params.append('model_id', model_id);
        params.append('speaker_id', speaker_id);
        params.append('sdp_ratio', this.settings.sdp_ratio);
        params.append('noise', this.settings.noise);
        params.append('noisew', this.settings.noisew);
        params.append('length', this.settings.length);
        params.append('language', this.settings.language);
        params.append('auto_split', this.settings.auto_split);
        params.append('split_interval', this.settings.split_interval);
        if (this.settings.assist_text) {
            params.append('assist_text', this.settings.assist_text);
            params.append('assist_text_weight', this.settings.assist_text_weight);
        }
        params.append('style', style);
        params.append('style_weight', this.settings.style_weight);
        if (this.settings.reference_audio_path) {
            params.append('reference_audio_path', this.settings.reference_audio_path);
        }

        const url = `${this.settings.provider_endpoint}/voice?${params.toString()}`;
        const response = await fetch(url, { method: 'POST', headers: {} });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @returns {Promise<void>}
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        // The setting is stored under `language` (a key of langKey2LangCode)
        const lang_code = this.langKey2LangCode[this.settings.language] ?? 'ja-JP';
        const text = getPreviewString(lang_code);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the blob URL once playback has finished
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
||||
95
data/st-core-scripts/scripts/extensions/tts/settings.html
Normal file
95
data/st-core-scripts/scripts/extensions/tts/settings.html
Normal file
@@ -0,0 +1,95 @@
|
||||
<div id="tts_settings">
|
||||
<div class="inline-drawer">
|
||||
<div class="inline-drawer-toggle inline-drawer-header">
|
||||
<b>TTS</b>
|
||||
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
|
||||
</div>
|
||||
<div class="inline-drawer-content">
|
||||
<div id="tts_status">
|
||||
</div>
|
||||
<span data-i18n="Select TTS Provider">Select TTS Provider</span>
|
||||
<br>
|
||||
<div class="tts_block">
|
||||
<select id="tts_provider" class="flex1">
|
||||
</select>
|
||||
<input id="tts_refresh" data-i18n="[value]tts_refresh" class="menu_button" type="submit" value="Reload" />
|
||||
</div>
|
||||
<div>
|
||||
<label class="checkbox_label" for="tts_enabled">
|
||||
<input type="checkbox" id="tts_enabled" name="tts_enabled">
|
||||
<small data-i18n="tts_enabled">Enabled</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_user">
|
||||
<input type="checkbox" id="tts_narrate_user">
|
||||
<small data-i18n="Narrate user messages">Narrate user messages</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_auto_generation">
|
||||
<input type="checkbox" id="tts_auto_generation">
|
||||
<small data-i18n="Auto Generation">Auto Generation</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_periodic_auto_generation" data-i18n="[title]Requires auto generation to be enabled." title="Requires auto generation to be enabled.">
|
||||
<input type="checkbox" id="tts_periodic_auto_generation">
|
||||
<small data-i18n="Narrate by paragraphs (when streaming)">Narrate by paragraphs (when streaming)</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_by_paragraphs">
|
||||
<input type="checkbox" id="tts_narrate_by_paragraphs">
|
||||
<small data-i18n="Narrate by paragraphs (when not streaming)">Narrate by paragraphs (when not streaming)</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_quoted">
|
||||
<input type="checkbox" id="tts_narrate_quoted">
|
||||
<small data-i18n="Only narrate quotes">Only narrate "quotes"</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_dialogues">
|
||||
<input type="checkbox" id="tts_narrate_dialogues">
|
||||
<small data-i18n="Ignore text, even quotes, inside asterisk">Ignore *text, even "quotes", inside asterisks*</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_narrate_translated_only">
|
||||
<input type="checkbox" id="tts_narrate_translated_only">
|
||||
<small data-i18n="Narrate only the translated text">Narrate only the translated text</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_skip_codeblocks">
|
||||
<input type="checkbox" id="tts_skip_codeblocks">
|
||||
<small data-i18n="Skip codeblocks">Skip codeblocks</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_skip_tags">
|
||||
<input type="checkbox" id="tts_skip_tags">
|
||||
<small data-i18n="Skip tagged blocks">Skip &lt;tagged&gt; blocks</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_pass_asterisks">
|
||||
<input type="checkbox" id="tts_pass_asterisks">
|
||||
<small data-i18n="Pass Asterisks to TTS Engine">Pass Asterisks to TTS Engine</small>
|
||||
</label>
|
||||
<label class="checkbox_label" for="tts_multi_voice_enabled"
|
||||
data-i18n="[title]Works best when: Pass Asterisks to TTS Engine is enabled, and both Only narrate quotes and Ignore *text, even 'quotes', inside asterisks* are disabled."
|
||||
title="Works best when: Pass Asterisks to TTS Engine is enabled, and both Only narrate quotes and Ignore *text, even 'quotes', inside asterisks* are disabled.">
|
||||
<input type="checkbox" id="tts_multi_voice_enabled">
|
||||
<small data-i18n="Different voices for quotes and text inside asterisks">
|
||||
Different voices for "quotes", *text inside asterisks* and other text
|
||||
</small>
|
||||
</label>
|
||||
</div>
|
||||
<div id="playback_rate_block" class="range-block">
|
||||
<hr>
|
||||
<div class="range-block-title justifyLeft">
|
||||
<small data-i18n="Audio Playback Speed">Audio Playback Speed</small>
|
||||
</div>
|
||||
<div class="range-block-range-and-counter">
|
||||
<div class="range-block-range">
|
||||
<input type="range" id="playback_rate" name="volume" min="0" max="3" step="0.05">
|
||||
</div>
|
||||
<div class="range-block-counter">
|
||||
<input type="number" min="0" max="3" step="0.05" data-for="playback_rate" id="playback_rate_counter">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="tts_voicemap_block">
|
||||
</div>
|
||||
<hr>
|
||||
<form id="tts_provider_settings">
|
||||
</form>
|
||||
<div class="tts_buttons">
|
||||
<input id="tts_voices" class="menu_button" data-i18n="[value]Available voices" type="submit" value="Available voices" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
176
data/st-core-scripts/scripts/extensions/tts/silerotts.js
Normal file
176
data/st-core-scripts/scripts/extensions/tts/silerotts.js
Normal file
@@ -0,0 +1,176 @@
|
||||
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { SileroTtsProvider };
|
||||
|
||||
/**
 * TTS provider for a Silero server, reached either through the SillyTavern Extras API
 * or a standalone Silero TTS server. All requests go through doExtrasFetch().
 */
class SileroTtsProvider {
    //########//
    // Config //
    //########//

    /** Active provider settings; assigned by loadSettings(). */
    settings;
    ready = false;
    /** Voice objects cached by getVoice(). */
    voices = [];
    separator = ' ';
    /** Handle of the Extras polling timer started by loadSettings(), or null when none is running. */
    apiCheckInterval = null;

    defaultSettings = {
        provider_endpoint: 'http://localhost:8001/tts',
        voiceMap: {},
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <label for="silero_tts_endpoint">Provider Endpoint:</label>
        <input id="silero_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/SillyTavern/SillyTavern-extras">SillyTavern Extras API</a> or <a target="_blank" href="https://github.com/ouoertheo/silero-api-server">Silero TTS Server</a>.</span>
        `;
    }

    /**
     * Read the endpoint from the UI, persist the settings and request a new session.
     * Used when provider settings are updated from the UI.
     */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#silero_tts_endpoint').val();
        saveTtsProviderSettings();
        this.refreshSession();
    }

    /**
     * Apply saved settings, populate the provider UI and start watching for the Extras TTS module.
     * @param {Object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // NOTE: this.settings is the same object as this.defaultSettings.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // Drop a timer left over from an earlier loadSettings() call so timers never stack
        this.dispose();
        this.apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('silero-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
                this.dispose();
            }
        }, 2000);

        $('#silero_tts_endpoint').val(this.settings.provider_endpoint);
        $('#silero_tts_endpoint').on('input', () => { this.onSettingsChange(); });
        // Not awaited: initSession() handles its own errors
        this.refreshSession();

        await this.checkReady();

        console.debug('SileroTTS: Settings loaded');
    }

    /**
     * Optional provider cleanup hook: stops the Extras polling timer, if one is running.
     * Safe to call repeatedly.
     */
    dispose() {
        if (this.apiCheckInterval !== null) {
            clearInterval(this.apiCheckInterval);
            this.apiCheckInterval = null;
        }
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds.
     * @returns {Promise<void>}
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    async onRefreshClick() {
        return;
    }

    async refreshSession() {
        await this.initSession();
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice by name, fetching the voice list first when none is cached.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} When no voice has the given name
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(sileroVoice => sileroVoice.name == voiceName);
        if (!match) {
            // A plain string is thrown here (not an Error); kept as-is for existing callers
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Speaker to use
     * @returns {Promise<Response>} Fetch response holding the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from `{endpoint}/speakers`.
     * @returns {Promise<Object[]>} Parsed JSON response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // Read the body as text: an error body is not guaranteed to be JSON, and a
            // parsed object would be rendered as "[object Object]" in the message.
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return await response.json();
    }

    /**
     * Request audio from `{endpoint}/generate`.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Speaker to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/generate`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache', // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': voiceId,
                    'session': 'sillytavern',
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Ask the server for a new session at `{endpoint}/session`.
     * Best-effort: failures are logged and never rethrown; a 404 response is not treated as a failure.
     * @returns {Promise<void>}
     */
    async initSession() {
        console.info('Silero TTS: requesting new session');
        try {
            const response = await doExtrasFetch(
                `${this.settings.provider_endpoint}/session`,
                {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Cache-Control': 'no-cache',
                    },
                    body: JSON.stringify({
                        'path': 'sillytavern',
                    }),
                },
            );

            if (!response.ok && response.status !== 404) {
                throw new Error(`HTTP ${response.status}: ${await response.text()}`);
            }
        } catch (error) {
            console.info('Silero TTS: endpoint not available', error);
        }
    }

    // Interface not used by Silero TTS
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
||||
199
data/st-core-scripts/scripts/extensions/tts/speecht5.js
Normal file
199
data/st-core-scripts/scripts/extensions/tts/speecht5.js
Normal file
@@ -0,0 +1,199 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
import { getBase64Async } from '../../utils.js';
|
||||
import { getRequestHeaders } from '../../../script.js';
|
||||
|
||||
export { SpeechT5TtsProvider };
|
||||
|
||||
/**
 * TTS provider that synthesizes speech through the local `/api/speech/synthesize`
 * endpoint using the `Xenova/speecht5_tts` model. Speakers are user-uploaded
 * 2048-byte files stored base64-encoded in the provider settings.
 */
class SpeechT5TtsProvider {
    //########//
    // Config //
    //########//

    /** Active provider settings; assigned by loadSettings(). */
    settings;
    ready = false;
    voices = [];
    separator = ' .. ';
    /** Audio element used for voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        speakers: [],
        speaker: '',
        voiceMap: {},
    };

    /**
     * Markup for the provider settings panel.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <label for="speecht5_tts_speaker">Speaker:</label>
        <div class="flex-container">
            <select id="speecht5_tts_speaker" class="text_pole flex1">
            </select>
            <div id="speecht5_tts_speaker_upload_button" class="menu_button" title="Upload speaker">
                <i class="fa-solid fa-upload"></i>
            </div>
            <div id="speecht5_tts_delete_speaker_button" class="menu_button" title="Delete speaker">
                <i class="fa-solid fa-trash"></i>
            </div>
        </div>
        <input type="file" id="speecht5_tts_speaker_upload" class="displayNone">
        <div><i>Loading model for the first time may take a while!</i></div>
        `;
    }

    /**
     * Store the selected speaker and persist the settings.
     * Used when provider settings are updated from the UI.
     */
    onSettingsChange() {
        this.settings.speaker = $('#speecht5_tts_speaker').val();
        saveTtsProviderSettings();
    }

    /**
     * Generate and play a short English preview for the given voice.
     * @param {string} voiceId Voice ID to preview
     * @returns {Promise<void>}
     */
    async previewTtsVoice(voiceId) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const text = getPreviewString('en-US');
        const response = await this.fetchTtsGeneration(text, voiceId);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        this.audioElement.src = url;
        this.audioElement.play();
        // Release the blob URL once playback has finished
        this.audioElement.onended = () => URL.revokeObjectURL(url);
    }

    /**
     * Apply saved settings, populate the speaker list and register the UI listeners.
     * @param {Object} settings Saved provider settings (may be empty)
     * @returns {Promise<void>}
     * @throws {string} When `settings` contains a key that is not in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Only accept keys defined in defaultSettings.
        // NOTE: this.settings is the same object as this.defaultSettings.
        this.settings = this.defaultSettings;

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        for (const speaker of this.settings.speakers) {
            $('#speecht5_tts_speaker').append($('<option>', {
                value: speaker.voice_id,
                text: speaker.name,
            }));
        }

        $('#speecht5_tts_speaker').val(this.settings.speaker);
        $('#speecht5_tts_speaker').on('change', this.onSettingsChange.bind(this));
        $('#speecht5_tts_speaker_upload_button').on('click', () => {
            $('#speecht5_tts_speaker_upload').trigger('click');
        });
        $('#speecht5_tts_speaker_upload').on('change', (event) => this.handleSpeakerUpload(event));
        $('#speecht5_tts_delete_speaker_button').on('click', () => this.handleSpeakerDelete());

        await this.checkReady();

        console.debug('SpeechT5: Settings loaded');
    }

    /**
     * Add the file chosen in the upload input as a new speaker and select it.
     * @param {Event} event `change` event of the file input
     * @returns {Promise<void>}
     */
    async handleSpeakerUpload(event) {
        const file = event.target.files?.[0];
        if (!file) {
            // The file dialog was cancelled; there is nothing to import
            return;
        }
        if (file.size !== 2048) {
            toastr.error('Invalid speaker file size, expected 2048 bytes');
            return;
        }

        const data = await getBase64Async(file);
        const speaker = {
            voice_id: file.name,
            name: file.name,
            data: data,
            lang: 'en-US',
            preview_url: false,
        };
        this.settings.speakers.push(speaker);
        $('#speecht5_tts_speaker').append($('<option>', {
            value: speaker.voice_id,
            text: speaker.name,
        }));
        // Options are keyed by voice_id, so select by voice_id
        $('#speecht5_tts_speaker').val(speaker.voice_id);
        this.onSettingsChange();
    }

    /**
     * Remove the currently selected speaker after user confirmation and persist the change.
     */
    handleSpeakerDelete() {
        const confirmDelete = confirm('Are you sure you want to delete this speaker?');
        if (!confirmDelete) {
            return;
        }

        const index = this.settings.speakers.findIndex(s => s.voice_id === this.settings.speaker);
        if (index === -1) {
            toastr.error('Speaker not found');
            return;
        }

        const [speaker] = this.settings.speakers.splice(index, 1);
        // Match on the element value instead of building a selector from the file name,
        // which would break for names containing quotes or brackets
        $('#speecht5_tts_speaker option')
            .filter((_, option) => option.value === speaker.voice_id)
            .remove();

        if (this.settings.speakers.length === 0) {
            console.log('No speakers left');
            // Still persist the deletion and clear the dangling selection
            this.settings.speaker = '';
            saveTtsProviderSettings();
            return;
        }

        $('#speecht5_tts_speaker').val(this.settings.speakers[0].voice_id);
        this.onSettingsChange();
    }

    async checkReady() {
        return Promise.resolve();
    }

    /**
     * Look up a speaker by voice ID.
     * @param {string} voiceName Voice ID to look up
     * @returns {Promise<Object|undefined>} Speaker object, or undefined when not found
     */
    async getVoice(voiceName) {
        return this.settings.speakers.find(s => s.voice_id === voiceName);
    }

    /**
     * Generate speech for the given text.
     * @param {string} text Text to speak
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response holding the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    async fetchTtsVoiceObjects() {
        return this.settings.speakers;
    }

    /**
     * Request audio from `/api/speech/synthesize` for the given speaker.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} When the speaker is unknown or the server answers with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const speaker = await this.getVoice(voiceId);

        if (!speaker) {
            toastr.error(`Speaker not found: ${voiceId}`, 'TTS Generation Failed');
            throw new Error(`Speaker not found: ${voiceId}`);
        }

        const response = await fetch(
            '/api/speech/synthesize',
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    'text': inputText,
                    'speaker': speaker.data,
                    'model': 'Xenova/speecht5_tts',
                }),
            },
        );

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }

    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
}
|
||||
130
data/st-core-scripts/scripts/extensions/tts/style.css
Normal file
130
data/st-core-scripts/scripts/extensions/tts/style.css
Normal file
@@ -0,0 +1,130 @@
|
||||
@import './css/minimax-tts.css';
|
||||
@import './css/openai-tts.css';
|
||||
|
||||
.voice_preview {
|
||||
margin: 0.25rem 0.5rem;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.voice_preview .voice_name {
|
||||
text-align: left;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.voice_preview .voice_lang {
|
||||
width: 4rem;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.voice_preview .fa-play {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.tts-button {
|
||||
margin: 0;
|
||||
outline: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
transition: var(--animation-duration-2x);
|
||||
opacity: 0.7;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
|
||||
}
|
||||
|
||||
/* TTS message button: full opacity while hovered. */
.tts-button:hover {
    opacity: 1;
}

/* Generic TTS settings row; children wrap onto new lines when space runs out. */
.tts_block {
    display: flex;
    align-items: baseline;
    column-gap: 5px;
    flex-wrap: wrap;
}

.tts_custom_voices {
    display: flex;
    align-items: baseline;
    gap: 5px;
}

/* Stacked hint lines, slightly smaller than the main font. */
.novel_tts_hints {
    font-size: calc(0.9 * var(--mainFontSize));
    display: flex;
    flex-direction: column;
    gap: 5px;
    margin-bottom: 5px;
}

/* ---- "at-" prefixed settings layout ---- */

.at-settings-row {
    display: flex;
    justify-content: space-between;
    align-items: center;
    width: 100%;
}

.at-settings-option {
    flex: 1;
    margin: 0 10px;
}

/*
 * Single definition of the endpoint option. A second rule further down used
 * to repeat only `width: 38%`; it has been folded into this one.
 */
.at-endpoint-option {
    flex: 1;
    margin: 0 10px;
    margin-right: 25px;
    width: 38%;
}

.at-website-row {
    display: flex;
    justify-content: start;
    align-items: center;
    margin-top: 10px;
    margin-bottom: 10px;
}

.at-website-option {
    flex: 1;
    margin-right: 10px;
    margin-left: 10px;
}

.at-settings-separator {
    margin-top: 10px;
    margin-bottom: 10px;
    padding: 18px;
    font-weight: bold;
    border-top: 1px solid #e1e1e1; /* Grey line */
    border-bottom: 1px solid #e1e1e1; /* Grey line */
    text-align: center;
}

.at-status-message {
    flex: 1;
    margin: 0 10px;
}

.at-model-endpoint-row {
    display: flex;
    justify-content: space-between;
    align-items: center;
    width: 100%;
}

/*
 * NOTE(review): `.endpoint-option` has no `at-` prefix, unlike every other
 * selector in this group - confirm whether `.at-endpoint-option` was intended.
 * The former `margin-left: 10px` was dropped: `margin: 0 10px` already sets it.
 */
.at-model-option, .endpoint-option {
    flex: 1;
    margin: 0 10px;
}

#at-status_info {
    color: lightgreen;
}
|
||||
296
data/st-core-scripts/scripts/extensions/tts/system.js
Normal file
296
data/st-core-scripts/scripts/extensions/tts/system.js
Normal file
@@ -0,0 +1,296 @@
|
||||
import { isMobile } from '../../RossAscends-mods.js';
|
||||
import { getPreviewString } from './index.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
export { SystemTtsProvider };
|
||||
import { t } from '../../i18n.js';
|
||||
|
||||
/**
|
||||
* Chunkify
|
||||
* Google Chrome Speech Synthesis Chunking Pattern
|
||||
* Fixes inconsistencies with speaking long texts in speechUtterance objects
|
||||
* Licensed under the MIT License
|
||||
*
|
||||
* Peter Woolley and Brett Zamir
|
||||
* Modified by Haaris for bug fixes
|
||||
*/
|
||||
|
||||
/**
 * Speaks an utterance via `speechSynthesis`, one chunk at a time.
 *
 * Unless the utterance's voice has the (non-standard) voiceURI `'native'`, the
 * text is cut into pieces of roughly `settings.chunkLength` characters,
 * preferably ending on `. ! ? ,` or a space. Each piece is spoken as its own
 * SpeechSynthesisUtterance; the `end` event of one piece schedules the next.
 *
 * @param {SpeechSynthesisUtterance} utt Template utterance; its text, lang, voice, rate and pitch are used.
 * @param {object} [settings] Options. This object is mutated: `offset` tracks progress between recursive calls.
 * @param {number} [settings.chunkLength=160] Target maximum chunk length in characters.
 * @param {number} [settings.offset] Index in `utt.text` at which to continue.
 * @param {(chunk: SpeechSynthesisUtterance) => void} [settings.modifier] Called with every utterance right before it is queued.
 * @param {() => void} [callback] Invoked once no speakable text is left.
 *
 * Setting `speechUtteranceChunker.cancel = true` makes the next `end` handler
 * stop the chain (and reset the flag) instead of continuing.
 */
function speechUtteranceChunker(utt, settings, callback) {
    const opts = settings || {};
    const remaining = opts.offset !== undefined ? utt.text.substring(opts.offset) : utt.text;
    let newUtt;

    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = remaining;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    } else {
        const chunkLength = opts.chunkLength || 160;
        const pattRegex = new RegExp('^[\\s\\S]{' + Math.floor(chunkLength / 2) + ',' + chunkLength + '}[.!?,]{1}|^[\\s\\S]{1,' + chunkLength + '}$|^[\\s\\S]{1,' + chunkLength + '} ');
        const chunkArr = remaining.match(pattRegex);
        const chunk = chunkArr?.[0];

        // Stop when nothing matched, or when the leftover is at most two
        // characters of pure punctuation/whitespace. Short leftovers that
        // contain a letter or digit ("Hi", "OK", "5") used to be dropped here
        // as well, so such texts were never spoken; they are now kept.
        const nothingLeftToSay = chunk === undefined
            || (chunk.length <= 2 && !/[\p{L}\p{N}]/u.test(chunk));

        if (nothingLeftToSay) {
            //call once all text has been spoken...
            if (callback !== undefined) {
                callback();
            }
            return;
        }

        newUtt = new SpeechSynthesisUtterance(chunk);

        // Carry over any own properties set on the template (except the text).
        for (const key in utt) {
            if (Object.hasOwn(utt, key) && key !== 'text') {
                newUtt[key] = utt[key];
            }
        }

        // These are copied explicitly in addition to the own-property loop above.
        newUtt.lang = utt.lang;
        newUtt.voice = utt.voice;
        newUtt.rate = utt.rate;
        newUtt.pitch = utt.pitch;

        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            // Advance past the chunk just spoken and continue with the rest.
            opts.offset = opts.offset || 0;
            opts.offset += chunk.length;
            speechUtteranceChunker(utt, opts, callback);
        });
    }

    if (opts.modifier) {
        opts.modifier(newUtt);
    }

    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
    }, 0);
}
|
||||
|
||||
/**
 * TTS provider backed by the browser's Web Speech API (`speechSynthesis`).
 * Audio is played directly by the browser; `generateTts` resolves with the
 * fetched `/sounds/silence.mp3` response once speaking has finished.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    // Static constants for the simulated default voice
    static BROWSER_DEFAULT_VOICE_ID = '__browser_default__';
    static BROWSER_DEFAULT_VOICE_NAME = 'System Default Voice';

    settings;
    ready = false;
    voices = [];
    separator = ' ... ';

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    };

    /** Settings markup: rate and pitch sliders, or a notice when speech synthesis is unavailable. */
    get settingsHtml() {
        if (!('speechSynthesis' in window)) {
            return t`Your browser or operating system doesn't support speech synthesis`;
        }

        return '<p>' + t`Uses the voices provided by your operating system` + `</p>
        <label for="system_tts_rate">` + t`Rate:` + ` <span id="system_tts_rate_output"></span></label>
        <input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.1" max="2" step="0.01" />
        <label for="system_tts_pitch">` + t`Pitch:` + ` <span id="system_tts_pitch_output"></span></label>
        <input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.01" />`;
    }

    /** Reads both sliders into `this.settings`, refreshes the value labels and saves. */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        saveTtsProviderSettings();
    }

    /**
     * Applies stored settings on top of the defaults and wires up the sliders.
     * @param {object} settings Stored provider settings; only keys present in `defaultSettings` are accepted.
     * @throws {string} When `settings` contains an unknown key.
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // iOS only allows speech synthesis triggered by user interaction, so
        // speak a muted utterance on the first click to unlock it.
        if (isMobile()) {
            document.addEventListener('click', () => {
                const utterance = new SpeechSynthesisUtterance(' . ');
                utterance.volume = 0;
                speechSynthesis.speak(utterance);
            }, { once: true });
        }

        // Only accept keys defined in defaultSettings. Work on a copy so that
        // loading stored values never overwrites the defaults themselves.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // `??` rather than `||`: a pitch of 0 is a valid slider position.
        $('#system_tts_rate').val(this.settings.rate ?? this.defaultSettings.rate);
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);

        // Trigger updates
        $('#system_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#system_tts_pitch').on('input', () => { this.onSettingsChange(); });

        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.debug('SystemTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Nothing to refresh for this provider. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Lists the voices reported by the browser, sorted by language then name.
     * When the browser reports none, a single simulated default voice is returned.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     */
    fetchTtsVoiceObjects() {
        if (!('speechSynthesis' in window)) {
            return Promise.resolve([]);
        }

        return new Promise((resolve) => {
            // The voice list is read after a short delay.
            setTimeout(() => {
                const voices = speechSynthesis.getVoices();

                if (voices.length === 0) {
                    // Edge compat: Provide default when voices empty
                    console.warn('SystemTTS: getVoices() returned empty list. Providing browser default option.');
                    resolve([{
                        name: SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME,
                        voice_id: SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID,
                        preview_url: false,
                        lang: navigator.language || 'en-US',
                    }]);
                    return;
                }

                // Sort a copy so the array handed out by the browser is left untouched.
                const mappedVoices = [...voices]
                    .sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
                    .map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
                resolve(mappedVoices);
            }, 50);
        });
    }

    /**
     * Speaks the preview sentence with the given voice, cancelling current speech first.
     * Unknown voice IDs fall back to the browser default voice.
     * @param {string} voiceId A voiceURI, or `BROWSER_DEFAULT_VOICE_ID`.
     * @throws {Error} When the Speech Synthesis API is unavailable.
     */
    previewTtsVoice(voiceId) {
        if (!('speechSynthesis' in window)) {
            throw new Error('Speech synthesis API is not supported');
        }

        let voice = null;
        if (voiceId !== SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID) {
            const voices = speechSynthesis.getVoices();
            voice = voices.find(x => x.voiceURI === voiceId);

            if (!voice && voices.length > 0) {
                console.warn(`SystemTTS Preview: Voice ID "${voiceId}" not found among available voices. Using browser default.`);
            } else if (!voice && voices.length === 0) {
                console.warn('SystemTTS Preview: Voice list is empty. Using browser default.');
            }
        } else {
            console.log('SystemTTS Preview: Using browser default voice as requested.');
        }

        speechSynthesis.cancel();
        const langForPreview = voice ? voice.lang : (navigator.language || 'en-US');
        const text = getPreviewString(langForPreview);
        const utterance = new SpeechSynthesisUtterance(text);

        if (voice) {
            utterance.voice = voice;
        }

        utterance.rate = this.settings.rate || 1;
        // `??` keeps an explicit pitch of 0 (the slider minimum) instead of replacing it with 1.
        utterance.pitch = this.settings.pitch ?? 1;

        utterance.onerror = (event) => {
            console.error(`SystemTTS Preview Error: ${event.error}`, event);
        };

        speechSynthesis.speak(utterance);
    }

    /**
     * Resolves a voice name to `{ voice_id, name }`.
     * The simulated default voice is returned for its own name and whenever the
     * browser reports no voices at all.
     * @param {string} voiceName
     * @throws {Error} When voices exist but none has that name.
     */
    async getVoice(voiceName) {
        if (!('speechSynthesis' in window)) {
            return { voice_id: null, name: 'API Not Supported' };
        }

        const browserDefault = {
            voice_id: SystemTtsProvider.BROWSER_DEFAULT_VOICE_ID,
            name: SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME,
        };

        if (voiceName === SystemTtsProvider.BROWSER_DEFAULT_VOICE_NAME) {
            return browserDefault;
        }

        const voices = speechSynthesis.getVoices();

        if (voices.length === 0) {
            console.warn('SystemTTS: Empty voice list, using default fallback');
            return browserDefault;
        }

        const match = voices.find(x => x.name === voiceName);

        if (!match) {
            throw new Error(`SystemTTS getVoice: TTS Voice name "${voiceName}" not found`);
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks `text` through `speechUtteranceChunker` and resolves with the
     * silence.mp3 response when done.
     * @param {string} text
     * @param {string} voiceId A voiceURI; when it matches no voice the browser default is used.
     * @returns {Promise<Response>}
     * @throws {string} When the Speech Synthesis API is unavailable.
     */
    async generateTts(text, voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voice = speechSynthesis.getVoices().find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.voice = voice ?? null;
            utterance.rate = this.settings.rate || 1;
            // `??` keeps an explicit pitch of 0 (the slider minimum).
            utterance.pitch = this.settings.pitch ?? 1;
            utterance.onend = () => resolve(silence);
            utterance.onerror = () => reject();

            speechUtteranceChunker(utterance, {
                chunkLength: 200,
                // The chunker usually speaks fresh per-chunk utterances rather
                // than `utterance` itself, so the handlers above would never
                // fire and a failure would leave this promise pending forever.
                // Forward real synthesis errors from each chunk.
                // 'interrupted'/'canceled' are left alone so that stopping
                // playback behaves exactly as before.
                modifier: (chunkUtterance) => {
                    if (chunkUtterance === utterance) {
                        return;
                    }
                    chunkUtterance.addEventListener('error', (event) => {
                        if (event.error === 'interrupted' || event.error === 'canceled') {
                            return;
                        }
                        reject(new Error(`SystemTTS: speech synthesis failed (${event.error})`));
                    });
                },
            }, function () {
                resolve(silence);
                console.log('System TTS done');
            });
        });
    }
}
|
||||
566
data/st-core-scripts/scripts/extensions/tts/tts-webui.js
Normal file
566
data/st-core-scripts/scripts/extensions/tts/tts-webui.js
Normal file
@@ -0,0 +1,566 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { TtsWebuiProvider };
|
||||
|
||||
class TtsWebuiProvider {
|
||||
settings;
|
||||
voices = [];
|
||||
separator = ' . ';
|
||||
|
||||
audioElement = document.createElement('audio');
|
||||
audioContext = null;
|
||||
audioWorkletNode = null;
|
||||
currentVolume = 1.0; // Track current volume
|
||||
|
||||
defaultSettings = {
|
||||
voiceMap: {},
|
||||
model: 'chatterbox',
|
||||
speed: 1,
|
||||
volume: 1.0,
|
||||
available_voices: [''],
|
||||
provider_endpoint: 'http://127.0.0.1:7778/v1/audio/speech',
|
||||
streaming: true,
|
||||
stream_chunk_size: 100,
|
||||
desired_length: 80,
|
||||
max_length: 200,
|
||||
halve_first_chunk: true,
|
||||
exaggeration: 0.5,
|
||||
cfg_weight: 0.5,
|
||||
temperature: 0.8,
|
||||
device: 'auto',
|
||||
dtype: 'float32',
|
||||
cpu_offload: false,
|
||||
chunked: true,
|
||||
cache_voice: false,
|
||||
tokens_per_slice: 1000,
|
||||
remove_milliseconds: 45,
|
||||
remove_milliseconds_start: 25,
|
||||
chunk_overlap_method: 'zero',
|
||||
seed: -1,
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
let html = `
|
||||
<h4 class="textAlignCenter">TTS WebUI Settings</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10 alignItemsFlexEnd">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_endpoint">Provider Endpoint:</label>
|
||||
<input id="tts_webui_endpoint" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_model">Model:</label>
|
||||
<input id="tts_webui_model" type="text" class="text_pole" maxlength="500" value="${this.defaultSettings.model}"/>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_voices">Available Voices (comma separated):</label>
|
||||
<input id="tts_webui_voices" type="text" class="text_pole" value="${this.defaultSettings.available_voices.join()}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_streaming" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_streaming" type="checkbox" />
|
||||
<span>Streaming</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_volume">Volume: <span id="tts_webui_volume_output">${this.defaultSettings.volume}</span></label>
|
||||
<input type="range" id="tts_webui_volume" value="${this.defaultSettings.volume}" min="0" max="2" step="0.1">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Generation Settings</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_exaggeration">Exaggeration: <span id="tts_webui_exaggeration_output">${this.defaultSettings.exaggeration}</span></label>
|
||||
<input id="tts_webui_exaggeration" type="range" value="${this.defaultSettings.exaggeration}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_cfg_weight">CFG Weight: <span id="tts_webui_cfg_weight_output">${this.defaultSettings.cfg_weight}</span></label>
|
||||
<input id="tts_webui_cfg_weight" type="range" value="${this.defaultSettings.cfg_weight}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_temperature">Temperature: <span id="tts_webui_temperature_output">${this.defaultSettings.temperature}</span></label>
|
||||
<input id="tts_webui_temperature" type="range" value="${this.defaultSettings.temperature}" min="0" max="2" step="0.1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_seed">Seed (-1 for random):</label>
|
||||
<input id="tts_webui_seed" type="text" class="text_pole" value="${this.defaultSettings.seed}"/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Chunking</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_chunked" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_chunked" type="checkbox" />
|
||||
<span>Split prompt into chunks</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_halve_first_chunk" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_halve_first_chunk" type="checkbox" />
|
||||
<span>Halve First Chunk</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_desired_length">Desired Length: <span id="tts_webui_desired_length_output">${this.defaultSettings.desired_length}</span></label>
|
||||
<input id="tts_webui_desired_length" type="range" value="${this.defaultSettings.desired_length}" min="25" max="300" step="5" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_max_length">Max Length: <span id="tts_webui_max_length_output">${this.defaultSettings.max_length}</span></label>
|
||||
<input id="tts_webui_max_length" type="range" value="${this.defaultSettings.max_length}" min="50" max="450" step="5" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Model</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_device">Device:</label>
|
||||
<select id="tts_webui_device">
|
||||
<option value="auto" ${this.defaultSettings.device === 'auto' ? 'selected' : ''}>Auto</option>
|
||||
<option value="cuda" ${this.defaultSettings.device === 'cuda' ? 'selected' : ''}>CUDA</option>
|
||||
<option value="mps" ${this.defaultSettings.device === 'mps' ? 'selected' : ''}>MPS</option>
|
||||
<option value="cpu" ${this.defaultSettings.device === 'cpu' ? 'selected' : ''}>CPU</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_dtype">Data Type:</label>
|
||||
<select id="tts_webui_dtype">
|
||||
<option value="float32" ${this.defaultSettings.dtype === 'float32' ? 'selected' : ''}>Float32</option>
|
||||
<option value="float16" ${this.defaultSettings.dtype === 'float16' ? 'selected' : ''}>Float16</option>
|
||||
<option value="bfloat16" ${this.defaultSettings.dtype === 'bfloat16' ? 'selected' : ''}>BFloat16</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_cpu_offload" class="checkbox_label alignItemsCenter flexGap5">
|
||||
<input id="tts_webui_cpu_offload" type="checkbox" />
|
||||
<span>CPU Offload</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<!-- Empty for spacing -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<h4 class="textAlignCenter">Streaming (Advanced Settings)</h4>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_tokens_per_slice">Tokens Per Slice: <span id="tts_webui_tokens_per_slice_output">${this.defaultSettings.tokens_per_slice}</span></label>
|
||||
<input id="tts_webui_tokens_per_slice" type="range" value="${this.defaultSettings.tokens_per_slice}" min="15" max="1000" step="1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_chunk_overlap_method">Chunk Overlap Method:</label>
|
||||
<select id="tts_webui_chunk_overlap_method">
|
||||
<option value="zero" ${this.defaultSettings.chunk_overlap_method === 'zero' ? 'selected' : ''}>Zero</option>
|
||||
<option value="full" ${this.defaultSettings.chunk_overlap_method === 'full' ? 'selected' : ''}>Full</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap10px marginBot10">
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_remove_milliseconds">Remove Milliseconds: <span id="tts_webui_remove_milliseconds_output">${this.defaultSettings.remove_milliseconds}</span></label>
|
||||
<input id="tts_webui_remove_milliseconds" type="range" value="${this.defaultSettings.remove_milliseconds}" min="0" max="100" step="1" />
|
||||
</div>
|
||||
<div class="flex1 flexFlowColumn">
|
||||
<label for="tts_webui_remove_milliseconds_start">Remove Milliseconds Start: <span id="tts_webui_remove_milliseconds_start_output">${this.defaultSettings.remove_milliseconds_start}</span></label>
|
||||
<input id="tts_webui_remove_milliseconds_start" type="range" value="${this.defaultSettings.remove_milliseconds_start}" min="0" max="100" step="1" />
|
||||
</div>
|
||||
</div>`;
|
||||
return html;
|
||||
}
|
||||
|
||||
async loadSettings(settings) {
|
||||
// Populate Provider UI given input settings
|
||||
if (Object.keys(settings).length == 0) {
|
||||
console.info('Using default TTS Provider settings');
|
||||
}
|
||||
|
||||
// Only accept keys defined in defaultSettings
|
||||
this.settings = this.defaultSettings;
|
||||
|
||||
for (const key in settings) {
|
||||
if (key in this.settings) {
|
||||
this.settings[key] = settings[key];
|
||||
} else {
|
||||
throw `Invalid setting passed to TTS Provider: ${key}`;
|
||||
}
|
||||
}
|
||||
|
||||
$('#tts_webui_endpoint').val(this.settings.provider_endpoint);
|
||||
$('#tts_webui_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_model').val(this.settings.model);
|
||||
$('#tts_webui_model').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_voices').val(this.settings.available_voices.join());
|
||||
$('#tts_webui_voices').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_streaming').prop('checked', this.settings.streaming);
|
||||
$('#tts_webui_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_volume').val(this.settings.volume);
|
||||
$('#tts_webui_volume').on('input', () => {
|
||||
this.onSettingsChange();
|
||||
});
|
||||
|
||||
$('#tts_webui_stream_chunk_size').val(this.settings.stream_chunk_size);
|
||||
$('#tts_webui_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_desired_length').val(this.settings.desired_length);
|
||||
$('#tts_webui_desired_length').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_max_length').val(this.settings.max_length);
|
||||
$('#tts_webui_max_length').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_halve_first_chunk').prop('checked', this.settings.halve_first_chunk);
|
||||
$('#tts_webui_halve_first_chunk').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_exaggeration').val(this.settings.exaggeration);
|
||||
$('#tts_webui_exaggeration').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_cfg_weight').val(this.settings.cfg_weight);
|
||||
$('#tts_webui_cfg_weight').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_temperature').val(this.settings.temperature);
|
||||
$('#tts_webui_temperature').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_device').val(this.settings.device);
|
||||
$('#tts_webui_device').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_dtype').val(this.settings.dtype);
|
||||
$('#tts_webui_dtype').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_cpu_offload').prop('checked', this.settings.cpu_offload);
|
||||
$('#tts_webui_cpu_offload').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_chunked').prop('checked', this.settings.chunked);
|
||||
$('#tts_webui_chunked').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_tokens_per_slice').val(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_tokens_per_slice').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_remove_milliseconds').val(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_remove_milliseconds_start').val(this.settings.remove_milliseconds_start);
|
||||
$('#tts_webui_remove_milliseconds_start').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_chunk_overlap_method').val(this.settings.chunk_overlap_method);
|
||||
$('#tts_webui_chunk_overlap_method').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
$('#tts_webui_seed').val(this.settings.seed);
|
||||
$('#tts_webui_seed').on('input', () => { this.onSettingsChange(); });
|
||||
|
||||
// Update output labels
|
||||
$('#tts_webui_volume_output').text(this.settings.volume);
|
||||
$('#tts_webui_desired_length_output').text(this.settings.desired_length);
|
||||
$('#tts_webui_max_length_output').text(this.settings.max_length);
|
||||
$('#tts_webui_exaggeration_output').text(this.settings.exaggeration);
|
||||
$('#tts_webui_cfg_weight_output').text(this.settings.cfg_weight);
|
||||
$('#tts_webui_temperature_output').text(this.settings.temperature);
|
||||
$('#tts_webui_tokens_per_slice_output').text(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_remove_milliseconds_output').text(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds_start_output').text(this.settings.remove_milliseconds_start);
|
||||
|
||||
await this.checkReady();
|
||||
|
||||
console.debug('OpenAI Compatible TTS: Settings loaded');
|
||||
}
|
||||
|
||||
onSettingsChange() {
|
||||
// Update dynamically
|
||||
this.settings.provider_endpoint = String($('#tts_webui_endpoint').val());
|
||||
this.settings.model = String($('#tts_webui_model').val());
|
||||
this.settings.available_voices = String($('#tts_webui_voices').val()).split(',');
|
||||
this.settings.volume = Number($('#tts_webui_volume').val());
|
||||
this.settings.streaming = $('#tts_webui_streaming').is(':checked');
|
||||
this.settings.stream_chunk_size = Number($('#tts_webui_stream_chunk_size').val());
|
||||
this.settings.desired_length = Number($('#tts_webui_desired_length').val());
|
||||
this.settings.max_length = Number($('#tts_webui_max_length').val());
|
||||
this.settings.halve_first_chunk = $('#tts_webui_halve_first_chunk').is(':checked');
|
||||
this.settings.exaggeration = Number($('#tts_webui_exaggeration').val());
|
||||
this.settings.cfg_weight = Number($('#tts_webui_cfg_weight').val());
|
||||
this.settings.temperature = Number($('#tts_webui_temperature').val());
|
||||
this.settings.device = String($('#tts_webui_device').val());
|
||||
this.settings.dtype = String($('#tts_webui_dtype').val());
|
||||
this.settings.cpu_offload = $('#tts_webui_cpu_offload').is(':checked');
|
||||
this.settings.chunked = $('#tts_webui_chunked').is(':checked');
|
||||
this.settings.tokens_per_slice = Number($('#tts_webui_tokens_per_slice').val());
|
||||
this.settings.remove_milliseconds = Number($('#tts_webui_remove_milliseconds').val());
|
||||
this.settings.remove_milliseconds_start = Number($('#tts_webui_remove_milliseconds_start').val());
|
||||
this.settings.chunk_overlap_method = String($('#tts_webui_chunk_overlap_method').val());
|
||||
this.settings.seed = parseInt($('#tts_webui_seed').val()) || -1;
|
||||
|
||||
// Apply volume change immediately
|
||||
this.setVolume(this.settings.volume);
|
||||
|
||||
// Update output labels
|
||||
$('#tts_webui_volume_output').text(this.settings.volume);
|
||||
$('#tts_webui_desired_length_output').text(this.settings.desired_length);
|
||||
$('#tts_webui_max_length_output').text(this.settings.max_length);
|
||||
$('#tts_webui_exaggeration_output').text(this.settings.exaggeration);
|
||||
$('#tts_webui_cfg_weight_output').text(this.settings.cfg_weight);
|
||||
$('#tts_webui_temperature_output').text(this.settings.temperature);
|
||||
$('#tts_webui_tokens_per_slice_output').text(this.settings.tokens_per_slice);
|
||||
$('#tts_webui_remove_milliseconds_output').text(this.settings.remove_milliseconds);
|
||||
$('#tts_webui_remove_milliseconds_start_output').text(this.settings.remove_milliseconds_start);
|
||||
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
async checkReady() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
|
||||
async onRefreshClick() {
|
||||
await this.fetchTtsVoiceObjects();
|
||||
console.info('TTS voices refreshed');
|
||||
}
|
||||
|
||||
async getVoice(voiceName) {
|
||||
if (this.voices.length == 0) {
|
||||
this.voices = await this.fetchTtsVoiceObjects();
|
||||
}
|
||||
const match = this.voices.filter(
|
||||
oaicVoice => oaicVoice.name == voiceName,
|
||||
)[0];
|
||||
if (!match) {
|
||||
throw `TTS Voice name ${voiceName} not found`;
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
async generateTts(text, voiceId) {
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
|
||||
if (this.settings.streaming) {
|
||||
// Stream audio in real-time
|
||||
await this.processStreamingAudio(response);
|
||||
// Return empty string since audio is already played via AudioWorklet
|
||||
return '';
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
async fetchTtsVoiceObjects() {
|
||||
// Try to fetch voices from the provider endpoint
|
||||
try {
|
||||
const voicesEndpoint = this.settings.provider_endpoint.replace('/speech', '/voices/' + this.settings.model);
|
||||
const response = await fetch(voicesEndpoint);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const responseJson = await response.json();
|
||||
console.info('Discovered voices from provider:', responseJson);
|
||||
|
||||
this.voices = responseJson.voices.map(({ value, label }) => ({
|
||||
name: label,
|
||||
voice_id: value,
|
||||
lang: 'en-US',
|
||||
}));
|
||||
|
||||
return this.voices;
|
||||
} catch (error) {
|
||||
console.warn('Voice discovery failed, using configured voices:', error);
|
||||
}
|
||||
|
||||
// Fallback to configured voices
|
||||
this.voices = this.settings.available_voices.map(name => ({
|
||||
name, voice_id: name, lang: 'en-US',
|
||||
}));
|
||||
|
||||
return this.voices;
|
||||
}
|
||||
|
||||
async initAudioWorklet(wavSampleRate) {
|
||||
this.audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: wavSampleRate });
|
||||
|
||||
// Load the PCM processor from separate file
|
||||
const processorUrl = './scripts/extensions/tts/lib/pcm-processor.js';
|
||||
await this.audioContext.audioWorklet.addModule(processorUrl);
|
||||
this.audioWorkletNode = new AudioWorkletNode(this.audioContext, 'pcm-processor');
|
||||
this.audioWorkletNode.connect(this.audioContext.destination);
|
||||
}
|
||||
|
||||
parseWavHeader(buffer) {
|
||||
const view = new DataView(buffer);
|
||||
// Sample rate is at bytes 24-27 (little endian)
|
||||
const sampleRate = view.getUint32(24, true);
|
||||
// Number of channels is at bytes 22-23 (little endian)
|
||||
const channels = view.getUint16(22, true);
|
||||
// Bits per sample is at bytes 34-35 (little endian)
|
||||
const bitsPerSample = view.getUint16(34, true);
|
||||
|
||||
return { sampleRate, channels, bitsPerSample };
|
||||
}
|
||||
|
||||
async processStreamingAudio(response) {
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const reader = response.body.getReader();
|
||||
let headerParsed = false;
|
||||
let wavInfo = null;
|
||||
|
||||
const processStream = async ({ done, value }) => {
|
||||
if (done) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!headerParsed) {
|
||||
// Parse WAV header to get sample rate
|
||||
wavInfo = this.parseWavHeader(value.buffer);
|
||||
console.log('WAV Info:', wavInfo);
|
||||
|
||||
// Initialize AudioWorklet with correct sample rate
|
||||
await this.initAudioWorklet(wavInfo.sampleRate);
|
||||
|
||||
// Skip WAV header (first 44 bytes typically)
|
||||
const pcmData = value.slice(44);
|
||||
this.audioWorkletNode.port.postMessage({ pcmData });
|
||||
headerParsed = true;
|
||||
|
||||
const next = await reader.read();
|
||||
return processStream(next);
|
||||
}
|
||||
|
||||
// Send PCM data to AudioWorklet for immediate playback
|
||||
this.audioWorkletNode.port.postMessage({ pcmData: value });
|
||||
const next = await reader.read();
|
||||
return processStream(next);
|
||||
};
|
||||
|
||||
const firstChunk = await reader.read();
|
||||
await processStream(firstChunk);
|
||||
}
|
||||
|
||||
async previewTtsVoice(voiceId) {
|
||||
this.audioElement.pause();
|
||||
this.audioElement.currentTime = 0;
|
||||
|
||||
const text = getPreviewString('en-US');
|
||||
const response = await this.fetchTtsGeneration(text, voiceId);
|
||||
|
||||
if (this.settings.streaming) {
|
||||
// Use shared streaming method
|
||||
await this.processStreamingAudio(response);
|
||||
} else {
|
||||
// For non-streaming, response is a fetch Response object
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const audio = await response.blob();
|
||||
const url = URL.createObjectURL(audio);
|
||||
this.audioElement.src = url;
|
||||
this.audioElement.play();
|
||||
this.audioElement.onended = () => URL.revokeObjectURL(url);
|
||||
}
|
||||
}
|
||||
|
||||
async fetchTtsGeneration(inputText, voiceId) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
const settings = this.settings;
|
||||
const streaming = settings.streaming;
|
||||
|
||||
const chatterboxParams = [
|
||||
'desired_length',
|
||||
'max_length',
|
||||
'halve_first_chunk',
|
||||
'exaggeration',
|
||||
'cfg_weight',
|
||||
'temperature',
|
||||
'device',
|
||||
'dtype',
|
||||
'cpu_offload',
|
||||
'chunked',
|
||||
'cache_voice',
|
||||
'tokens_per_slice',
|
||||
'remove_milliseconds',
|
||||
'remove_milliseconds_start',
|
||||
'chunk_overlap_method',
|
||||
'seed',
|
||||
];
|
||||
const getParams = settings => Object.fromEntries(
|
||||
Object.entries(settings).filter(([key]) =>
|
||||
chatterboxParams.includes(key),
|
||||
),
|
||||
);
|
||||
|
||||
const requestBody = {
|
||||
model: settings.model,
|
||||
voice: voiceId,
|
||||
input: inputText,
|
||||
response_format: 'wav',
|
||||
speed: settings.speed,
|
||||
stream: streaming,
|
||||
params: getParams(settings),
|
||||
};
|
||||
|
||||
const headers = {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': streaming ? 'no-cache' : undefined,
|
||||
};
|
||||
|
||||
if (streaming) {
|
||||
headers['Cache-Control'] = 'no-cache';
|
||||
}
|
||||
|
||||
const response = await fetch(settings.provider_endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
toastr.error(response.statusText, 'TTS Generation Failed');
|
||||
throw new Error(
|
||||
`HTTP ${response.status}: ${await response.text()}`,
|
||||
);
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
setVolume(volume) {
|
||||
// Clamp volume between 0.0 and 2.0 (0% to 200%)
|
||||
this.currentVolume = Math.max(0, Math.min(2.0, volume));
|
||||
|
||||
// Set volume for regular audio element (non-streaming)
|
||||
this.audioElement.volume = Math.min(this.currentVolume, 1.0); // HTML audio element max is 1.0
|
||||
|
||||
// Set volume for AudioWorklet (streaming)
|
||||
if (this.audioWorkletNode) {
|
||||
this.audioWorkletNode.port.postMessage({ volume: this.currentVolume });
|
||||
}
|
||||
}
|
||||
}
|
||||
404
data/st-core-scripts/scripts/extensions/tts/vits.js
Normal file
404
data/st-core-scripts/scripts/extensions/tts/vits.js
Normal file
@@ -0,0 +1,404 @@
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { VITSTtsProvider };
|
||||
|
||||
/**
 * TTS provider for a vits-simple-api server (VITS, W2V2-VITS and BERT-VITS2
 * models). Renders its own settings UI, mirrors it into `this.settings`, and
 * talks to the server's `/voice/*` endpoints.
 */
class VITSTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        return text;
    }

    audioFormats = ['wav', 'ogg', 'silk', 'mp3', 'flac'];

    languageLabels = {
        'Auto': 'auto',
        'Chinese': 'zh',
        'English': 'en',
        'Japanese': 'ja',
        'Korean': 'ko',
    };

    langKey2LangCode = {
        'zh': 'zh-CN',
        'en': 'en-US',
        'ja': 'ja-JP',
        'ko': 'ko-KR',
    };

    modelTypes = {
        VITS: 'VITS',
        W2V2_VITS: 'W2V2-VITS',
        BERT_VITS2: 'BERT-VITS2',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:23456',
        format: 'wav',
        lang: 'auto',
        length: 1.0,
        noise: 0.33,
        noisew: 0.4,
        segment_size: 50,
        streaming: false,
        dim_emotion: 0,
        sdp_ratio: 0.2,
        emotion: 0,
        text_prompt: '',
        style_text: '',
        style_weight: 1,
    };

    // Settings whose control is `#vits_<key>` and is read/written with .val().
    valueSettingKeys = [
        'lang', 'format', 'dim_emotion', 'text_prompt', 'style_text',
        'length', 'noise', 'noisew', 'segment_size', 'sdp_ratio', 'emotion', 'style_weight',
    ];

    // Range sliders that have a `#vits_<key>_output` label showing their value.
    sliderSettingKeys = ['length', 'noise', 'noisew', 'segment_size', 'sdp_ratio', 'emotion', 'style_weight'];

    /**
     * Markup for the provider's settings panel. Controls are rendered with
     * the default values; loadSettings() fills in the stored ones afterwards.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        const defaults = this.defaultSettings;
        const option = (value, label, isSelected) =>
            `<option value="${value}"${isSelected ? ' selected="selected"' : ''}>${label}</option>`;

        const languageOptions = Object.entries(this.languageLabels)
            .map(([label, code]) => option(code, label, code === this.settings?.lang))
            .join('');
        const formatOptions = this.audioFormats
            .map(format => option(format, format, format === this.settings?.format))
            .join('');

        return `
        <label for="vits_lang">Text Language</label>
        <select id="vits_lang">${languageOptions}
        </select>
        <label>VITS / W2V2-VITS / Bert-VITS2 Settings:</label><br/>
        <label for="vits_endpoint">Provider Endpoint:</label>
        <input id="vits_endpoint" type="text" class="text_pole" maxlength="250" value="${defaults.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/Artrajz/vits-simple-api">vits-simple-api</a>.</span><br/>

        <label for="vits_format">Audio format:</label>
        <select id="vits_format">${formatOptions}
        </select>
        <label for="vits_length">Audio length: <span id="vits_length_output">${defaults.length}</span></label>
        <input id="vits_length" type="range" value="${defaults.length}" min="0.0" max="5" step="0.01" />

        <label for="vits_noise">Noise: <span id="vits_noise_output">${defaults.noise}</span></label>
        <input id="vits_noise" type="range" value="${defaults.noise}" min="0.1" max="2" step="0.01" />

        <label for="vits_noisew">SDP noise: <span id="vits_noisew_output">${defaults.noisew}</span></label>
        <input id="vits_noisew" type="range" value="${defaults.noisew}" min="0.1" max="2" step="0.01" />

        <label for="vits_segment_size">Segment Size: <span id="vits_segment_size_output">${defaults.segment_size}</span></label>
        <input id="vits_segment_size" type="range" value="${defaults.segment_size}" min="0" max="1000" step="1" />

        <label for="vits_streaming" class="checkbox_label">
            <input id="vits_streaming" type="checkbox" />
            <span>Streaming</span>
        </label>

        <label>W2V2-VITS Settings:</label><br/>
        <label for="vits_dim_emotion">Dimensional emotion:</label>
        <input id="vits_dim_emotion" type="number" class="text_pole" min="0" max="5457" step="1" value="${defaults.dim_emotion}"/>

        <label>BERT-VITS2 Settings:</label><br/>
        <label for="vits_sdp_ratio">sdp_ratio: <span id="vits_sdp_ratio_output">${defaults.sdp_ratio}</span></label>
        <input id="vits_sdp_ratio" type="range" value="${defaults.sdp_ratio}" min="0.0" max="1" step="0.01" />

        <label for="vits_emotion">emotion: <span id="vits_emotion_output">${defaults.emotion}</span></label>
        <input id="vits_emotion" type="range" value="${defaults.emotion}" min="0" max="9" step="1" />

        <label for="vits_text_prompt">Text Prompt:</label>
        <input id="vits_text_prompt" type="text" class="text_pole" maxlength="512" value="${defaults.text_prompt}"/>

        <label for="vits_style_text">Style text:</label>
        <input id="vits_style_text" type="text" class="text_pole" maxlength="512" value="${defaults.style_text}"/>

        <label for="vits_style_weight">Style weight <span id="vits_style_weight_output">${defaults.style_weight}</span></label>
        <input id="vits_style_weight" type="range" value="${defaults.style_weight}" min="0" max="1" step="0.01" />
        `;
    }

    /** Copy the current slider values into their `_output` labels. */
    updateOutputLabels() {
        for (const key of this.sliderSettingKeys) {
            $(`#vits_${key}_output`).text(this.settings[key]);
        }
    }

    /** Used when provider settings are updated from UI. */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#vits_endpoint').val();
        for (const key of this.valueSettingKeys) {
            this.settings[key] = $(`#vits_${key}`).val();
        }
        this.settings.streaming = $('#vits_streaming').is(':checked');

        // Update the UI to reflect changes
        this.updateOutputLabels();

        saveTtsProviderSettings();
        this.changeTTSSettings();
    }

    /**
     * Populate the provider UI from stored settings and wire up its controls.
     * @param {object} settings Stored settings; keys not present in
     *   defaultSettings are ignored
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so stored values never overwrite the defaults
        // that settingsHtml renders from.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                console.debug(`Ignoring non-user-configurable setting: ${key}`);
            }
        }

        // Set initial values from the settings
        $('#vits_endpoint').val(this.settings.provider_endpoint);
        for (const key of this.valueSettingKeys) {
            $(`#vits_${key}`).val(this.settings[key]);
        }
        $('#vits_streaming').prop('checked', this.settings.streaming);

        // Update the UI to reflect changes
        this.updateOutputLabels();

        // Register input/change event listeners to update settings on user interaction
        const handleChange = () => { this.onSettingsChange(); };
        $('#vits_endpoint').on('input', handleChange);
        for (const key of [...this.valueSettingKeys, 'streaming']) {
            $(`#vits_${key}`).on('change', handleChange);
        }

        await this.checkReady();

        console.info('VITS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Failures are deliberately not propagated (allSettled).
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by its display name, fetching the voice list if needed.
     * @param {string} voiceName Display name as produced by fetchTtsVoiceObjects
     * @returns {Promise<object>} Matching voice object
     * @throws {string} If no voice has that name (thrown as a plain string)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Look up a voice by its id, fetching the voice list if needed.
     * @param {string} voiceId Voice id (model_type&speaker_id)
     * @returns {Promise<object>} Matching voice object
     * @throws {string} If no voice has that id (thrown as a plain string)
     */
    async getVoiceById(voiceId) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(v => v.voice_id == voiceId);
        if (!match) {
            throw `TTS Voice id ${voiceId} not found`;
        }
        return match;
    }

    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list and flatten it into voice objects.
     * Model types the server does not report are skipped.
     * @returns {Promise<object[]>} Voices; also stored in `this.voices`
     * @throws {Error} If the request fails
     */
    async fetchTtsVoiceObjects() {
        const response = await fetch(`${this.settings.provider_endpoint}/voice/speakers`);
        if (!response.ok) {
            // text() works for any body; json() would render as
            // "[object Object]" or throw on a non-JSON error page.
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const jsonData = await response.json();
        const voices = [];

        for (const modelType of Object.values(this.modelTypes)) {
            const speakers = jsonData?.[modelType];
            if (!Array.isArray(speakers)) {
                continue;
            }
            for (const voice of speakers) {
                voices.push({
                    name: `[${modelType}] ${voice.name} (${voice.lang})`,
                    voice_id: `${modelType}&${voice.id}`,
                    preview_url: false,
                    lang: voice.lang,
                });
            }
        }

        this.voices = voices; // Assign to the class property
        return voices; // Also return this list
    }

    // Each time a parameter is changed, we change the configuration
    async changeTTSSettings() {
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use (model_type&speaker_id)
     * @param {string|null} [lang] Language override; falls back to settings.lang
     * @param {boolean} [forceNoStreaming] Ignore the streaming setting
     * @returns {Promise<Response|string>} Fetch response, or in streaming mode
     *   the request URL as a string
     * @throws {Error} If the non-streaming request fails
     */
    async fetchTtsGeneration(inputText, voiceId, lang = null, forceNoStreaming = false) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        const streaming = !forceNoStreaming && this.settings.streaming;
        const [model_type, speaker_id] = voiceId.split('&');
        const params = new URLSearchParams();
        params.append('text', inputText);
        params.append('id', speaker_id);
        if (streaming) {
            // Streaming response only supports MP3
            params.append('streaming', streaming);
        } else {
            params.append('format', this.settings.format);
        }
        params.append('lang', lang ?? this.settings.lang);
        params.append('length', this.settings.length);
        params.append('noise', this.settings.noise);
        params.append('noisew', this.settings.noisew);
        params.append('segment_size', this.settings.segment_size);

        if (model_type == this.modelTypes.W2V2_VITS) {
            params.append('emotion', this.settings.dim_emotion);
        } else if (model_type == this.modelTypes.BERT_VITS2) {
            params.append('sdp_ratio', this.settings.sdp_ratio);
            params.append('emotion', this.settings.emotion);
            if (this.settings.text_prompt) {
                params.append('text_prompt', this.settings.text_prompt);
            }
            if (this.settings.style_text) {
                params.append('style_text', this.settings.style_text);
                params.append('style_weight', this.settings.style_weight);
            }
        }

        const url = `${this.settings.provider_endpoint}/voice/${model_type.toLowerCase()}`;

        if (streaming) {
            return url + `?${params.toString()}`;
        }

        const response = await fetch(
            url,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                },
                body: params,
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoiceById(id);
        // NOTE(review): voice.lang is presumably a list of language keys as
        // returned by the server - confirm against /voice/speakers.
        const lang = voice.lang.includes(this.settings.lang) ? this.settings.lang : voice.lang[0];

        const lang_code = this.langKey2LangCode[lang];
        const text = getPreviewString(lang_code);
        const response = await this.fetchTtsGeneration(text, id, lang, true);
        if (typeof response == 'string') {
            return;
        }
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        const releaseUrl = () => URL.revokeObjectURL(url);

        // Release the blob URL once playback is over or has failed, so
        // repeated previews do not accumulate blobs.
        this.audioElement.onended = releaseUrl;
        this.audioElement.onerror = releaseUrl;
        this.audioElement.src = url;
        this.audioElement.play().catch((error) => {
            releaseUrl();
            console.warn('VITS: preview playback did not start', error);
        });
    }

    // Interface not used
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}
|
||||
327
data/st-core-scripts/scripts/extensions/tts/xtts.js
Normal file
327
data/st-core-scripts/scripts/extensions/tts/xtts.js
Normal file
@@ -0,0 +1,327 @@
|
||||
import { doExtrasFetch, getApiUrl, modules } from '../../extensions.js';
|
||||
import { saveTtsProviderSettings } from './index.js';
|
||||
|
||||
export { XTTSTtsProvider };
|
||||
|
||||
/**
 * TTS provider for an XTTSv2 API server. Renders its own settings UI, mirrors
 * it into `this.settings`, pushes generation parameters to the server, and
 * switches to the Extras API endpoint when a TTS module is detected there.
 */
class XTTSTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    ready = false;
    voices = [];
    separator = '. ';

    // Handle of the timer that polls for the Extras TTS module (see loadSettings).
    apiCheckInterval = null;

    /**
     * Perform any text processing before passing to TTS engine.
     * @param {string} text Input text
     * @returns {string} Processed text
     */
    processText(text) {
        // Replace fancy ellipsis with "..."
        text = text.replace(/…/g, '...');
        // Remove quotes
        text = text.replace(/["“”‘’]/g, '');
        // Replace multiple "." with single "."
        text = text.replace(/\.+/g, '.');
        return text;
    }

    languageLabels = {
        'Arabic': 'ar',
        'Brazilian Portuguese': 'pt',
        'Chinese': 'zh-cn',
        'Czech': 'cs',
        'Dutch': 'nl',
        'English': 'en',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Polish': 'pl',
        'Russian': 'ru',
        'Spanish': 'es',
        'Turkish': 'tr',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hungarian': 'hu',
        'Hindi': 'hi',
    };

    defaultSettings = {
        provider_endpoint: 'http://localhost:8020',
        language: 'en',
        temperature: 0.75,
        length_penalty: 1.0,
        repetition_penalty: 5.0,
        top_k: 50,
        top_p: 0.85,
        speed: 1,
        enable_text_splitting: true,
        stream_chunk_size: 100,
        voiceMap: {},
        streaming: false,
    };

    // Slider settings (control id is `#xtts_<key>`) mapped to the label that
    // displays their current value. The label ids are not uniform.
    sliderOutputs = {
        speed: '#xtts_tts_speed_output',
        temperature: '#xtts_tts_temperature_output',
        length_penalty: '#xtts_length_penalty_output',
        repetition_penalty: '#xtts_repetition_penalty_output',
        top_k: '#xtts_top_k_output',
        top_p: '#xtts_top_p_output',
        stream_chunk_size: '#xtts_stream_chunk_size_output',
    };

    /**
     * Markup for the provider's settings panel. Controls are rendered with
     * the default values; loadSettings() fills in the stored ones afterwards.
     * @returns {string} HTML fragment
     */
    get settingsHtml() {
        const defaults = this.defaultSettings;
        const languageOptions = Object.entries(this.languageLabels)
            .map(([label, code]) => {
                const selected = code === this.settings?.language ? ' selected="selected"' : '';
                return `<option value="${code}"${selected}>${label}</option>`;
            })
            .join('');

        return `
        <label for="xtts_api_language">Language</label>
        <select id="xtts_api_language">${languageOptions}
        </select>
        <label>XTTS Settings:</label><br/>
        <label for="xtts_tts_endpoint">Provider Endpoint:</label>
        <input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${defaults.provider_endpoint}"/>
        <span>Use <a target="_blank" href="https://github.com/daswer123/xtts-api-server">XTTSv2 TTS Server</a>.</span>
        <label for="xtts_tts_streaming" class="checkbox_label">
            <input id="xtts_tts_streaming" type="checkbox" />
            <span>Streaming <small>(RVC not supported)</small></span>
        </label>
        <label for="xtts_speed">Speed: <span id="xtts_tts_speed_output">${defaults.speed}</span></label>
        <input id="xtts_speed" type="range" value="${defaults.speed}" min="0.5" max="2" step="0.01" />

        <label for="xtts_temperature">Temperature: <span id="xtts_tts_temperature_output">${defaults.temperature}</span></label>
        <input id="xtts_temperature" type="range" value="${defaults.temperature}" min="0.01" max="1" step="0.01" />

        <label for="xtts_length_penalty">Length Penalty: <span id="xtts_length_penalty_output">${defaults.length_penalty}</span></label>
        <input id="xtts_length_penalty" type="range" value="${defaults.length_penalty}" min="0.5" max="2" step="0.1" />

        <label for="xtts_repetition_penalty">Repetition Penalty: <span id="xtts_repetition_penalty_output">${defaults.repetition_penalty}</span></label>
        <input id="xtts_repetition_penalty" type="range" value="${defaults.repetition_penalty}" min="1" max="10" step="0.1" />

        <label for="xtts_top_k">Top K: <span id="xtts_top_k_output">${defaults.top_k}</span></label>
        <input id="xtts_top_k" type="range" value="${defaults.top_k}" min="0" max="100" step="1" />

        <label for="xtts_top_p">Top P: <span id="xtts_top_p_output">${defaults.top_p}</span></label>
        <input id="xtts_top_p" type="range" value="${defaults.top_p}" min="0" max="1" step="0.01" />

        <label for="xtts_stream_chunk_size">Stream Chunk Size: <span id="xtts_stream_chunk_size_output">${defaults.stream_chunk_size}</span></label>
        <input id="xtts_stream_chunk_size" type="range" value="${defaults.stream_chunk_size}" min="100" max="400" step="1" />

        <label for="xtts_enable_text_splitting" class="checkbox_label">
            <input id="xtts_enable_text_splitting" type="checkbox" ${defaults.enable_text_splitting ? 'checked' : ''} />
            Enable Text Splitting
        </label>
        `;
    }

    /** Copy the current slider values into their output labels. */
    updateOutputLabels() {
        for (const [key, outputSelector] of Object.entries(this.sliderOutputs)) {
            $(outputSelector).text(this.settings[key]);
        }
    }

    /** Used when provider settings are updated from UI. */
    onSettingsChange() {
        this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
        this.settings.language = $('#xtts_api_language').val();

        // Update the default TTS settings based on input fields
        for (const key of Object.keys(this.sliderOutputs)) {
            this.settings[key] = $(`#xtts_${key}`).val();
        }
        this.settings.enable_text_splitting = $('#xtts_enable_text_splitting').is(':checked');
        this.settings.streaming = $('#xtts_tts_streaming').is(':checked');

        // Update the UI to reflect changes
        this.updateOutputLabels();

        saveTtsProviderSettings();
        // Not awaited (this is a UI event handler); log instead of leaving an
        // unhandled rejection when the server is unreachable.
        this.changeTTSSettings().catch((error) => {
            console.error('XTTS: failed to push settings to the server', error);
        });
    }

    /**
     * Populate the provider UI from stored settings and wire up its controls.
     * @param {object} settings Stored settings
     * @returns {Promise<void>}
     * @throws {string} If `settings` has a key that is not in defaultSettings
     */
    async loadSettings(settings) {
        if (Object.keys(settings).length == 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy (including the nested voiceMap) so stored values
        // never overwrite the defaults that settingsHtml renders from.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // Cancel a poller left over from an earlier call so repeated loads do
        // not stack timers.
        clearInterval(this.apiCheckInterval);
        this.apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('xtts-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
                clearInterval(this.apiCheckInterval);
                this.apiCheckInterval = null;
            }
        }, 2000);

        // Set initial values from the settings
        $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
        $('#xtts_api_language').val(this.settings.language);
        for (const key of Object.keys(this.sliderOutputs)) {
            $(`#xtts_${key}`).val(this.settings[key]);
        }
        $('#xtts_enable_text_splitting').prop('checked', this.settings.enable_text_splitting);
        $('#xtts_tts_streaming').prop('checked', this.settings.streaming);

        // Update the UI to reflect changes
        this.updateOutputLabels();

        // Register input/change event listeners to update settings on user interaction
        const handleChange = () => { this.onSettingsChange(); };
        $('#xtts_tts_endpoint').on('input', handleChange);
        $('#xtts_api_language').on('change', handleChange);
        for (const key of Object.keys(this.sliderOutputs)) {
            $(`#xtts_${key}`).on('input', handleChange);
        }
        $('#xtts_enable_text_splitting').on('change', handleChange);
        $('#xtts_tts_streaming').on('change', handleChange);

        await this.checkReady();

        console.debug('XTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds.
    // Failures are deliberately not propagated (allSettled).
    async checkReady() {
        await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
    }

    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Look up a voice by name, fetching the voice list if needed.
     * @param {string} voiceName Voice name
     * @returns {Promise<object>} Matching voice object
     * @throws {string} If no voice has that name (thrown as a plain string)
     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }
        const match = this.voices.find(XTTSVoice => XTTSVoice.name == voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the speaker list from the server.
     * @returns {Promise<object[]>} Parsed JSON body of `/speakers`
     * @throws {Error} If the request fails
     */
    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
        if (!response.ok) {
            // text() works for any body; json() would render as
            // "[object Object]" or throw on a non-JSON error page.
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        const responseJson = await response.json();
        return responseJson;
    }

    // Each time a parameter is changed, we change the configuration
    async changeTTSSettings() {
        if (!this.settings.provider_endpoint) {
            return;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/set_tts_settings`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify({
                    'temperature': this.settings.temperature,
                    'speed': this.settings.speed,
                    'length_penalty': this.settings.length_penalty,
                    'repetition_penalty': this.settings.repetition_penalty,
                    'top_p': this.settings.top_p,
                    'top_k': this.settings.top_k,
                    'enable_text_splitting': this.settings.enable_text_splitting,
                    'stream_chunk_size': this.settings.stream_chunk_size,
                }),
            },
        );
        return response;
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice to use (sent as `speaker_wav`)
     * @returns {Promise<Response|string>} Fetch response, or in streaming mode
     *   the `/tts_stream/` request URL as a string
     * @throws {Error} If the non-streaming request fails
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);

        if (this.settings.streaming) {
            const params = new URLSearchParams();
            params.append('text', inputText);
            params.append('speaker_wav', voiceId);
            params.append('language', this.settings.language);
            return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
        }

        const response = await doExtrasFetch(
            `${this.settings.provider_endpoint}/tts_to_audio/`,
            {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache', // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
                },
                body: JSON.stringify({
                    'text': inputText,
                    'speaker_wav': voiceId,
                    'language': this.settings.language,
                }),
            },
        );
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    // Interface not used by XTTS TTS
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}
|
||||
Reference in New Issue
Block a user