2024-06-08 03:18:48 +00:00
< script lang = "ts" >
import { toast } from 'svelte-sonner';
2024-08-02 17:24:47 +00:00
import { createEventDispatcher , onMount , getContext } from 'svelte';
const dispatch = createEventDispatcher();
2024-06-08 03:18:48 +00:00
import { getBackendConfig } from '$lib/apis';
2024-08-02 17:24:47 +00:00
import {
getAudioConfig,
updateAudioConfig,
getModels as _getModels,
getVoices as _getVoices
} from '$lib/apis/audio';
2025-02-12 09:17:30 +00:00
import { config , settings } from '$lib/stores';
2024-08-02 17:24:47 +00:00
2025-06-25 22:44:45 +00:00
import Spinner from '$lib/components/common/Spinner.svelte';
2024-06-25 12:15:29 +00:00
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
2024-06-08 03:18:48 +00:00
2024-08-25 00:35:42 +00:00
import { TTS_RESPONSE_SPLIT } from '$lib/types';
2024-06-08 03:18:48 +00:00
2024-08-25 00:35:42 +00:00
import type { Writable } from 'svelte/store';
import type { i18n as i18nType } from 'i18next';
2025-10-07 21:20:27 +00:00
import Textarea from '$lib/components/common/Textarea.svelte';
2024-06-08 03:18:48 +00:00
2024-08-25 00:35:42 +00:00
// i18next instance exposed as a Svelte store through the component context.
const i18n = getContext<Writable<i18nType>>('i18n');

/** Parent-supplied callback, invoked after the audio config was saved successfully. */
export let saveHandler: () => void;

// Audio

// --- Text-to-Speech form state (sent under the `tts` key on save) ---
let TTS_OPENAI_API_BASE_URL = '';
let TTS_OPENAI_API_KEY = '';
let TTS_API_KEY = '';
let TTS_ENGINE = '';
let TTS_MODEL = '';
let TTS_VOICE = '';
// Raw JSON text edited by the user; parsed into an object when saving.
let TTS_OPENAI_PARAMS = '';
let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;

let TTS_AZURE_SPEECH_REGION = '';
let TTS_AZURE_SPEECH_BASE_URL = '';
let TTS_AZURE_SPEECH_OUTPUT_FORMAT = '';

// --- Speech-to-Text form state (sent under the `stt` key on save) ---
let STT_OPENAI_API_BASE_URL = '';
let STT_OPENAI_API_KEY = '';
let STT_ENGINE = '';
let STT_MODEL = '';
// Comma-separated list in the form; converted from/to an array on load/save.
let STT_SUPPORTED_CONTENT_TYPES = '';
let STT_WHISPER_MODEL = '';

let STT_AZURE_API_KEY = '';
let STT_AZURE_REGION = '';
let STT_AZURE_LOCALES = '';
let STT_AZURE_BASE_URL = '';
let STT_AZURE_MAX_SPEAKERS = '';

let STT_DEEPGRAM_API_KEY = '';

let STT_MISTRAL_API_KEY = '';
let STT_MISTRAL_API_BASE_URL = '';
let STT_MISTRAL_USE_CHAT_COMPLETIONS = false;

// True while `sttModelUpdateHandler` is running; disables the whisper model button.
let STT_WHISPER_MODEL_LOADING = false;

// Voices and models offered for the currently selected TTS engine.
// eslint-disable-next-line no-undef
let voices: SpeechSynthesisVoice[] = [];
let models: Awaited<ReturnType<typeof _getModels>>['models'] = [];
2024-06-08 03:18:48 +00:00
2024-08-02 17:24:47 +00:00
/**
 * Refreshes `models` for the currently selected TTS engine.
 *
 * For the Web API engine (`TTS_ENGINE === ''`) the list is emptied without a
 * request. Otherwise the list is fetched through `_getModels`; a failed
 * request is reported as an error toast and leaves `models` unchanged.
 */
const getModels = async () => {
	if (TTS_ENGINE === '') {
		models = [];
		return;
	}

	const res = await _getModels(
		localStorage.token,
		$config?.features?.enable_direct_connections && ($settings?.directConnections ?? null)
	).catch((e) => {
		toast.error(`${e}`);
	});

	if (res) {
		console.log(res);
		models = res.models;
	}
};
2024-08-02 17:24:47 +00:00
/**
 * Refreshes `voices` for the currently selected TTS engine, sorted by name
 * using the active UI language.
 *
 * - Web API engine (`TTS_ENGINE === ''`): polls `speechSynthesis.getVoices()`
 *   every 100 ms until the browser reports at least one voice. Polling is
 *   abandoned after `MAX_POLLS` attempts so the timer cannot run forever when
 *   no voices ever become available.
 * - Any other engine: fetches the voices through `_getVoices`; a failed
 *   request is reported as an error toast and leaves `voices` unchanged.
 */
const getVoices = async () => {
	const byName = (a: { name: string }, b: { name: string }) =>
		a.name.localeCompare(b.name, $i18n.resolvedLanguage);

	if (TTS_ENGINE === '') {
		// Without the Web Speech API there is nothing to poll.
		if (typeof speechSynthesis === 'undefined') {
			voices = [];
			return;
		}

		const POLL_INTERVAL_MS = 100;
		const MAX_POLLS = 100; // give up after ~10 seconds
		let polls = 0;

		const getVoicesLoop = setInterval(() => {
			polls += 1;
			const available = speechSynthesis.getVoices();

			if (available.length > 0) {
				clearInterval(getVoicesLoop);
				voices = [...available].sort(byName);
				return;
			}

			// Still empty: clear any voices left over from a previous engine.
			voices = available;
			if (polls >= MAX_POLLS) {
				clearInterval(getVoicesLoop);
			}
		}, POLL_INTERVAL_MS);
		return;
	}

	const res = await _getVoices(localStorage.token).catch((e) => {
		toast.error(`${e}`);
	});

	if (res) {
		console.log(res);
		// Sort a copy so the response object itself is not mutated.
		voices = [...(res.voices ?? [])].sort(byName);
	}
};
/**
 * Validates the form state and persists it through `updateAudioConfig`.
 *
 * The "Additional Parameters" text must be empty or a JSON object; anything
 * else shows an error toast and aborts without sending a request. On a
 * successful save the text is re-formatted, `saveHandler` is invoked and the
 * global `config` store is refreshed from the backend.
 */
const updateConfigHandler = async () => {
	let openaiParams: Record<string, unknown> = {};
	try {
		const rawParams = (TTS_OPENAI_PARAMS ?? '').trim();
		const parsed: unknown = rawParams ? JSON.parse(rawParams) : {};

		if (parsed === null || parsed === '') {
			// `null` and `""` are treated as "no parameters" rather than rejected.
			openaiParams = {};
		} else if (typeof parsed === 'object' && !Array.isArray(parsed)) {
			openaiParams = parsed as Record<string, unknown>;
		} else {
			// Arrays, numbers, booleans and non-empty strings are valid JSON but not a parameter map.
			throw new TypeError('Parameters must be a JSON object');
		}

		TTS_OPENAI_PARAMS = JSON.stringify(openaiParams, null, 2);
	} catch (e) {
		toast.error($i18n.t('Invalid JSON format for Parameters'));
		return;
	}

	// A blank field must yield [] (not ['']); stray whitespace around entries is dropped.
	const supportedContentTypes = STT_SUPPORTED_CONTENT_TYPES.split(',')
		.map((contentType) => contentType.trim())
		.filter((contentType) => contentType !== '');

	const res = await updateAudioConfig(localStorage.token, {
		tts: {
			OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
			OPENAI_API_KEY: TTS_OPENAI_API_KEY,
			OPENAI_PARAMS: openaiParams,
			API_KEY: TTS_API_KEY,
			ENGINE: TTS_ENGINE,
			MODEL: TTS_MODEL,
			VOICE: TTS_VOICE,
			AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
			AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
			AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT,
			SPLIT_ON: TTS_SPLIT_ON
		},
		stt: {
			OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
			OPENAI_API_KEY: STT_OPENAI_API_KEY,
			ENGINE: STT_ENGINE,
			MODEL: STT_MODEL,
			SUPPORTED_CONTENT_TYPES: supportedContentTypes,
			WHISPER_MODEL: STT_WHISPER_MODEL,
			DEEPGRAM_API_KEY: STT_DEEPGRAM_API_KEY,
			AZURE_API_KEY: STT_AZURE_API_KEY,
			AZURE_REGION: STT_AZURE_REGION,
			AZURE_LOCALES: STT_AZURE_LOCALES,
			AZURE_BASE_URL: STT_AZURE_BASE_URL,
			AZURE_MAX_SPEAKERS: STT_AZURE_MAX_SPEAKERS,
			MISTRAL_API_KEY: STT_MISTRAL_API_KEY,
			MISTRAL_API_BASE_URL: STT_MISTRAL_API_BASE_URL,
			MISTRAL_USE_CHAT_COMPLETIONS: STT_MISTRAL_USE_CHAT_COMPLETIONS
		}
	});

	if (res) {
		saveHandler();
		config.set(await getBackendConfig());
	}
};
2024-10-21 04:34:36 +00:00
/**
 * Saves the configuration while showing the whisper-model button as busy.
 *
 * The loading flag is reset in `finally` so the button is re-enabled even
 * when `updateConfigHandler` throws.
 */
const sttModelUpdateHandler = async () => {
	STT_WHISPER_MODEL_LOADING = true;
	try {
		await updateConfigHandler();
	} finally {
		STT_WHISPER_MODEL_LOADING = false;
	}
};
2024-06-08 03:18:48 +00:00
// On mount: load the stored audio configuration into the form state, then
// fetch the voices and models for the configured TTS engine.
onMount(async () => {
	let res: Awaited<ReturnType<typeof getAudioConfig>> | null = null;
	try {
		res = await getAudioConfig(localStorage.token);
	} catch (e) {
		// Report the failure but still load voices/models below.
		toast.error(`${e}`);
	}

	// NOTE: the response is deliberately not logged; it contains API keys.
	if (res) {
		TTS_OPENAI_API_BASE_URL = res.tts.OPENAI_API_BASE_URL;
		TTS_OPENAI_API_KEY = res.tts.OPENAI_API_KEY;
		// Default to an empty object so the textarea shows `{}` instead of `""`.
		TTS_OPENAI_PARAMS = JSON.stringify(res?.tts?.OPENAI_PARAMS ?? {}, null, 2);
		TTS_API_KEY = res.tts.API_KEY;

		TTS_ENGINE = res.tts.ENGINE;
		TTS_MODEL = res.tts.MODEL;
		TTS_VOICE = res.tts.VOICE;

		TTS_SPLIT_ON = res.tts.SPLIT_ON || TTS_RESPONSE_SPLIT.PUNCTUATION;

		TTS_AZURE_SPEECH_REGION = res.tts.AZURE_SPEECH_REGION;
		TTS_AZURE_SPEECH_BASE_URL = res.tts.AZURE_SPEECH_BASE_URL;
		TTS_AZURE_SPEECH_OUTPUT_FORMAT = res.tts.AZURE_SPEECH_OUTPUT_FORMAT;

		STT_OPENAI_API_BASE_URL = res.stt.OPENAI_API_BASE_URL;
		STT_OPENAI_API_KEY = res.stt.OPENAI_API_KEY;

		STT_ENGINE = res.stt.ENGINE;
		STT_MODEL = res.stt.MODEL;
		// The form edits the list as comma-separated text.
		STT_SUPPORTED_CONTENT_TYPES = (res?.stt?.SUPPORTED_CONTENT_TYPES ?? []).join(',');
		STT_WHISPER_MODEL = res.stt.WHISPER_MODEL;

		STT_AZURE_API_KEY = res.stt.AZURE_API_KEY;
		STT_AZURE_REGION = res.stt.AZURE_REGION;
		STT_AZURE_LOCALES = res.stt.AZURE_LOCALES;
		STT_AZURE_BASE_URL = res.stt.AZURE_BASE_URL;
		STT_AZURE_MAX_SPEAKERS = res.stt.AZURE_MAX_SPEAKERS;

		STT_DEEPGRAM_API_KEY = res.stt.DEEPGRAM_API_KEY;

		STT_MISTRAL_API_KEY = res.stt.MISTRAL_API_KEY;
		STT_MISTRAL_API_BASE_URL = res.stt.MISTRAL_API_BASE_URL;
		STT_MISTRAL_USE_CHAT_COMPLETIONS = res.stt.MISTRAL_USE_CHAT_COMPLETIONS;
	}

	await getVoices();
	await getModels();
});
< / script >
< form
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit| preventDefault={ async () => {
await updateConfigHandler();
dispatch('save');
}}
2024-07-20 07:07:50 +00:00
>
2024-06-09 09:41:52 +00:00
< div class = " space-y-3 overflow-y-scroll scrollbar-hidden h-full" >
2024-06-09 09:05:36 +00:00
< div class = "flex flex-col gap-3" >
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-2.5 text-base font-medium" > { $i18n . t ( 'Speech-to-Text' )} </ div >
2024-06-09 09:05:36 +00:00
2025-06-16 12:13:40 +00:00
< hr class = " border-gray-100 dark:border-gray-850 my-2" / >
2025-06-16 12:21:57 +00:00
{ #if STT_ENGINE !== 'web' }
< div class = "mb-2" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Supported MIME Types' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_SUPPORTED_CONTENT_TYPES }
2025-07-12 20:15:16 +00:00
placeholder={ $i18n . t (
2025-07-18 10:54:08 +00:00
'e.g., audio/wav,audio/mpeg,video/* (leave blank for defaults)'
2025-07-12 20:15:16 +00:00
)}
2025-06-16 12:21:57 +00:00
/>
< / div >
< / div >
< / div >
{ /if }
2025-06-16 12:13:40 +00:00
< div class = "mb-2 py-0.5 flex w-full justify-between" >
2024-06-09 09:05:36 +00:00
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Speech-to-Text Engine' )} </ div >
< div class = "flex items-center relative" >
< select
2025-02-16 03:27:25 +00:00
class="dark:bg-gray-900 cursor-pointer w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
2024-06-09 09:05:36 +00:00
bind:value={ STT_ENGINE }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select an engine' )}
2024-06-09 09:05:36 +00:00
>
< option value = "" > { $i18n . t ( 'Whisper (Local)' )} </ option >
2025-08-14 00:15:16 +00:00
< option value = "openai" > { $i18n . t ( 'OpenAI' )} </ option >
2024-06-09 09:05:36 +00:00
< option value = "web" > { $i18n . t ( 'Web API' )} </ option >
2025-08-14 00:15:16 +00:00
< option value = "deepgram" > { $i18n . t ( 'Deepgram' )} </ option >
< option value = "azure" > { $i18n . t ( 'Azure AI Speech' )} </ option >
2025-11-04 19:57:36 +00:00
< option value = "mistral" > { $i18n . t ( 'MistralAI' )} </ option >
2025-04-12 23:35:11 +00:00
< / select >
2024-06-08 03:18:48 +00:00
< / div >
< / div >
2024-06-09 09:05:36 +00:00
{ #if STT_ENGINE === 'openai' }
< div >
< div class = "mt-1 flex gap-2 mb-1" >
2024-06-08 03:18:48 +00:00
< input
2025-02-16 03:27:25 +00:00
class="flex-1 w-full bg-transparent outline-hidden"
2024-06-09 09:05:36 +00:00
placeholder={ $i18n . t ( 'API Base URL' )}
bind:value={ STT_OPENAI_API_BASE_URL }
required
2024-06-08 03:18:48 +00:00
/>
2024-06-25 12:15:29 +00:00
< SensitiveInput placeholder = { $i18n . t ( 'API Key' )} bind:value= { STT_OPENAI_API_KEY } />
2024-06-08 03:18:48 +00:00
< / div >
< / div >
2025-02-16 03:27:25 +00:00
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
2024-06-09 09:05:36 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'STT Model' )} </ div >
2024-06-09 09:05:36 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="model-list"
2025-02-16 03:27:25 +00:00
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
2024-06-09 09:05:36 +00:00
bind:value={ STT_MODEL }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a model' )}
2024-06-09 09:05:36 +00:00
/>
< datalist id = "model-list" >
< option value = "whisper-1" / >
< / datalist >
< / div >
< / div >
< / div >
2025-02-02 13:58:59 +00:00
{ :else if STT_ENGINE === 'deepgram' }
< div >
< div class = "mt-1 flex gap-2 mb-1" >
< SensitiveInput placeholder = { $i18n . t ( 'API Key' )} bind:value= { STT_DEEPGRAM_API_KEY } />
< / div >
< / div >
2025-02-16 03:27:25 +00:00
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
2025-02-02 13:58:59 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'STT Model' )} </ div >
2025-02-02 13:58:59 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
2025-02-16 03:27:25 +00:00
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
2025-02-02 13:58:59 +00:00
bind:value={ STT_MODEL }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a model (optional)' )}
2025-02-02 13:58:59 +00:00
/>
< / div >
< / div >
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
{ $i18n . t ( 'Leave model field empty to use the default model.' )}
< a
class=" hover:underline dark:text-gray-200 text-gray-800"
href="https://developers.deepgram.com/docs/models"
target="_blank"
>
{ $i18n . t ( 'Click here to see available models.' )}
< / a >
< / div >
< / div >
2025-04-10 20:38:59 +00:00
{ :else if STT_ENGINE === 'azure' }
2025-04-12 23:35:11 +00:00
< div >
< div class = "mt-1 flex gap-2 mb-1" >
< SensitiveInput
placeholder={ $i18n . t ( 'API Key' )}
bind:value={ STT_AZURE_API_KEY }
required
/>
< / div >
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
2025-05-06 02:08:48 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Azure Region' )} </ div >
2025-05-06 02:08:48 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_AZURE_REGION }
placeholder={ $i18n . t ( 'e.g., westus (leave blank for eastus)' )}
/>
< / div >
< / div >
2025-05-06 02:12:32 +00:00
< / div >
2025-05-06 02:08:48 +00:00
2025-04-12 23:35:11 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Language Locales' )} </ div >
2025-04-12 23:35:11 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_AZURE_LOCALES }
placeholder={ $i18n . t ( 'e.g., en-US,ja-JP (leave blank for auto-detect)' )}
/>
< / div >
< / div >
2025-05-06 02:12:32 +00:00
< / div >
2025-05-03 19:48:12 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Endpoint URL' )} </ div >
2025-04-30 12:51:01 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_AZURE_BASE_URL }
2025-05-06 02:08:48 +00:00
placeholder={ $i18n . t ( '(leave blank for to use commercial endpoint)' )}
2025-04-30 12:51:01 +00:00
/>
< / div >
< / div >
< / div >
2025-05-03 19:48:12 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Max Speakers' )} </ div >
2025-04-30 12:51:01 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_AZURE_MAX_SPEAKERS }
placeholder={ $i18n . t ( 'e.g., 3, 4, 5 (leave blank for default)' )}
/>
< / div >
< / div >
2025-04-12 23:35:11 +00:00
< / div >
< / div >
2025-11-04 19:57:36 +00:00
{ :else if STT_ENGINE === 'mistral' }
< div >
< div class = "mt-1 flex gap-2 mb-1" >
< input
class="flex-1 w-full bg-transparent outline-hidden"
placeholder={ $i18n . t ( 'API Base URL' )}
bind:value={ STT_MISTRAL_API_BASE_URL }
required
/>
< SensitiveInput placeholder = { $i18n . t ( 'API Key' )} bind:value= { STT_MISTRAL_API_KEY } />
< / div >
< / div >
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
< div >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'STT Model' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ STT_MODEL }
placeholder="voxtral-mini-latest"
/>
< / div >
< / div >
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
{ $i18n . t ( 'Leave empty to use the default model (voxtral-mini-latest).' )}
< a
class=" hover:underline dark:text-gray-200 text-gray-800"
href="https://docs.mistral.ai/capabilities/audio_transcription"
target="_blank"
>
{ $i18n . t ( 'Learn more about Voxtral transcription.' )}
< / a >
< / div >
< / div >
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
< div >
< div class = "flex items-center justify-between mb-2" >
< div class = "text-xs font-medium" > { $i18n . t ( 'Use Chat Completions API' )} </ div >
< label class = "relative inline-flex items-center cursor-pointer" >
< input
type="checkbox"
bind:checked={ STT_MISTRAL_USE_CHAT_COMPLETIONS }
class="sr-only peer"
/>
< div
class="w-9 h-5 bg-gray-200 peer-focus:outline-none peer-focus:ring-2 peer-focus:ring-blue-300 dark:peer-focus:ring-blue-800 rounded-full peer dark:bg-gray-700 peer-checked:after:translate-x-full peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:left-[2px] after:bg-white after:border-gray-300 after:border after:rounded-full after:h-4 after:w-4 after:transition-all dark:border-gray-600 peer-checked:bg-blue-600"
>< / div >
< / label >
< / div >
< div class = "text-xs text-gray-400 dark:text-gray-500" >
{ $i18n . t (
'Use /v1/chat/completions endpoint instead of /v1/audio/transcriptions for potentially better accuracy.'
)}
< / div >
< / div >
2024-10-21 04:34:36 +00:00
{ :else if STT_ENGINE === '' }
2025-04-12 23:35:11 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'STT Model' )} </ div >
2024-10-21 04:34:36 +00:00
< div class = "flex w-full" >
< div class = "flex-1 mr-2" >
< input
2025-02-16 03:27:25 +00:00
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
2024-10-21 04:34:36 +00:00
placeholder={ $i18n . t ( 'Set whisper model' )}
bind:value={ STT_WHISPER_MODEL }
/>
< / div >
<!-- type="button": this button lives inside the settings <form>; without an
     explicit type it is a submit button and a click could also submit the form. -->
<button
	type="button"
	class="px-2.5 bg-gray-50 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
	on:click={() => {
		sttModelUpdateHandler();
	}}
	disabled={STT_WHISPER_MODEL_LOADING}
>
	{#if STT_WHISPER_MODEL_LOADING}
		<div class="self-center">
			<Spinner />
		</div>
	{:else}
		<svg
			xmlns="http://www.w3.org/2000/svg"
			viewBox="0 0 16 16"
			fill="currentColor"
			class="w-4 h-4"
		>
			<path
				d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
			/>
			<path
				d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
			/>
		</svg>
	{/if}
</button>
< / div >
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
2024-10-21 04:43:19 +00:00
{ $i18n . t ( `Open WebUI uses faster-whisper internally.` )}
< a
class=" hover:underline dark:text-gray-200 text-gray-800"
href="https://github.com/SYSTRAN/faster-whisper"
target="_blank"
>
{ $i18n . t (
`Click here to learn more about faster-whisper and see the available models.`
)}
< / a >
2024-10-21 04:34:36 +00:00
< / div >
< / div >
2024-06-09 09:05:36 +00:00
{ /if }
2024-06-08 03:18:48 +00:00
< / div >
2024-06-09 09:05:36 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-2.5 text-base font-medium" > { $i18n . t ( 'Text-to-Speech' )} </ div >
2024-06-09 09:05:36 +00:00
2025-06-16 12:13:40 +00:00
< hr class = " border-gray-100 dark:border-gray-850 my-2" / >
< div class = "mb-2 py-0.5 flex w-full justify-between" >
2024-06-09 09:05:36 +00:00
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Text-to-Speech Engine' )} </ div >
< div class = "flex items-center relative" >
<!-- Switching the engine resets voice/model to that engine's defaults, saves
     the configuration, then reloads the voice and model lists. -->
<select
	class=" dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
	bind:value={TTS_ENGINE}
	placeholder={$i18n.t('Select a mode')}
	on:change={async (e) => {
		const engine = e.currentTarget.value;

		// Reset BEFORE saving so the persisted voice/model belong to the new
		// engine instead of the previously selected one.
		if (engine === 'openai') {
			TTS_VOICE = 'alloy';
			TTS_MODEL = 'tts-1';
		} else {
			TTS_VOICE = '';
			TTS_MODEL = '';
		}

		await updateConfigHandler();
		await getVoices();
		await getModels();
	}}
>
	<option value="">{$i18n.t('Web API')}</option>
	<option value="transformers">{$i18n.t('Transformers')} ({$i18n.t('Local')})</option>
	<option value="openai">{$i18n.t('OpenAI')}</option>
	<option value="elevenlabs">{$i18n.t('ElevenLabs')}</option>
	<option value="azure">{$i18n.t('Azure AI Speech')}</option>
</select>
2024-06-08 03:18:48 +00:00
< / div >
< / div >
2024-06-09 09:05:36 +00:00
{ #if TTS_ENGINE === 'openai' }
< div >
< div class = "mt-1 flex gap-2 mb-1" >
< input
2025-02-16 03:27:25 +00:00
class="flex-1 w-full bg-transparent outline-hidden"
2024-06-09 09:05:36 +00:00
placeholder={ $i18n . t ( 'API Base URL' )}
bind:value={ TTS_OPENAI_API_BASE_URL }
required
/>
2024-06-08 03:18:48 +00:00
2024-06-25 12:15:29 +00:00
< SensitiveInput placeholder = { $i18n . t ( 'API Key' )} bind:value= { TTS_OPENAI_API_KEY } />
2024-06-08 03:18:48 +00:00
< / div >
< / div >
2024-07-19 08:35:05 +00:00
{ :else if TTS_ENGINE === 'elevenlabs' }
< div >
< div class = "mt-1 flex gap-2 mb-1" >
< input
2025-02-16 03:27:25 +00:00
class="flex-1 w-full rounded-lg py-2 pl-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
2024-07-19 08:35:05 +00:00
placeholder={ $i18n . t ( 'API Key' )}
bind:value={ TTS_API_KEY }
required
/>
< / div >
< / div >
2024-09-19 00:40:54 +00:00
{ :else if TTS_ENGINE === 'azure' }
2024-09-17 07:47:30 +00:00
< div >
< div class = "mt-1 flex gap-2 mb-1" >
2025-05-06 02:17:18 +00:00
< SensitiveInput placeholder = { $i18n . t ( 'API Key' )} bind:value= { TTS_API_KEY } required />
2025-05-06 02:08:48 +00:00
< / div >
< hr class = "border-gray-100 dark:border-gray-850 my-2" / >
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Azure Region' )} </ div >
2025-05-06 02:08:48 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_AZURE_SPEECH_REGION }
placeholder={ $i18n . t ( 'e.g., westus (leave blank for eastus)' )}
/>
< / div >
< / div >
2025-05-06 02:12:32 +00:00
< / div >
2025-05-06 02:08:48 +00:00
< div >
2025-06-16 12:13:40 +00:00
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Endpoint URL' )} </ div >
2025-05-06 02:08:48 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
< input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_AZURE_SPEECH_BASE_URL }
placeholder={ $i18n . t ( '(leave blank for to use commercial endpoint)' )}
/>
< / div >
< / div >
2024-09-17 07:47:30 +00:00
< / div >
2024-09-19 00:40:54 +00:00
< / div >
2024-06-09 09:05:36 +00:00
{ /if }
2025-06-16 12:13:40 +00:00
< div class = "mb-2" >
{ #if TTS_ENGINE === '' }
< div >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Voice' )} </ div >
2024-07-19 08:35:05 +00:00
< div class = "flex w-full" >
< div class = "flex-1" >
2025-06-16 12:13:40 +00:00
<!-- Voice picker for the Web API engine; the empty value means "browser default". -->
<select
	class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
	bind:value={TTS_VOICE}
>
	<!-- "Default" is the selected entry exactly when no voice has been chosen. -->
	<option value="" selected={TTS_VOICE === ''}>{$i18n.t('Default')}</option>

	{#each voices as voice}
		<option
			value={voice.voiceURI}
			class="bg-gray-100 dark:bg-gray-700"
			selected={TTS_VOICE === voice.voiceURI}>{voice.name}</option
		>
	{/each}
</select>
2024-07-19 08:35:05 +00:00
< / div >
< / div >
< / div >
2025-06-16 12:13:40 +00:00
{ :else if TTS_ENGINE === 'transformers' }
< div >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Model' )} </ div >
2024-07-19 08:35:05 +00:00
< div class = "flex w-full" >
<div class="flex-1">
	<!-- The datalist id is unique to this field: the STT OpenAI section also
	     renders a datalist with id "model-list", and both can be on the page at once. -->
	<input
		list="tts-transformers-model-list"
		class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
		bind:value={TTS_MODEL}
		placeholder={$i18n.t('CMU ARCTIC speaker embedding name')}
	/>

	<datalist id="tts-transformers-model-list">
		<option value="tts-1" />
	</datalist>
</div>
< / div >
2025-06-16 12:13:40 +00:00
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
{ $i18n . t ( `Open WebUI uses SpeechT5 and CMU Arctic speaker embeddings.` )}
To learn more about SpeechT5,
< a
class=" hover:underline dark:text-gray-200 text-gray-800"
href="https://github.com/microsoft/SpeechT5"
target="_blank"
>
{ $i18n . t ( `click here` , {
name: 'SpeechT5'
})}.
< / a >
To see the available CMU Arctic speaker embeddings,
< a
class=" hover:underline dark:text-gray-200 text-gray-800"
href="https://huggingface.co/datasets/Matthijs/cmu-arctic-xvectors"
target="_blank"
>
{ $i18n . t ( `click here` )} .
< / a >
< / div >
2024-07-19 08:35:05 +00:00
< / div >
2025-06-16 12:13:40 +00:00
{ :else if TTS_ENGINE === 'openai' }
< div class = " flex gap-2" >
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Voice' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_VOICE }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a voice' )}
2025-06-16 12:13:40 +00:00
/>
2024-09-19 00:40:54 +00:00
2025-06-16 12:13:40 +00:00
< datalist id = "voice-list" >
{ #each voices as voice }
< option value = { voice . id } > { voice . name } </option >
{ /each }
< / datalist >
< / div >
2024-06-09 09:05:36 +00:00
< / div >
< / div >
2025-06-16 12:13:40 +00:00
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Model' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_MODEL }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a model' )}
2025-06-16 12:13:40 +00:00
/>
2024-09-19 00:40:54 +00:00
2025-06-16 12:13:40 +00:00
< datalist id = "tts-model-list" >
{ #each models as model }
< option value = { model . id } class="bg-gray-50 dark:bg-gray-700 " />
{ /each }
< / datalist >
< / div >
2024-09-18 13:13:42 +00:00
< / div >
< / div >
2024-09-19 00:40:54 +00:00
< / div >
2025-10-07 21:20:27 +00:00
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'Additional Parameters' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< Textarea
className="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_OPENAI_PARAMS }
placeholder={ $i18n . t ( 'Enter additional parameters in JSON format' )}
minSize={ 100 }
/>
< / div >
< / div >
< / div >
< / div >
2025-06-16 12:13:40 +00:00
{ :else if TTS_ENGINE === 'elevenlabs' }
< div class = " flex gap-2" >
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Voice' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_VOICE }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a voice' )}
2025-06-16 12:13:40 +00:00
/>
2024-09-19 00:40:54 +00:00
2025-06-16 12:13:40 +00:00
< datalist id = "voice-list" >
{ #each voices as voice }
< option value = { voice . id } > { voice . name } </option >
{ /each }
< / datalist >
< / div >
< / div >
< / div >
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Model' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_MODEL }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a model' )}
2025-06-16 12:13:40 +00:00
/>
< datalist id = "tts-model-list" >
{ #each models as model }
< option value = { model . id } class="bg-gray-50 dark:bg-gray-700 " />
{ /each }
< / datalist >
< / div >
2024-09-18 13:13:42 +00:00
< / div >
< / div >
2024-09-19 00:40:54 +00:00
< / div >
2025-06-16 12:13:40 +00:00
{ :else if TTS_ENGINE === 'azure' }
< div class = " flex gap-2" >
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" > { $i18n . t ( 'TTS Voice' )} </ div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_VOICE }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select a voice' )}
2025-06-16 12:13:40 +00:00
/>
< datalist id = "voice-list" >
{ #each voices as voice }
< option value = { voice . id } > { voice . name } </option >
{ /each }
< / datalist >
< / div >
< / div >
2024-09-19 00:40:54 +00:00
< / div >
2025-06-16 12:13:40 +00:00
< div class = "w-full" >
< div class = " mb-1.5 text-xs font-medium" >
{ $i18n . t ( 'Output format' )}
< a
href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs"
target="_blank"
>
< small > { $i18n . t ( 'Available list' )} </ small >
< / a >
< / div >
< div class = "flex w-full" >
< div class = "flex-1" >
< input
list="tts-model-list"
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
bind:value={ TTS_AZURE_SPEECH_OUTPUT_FORMAT }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select an output format' )}
2025-06-16 12:13:40 +00:00
/>
< / div >
2024-06-09 09:05:36 +00:00
< / div >
2024-06-08 03:18:48 +00:00
< / div >
< / div >
2025-06-16 12:13:40 +00:00
{ /if }
< / div >
2024-08-25 00:35:42 +00:00
< div class = "pt-0.5 flex w-full justify-between" >
< div class = "self-center text-xs font-medium" > { $i18n . t ( 'Response splitting' )} </ div >
< div class = "flex items-center relative" >
< select
2025-02-16 03:27:25 +00:00
class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
2025-08-14 00:15:16 +00:00
aria-label={ $i18n . t ( 'Select how to split message text for TTS requests' )}
2024-08-25 00:35:42 +00:00
bind:value={ TTS_SPLIT_ON }
>
2024-08-26 13:38:42 +00:00
{ #each Object . values ( TTS_RESPONSE_SPLIT ) as split }
< option value = { split }
>{ $i18n . t ( split . charAt ( 0 ). toUpperCase () + split . slice ( 1 ))} < /option
>
{ /each }
2024-08-25 00:35:42 +00:00
< / select >
< / div >
< / div >
< div class = "mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500" >
{ $i18n . t (
2024-08-25 00:45:10 +00:00
"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
2024-08-25 00:35:42 +00:00
)}
< / div >
2024-06-09 09:05:36 +00:00
< / div >
2024-06-08 03:18:48 +00:00
< / div >
< / div >
< div class = "flex justify-end text-sm font-medium" >
< button
2024-10-21 07:05:27 +00:00
class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
2024-06-08 03:18:48 +00:00
type="submit"
>
{ $i18n . t ( 'Save' )}
< / button >
< / div >
< / form >