2024-02-06 05:05:38 +00:00
< script lang = "ts" >
2024-03-01 09:18:07 +00:00
import { toast } from 'svelte-sonner';
2024-08-02 17:29:03 +00:00
import { createEventDispatcher , onMount , getContext } from 'svelte';
import { user , settings , config } from '$lib/stores';
import { getVoices as _getVoices } from '$lib/apis/audio';
2024-06-03 18:09:55 +00:00
import Switch from '$lib/components/common/Switch.svelte';
2025-02-10 07:42:27 +00:00
import { round } from '@huggingface/transformers';
import Spinner from '$lib/components/common/Spinner.svelte';
2025-05-23 20:36:30 +00:00
import Tooltip from '$lib/components/common/Tooltip.svelte';
2024-02-06 05:05:38 +00:00
// Component event dispatcher; a 'save' event is dispatched after the form is submitted.
const dispatch = createEventDispatcher();
2024-03-02 20:38:51 +00:00
// i18n store taken from the component context; the template calls `$i18n.t(...)` for labels.
const i18n = getContext('i18n');
2024-02-06 05:05:38 +00:00
// Callback prop supplied by the parent; called with an object holding the settings to persist.
export let saveSettings: Function;
2024-02-11 10:21:06 +00:00
// Audio-related preferences mirrored from `$settings` when the component mounts.
2024-02-10 01:09:14 +00:00
// Loaded from `$settings.conversationMode` on mount; not referenced elsewhere in the visible code.
let conversationMode = false;
2024-02-10 00:00:39 +00:00
// On/off preferences; each one is persisted immediately by its toggle function.
let speechAutoSend = false;
let responseAutoPlayback = false;
2024-06-03 18:09:55 +00:00
// When true, the browser voice list also includes voices whose `localService` is not true.
let nonLocalVoices = false;
2024-02-10 00:00:39 +00:00
2024-06-08 03:18:48 +00:00
// Speech-to-text engine; '' is the "Default" option and is saved as undefined.
let STTEngine = '';
2025-05-23 20:36:30 +00:00
// Speech-to-text input language; '' is saved as undefined.
let STTLanguage = '';
2024-02-06 05:36:03 +00:00
2025-02-10 07:42:27 +00:00
// Text-to-speech engine; '' is the "Default" option, 'browser-kokoro' selects Kokoro.js.
let TTSEngine = '';
// Engine-specific options; the template binds `dtype` here for Kokoro.js.
let TTSEngineConfig = {} ;
// Loaded Kokoro model instance, or null while no model is available.
let TTSModel = null;
// Most recent event received by the model loader's progress callback.
let TTSModelProgress = null;
// Set to true when a Kokoro model load starts.
let TTSModelLoading = false;
2024-02-06 05:36:03 +00:00
// Voices available for the active engine (filled by getVoices).
let voices = [];
2024-06-08 03:18:48 +00:00
// Currently selected voice; '' is saved as undefined.
let voice = '';
2024-02-06 05:05:38 +00:00
2024-09-19 01:22:55 +00:00
// Speech playback speed multiplier; bound to the number input and saved as `audio.tts.playbackRate`.
2024-09-21 13:42:39 +00:00
let playbackRate = 1;
2024-09-19 01:22:55 +00:00
2024-08-02 17:29:03 +00:00
/**
 * Fills `voices` for the currently selected text-to-speech engine.
 *
 * - `browser-kokoro`: lists the voices exposed by the loaded Kokoro model,
 *   asking `loadKokoro()` to load it first when no model is present.
 * - server engine unset (`$config.audio.tts.engine === ''`): polls the
 *   browser's `speechSynthesis` every 100 ms until it reports a voice.
 * - any other server engine: fetches the list from the backend with
 *   `_getVoices`, showing a toast if the request fails.
 */
const getVoices = async () => {
	if (TTSEngine === 'browser-kokoro') {
		if (!TTSModel) {
			await loadKokoro();
		}

		// loadKokoro() does nothing until a dtype has been chosen, so the model
		// may still be missing here. Leave `voices` as-is instead of
		// dereferencing null.
		if (!TTSModel) {
			return;
		}

		voices = Object.entries(TTSModel.voices).map(([key, value]) => ({
			id: key,
			name: value.name,
			localService: false
		}));
		return;
	}

	if ($config.audio.tts.engine === '') {
		// Without the Web Speech API there is nothing to poll.
		if (typeof speechSynthesis === 'undefined') {
			return;
		}

		// The list can be empty on the first calls, so retry until it is filled.
		// speechSynthesis.getVoices() is synchronous; no await is needed.
		const getVoicesLoop = setInterval(() => {
			voices = speechSynthesis.getVoices();
			if (voices.length > 0) {
				clearInterval(getVoicesLoop);
			}
		}, 100);
		return;
	}

	const res = await _getVoices(localStorage.token).catch((e) => {
		toast.error(`${e} `);
	});

	if (res) {
		console.log(res);
		// Keep `voices` an array even if the response carries no list.
		voices = res.voices ?? [];
	}
};
2024-02-10 00:00:39 +00:00
// Flips the "auto-playback response" preference and persists the new value right away.
const toggleResponseAutoPlayback = async () => {
	const enabled = !responseAutoPlayback;
	responseAutoPlayback = enabled;
	saveSettings({ responseAutoPlayback: enabled });
};
// Flips the "auto-send after transcription" preference and persists the new value right away.
const toggleSpeechAutoSend = async () => {
	const enabled = !speechAutoSend;
	speechAutoSend = enabled;
	saveSettings({ speechAutoSend: enabled });
};
2024-02-06 06:51:08 +00:00
// On mount: copy the persisted user settings into the local form state,
// then load the voice list for the active engine.
onMount(async () => {
	const ttsSettings = $settings.audio?.tts;
	const sttSettings = $settings.audio?.stt;

	playbackRate = ttsSettings?.playbackRate ?? 1;

	conversationMode = $settings.conversationMode ?? false;
	speechAutoSend = $settings.speechAutoSend ?? false;
	responseAutoPlayback = $settings.responseAutoPlayback ?? false;

	STTEngine = sttSettings?.engine ?? '';
	STTLanguage = sttSettings?.language ?? '';

	TTSEngine = ttsSettings?.engine ?? '';
	TTSEngineConfig = ttsSettings?.engineConfig ?? {};

	// The user's own voice choice is only used while the default voice stored
	// with it still equals the server-configured voice; otherwise the server
	// voice is used.
	const serverVoice = $config.audio.tts.voice;
	voice =
		ttsSettings?.defaultVoice === serverVoice
			? (ttsSettings?.voice ?? serverVoice ?? '')
			: (serverVoice ?? '');

	nonLocalVoices = ttsSettings?.nonLocalVoices ?? false;

	await getVoices();
});
2025-02-10 07:42:27 +00:00
// Svelte reactive statement: runs again when TTSEngine or TTSEngineConfig is
// invalidated. Both must be truthy before onTTSEngineChange() is called.
// The returned promise is not awaited here.
$: if (TTSEngine && TTSEngineConfig) {
onTTSEngineChange();
}
// Reacts to a change of TTS engine or engine config; only the in-browser
// Kokoro engine needs work here, namely (re)loading its model.
const onTTSEngineChange = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	await loadKokoro();
};
/**
 * Loads the in-browser Kokoro TTS model for the dtype in `TTSEngineConfig`
 * and then refreshes `voices` through `getVoices()`.
 *
 * Does nothing unless `TTSEngine` is 'browser-kokoro'. For that engine the
 * voice list is cleared first; the model itself is only loaded once a dtype
 * has been chosen. Errors from the dynamic import or the model download
 * propagate to the caller.
 */
const loadKokoro = async () => {
	if (TTSEngine !== 'browser-kokoro') {
		return;
	}

	voices = [];

	if (!TTSEngineConfig?.dtype) {
		return;
	}

	TTSModel = null;
	TTSModelProgress = null;
	TTSModelLoading = true;

	const model_id = 'onnx-community/Kokoro-82M-v1.0-ONNX';

	try {
		// Imported lazily so kokoro-js is only fetched when this engine is used.
		const { KokoroTTS } = await import('kokoro-js');

		TTSModel = await KokoroTTS.from_pretrained(model_id, {
			dtype: TTSEngineConfig.dtype, // Options: "fp32", "fp16", "q8", "q4", "q4f16"
			device: !!navigator?.gpu ? 'webgpu' : 'wasm', // Detect WebGPU
			progress_callback: (e) => {
				TTSModelProgress = e;
				console.log(e);
			}
		});

		await getVoices();
	} finally {
		// Clear the flag on success and on failure so it never stays stuck at true.
		TTSModelLoading = false;
	}
};
2024-02-06 05:05:38 +00:00
< / script >
< form
2025-06-11 11:45:47 +00:00
id="tab-audio"
2024-02-06 05:05:38 +00:00
class="flex flex-col h-full justify-between space-y-3 text-sm"
2024-04-20 21:00:24 +00:00
on:submit|preventDefault={async () => {
	// Empty strings are the UI's "Default" choice and are saved as undefined.
	const stt = {
		engine: STTEngine !== '' ? STTEngine : undefined,
		language: STTLanguage !== '' ? STTLanguage : undefined
	};

	const tts = {
		engine: TTSEngine !== '' ? TTSEngine : undefined,
		engineConfig: TTSEngineConfig,
		playbackRate: playbackRate,
		voice: voice !== '' ? voice : undefined,
		// Server voice at save time, stored next to the user's choice.
		defaultVoice: $config?.audio?.tts?.voice ?? '',
		// Only saved when the server has no TTS engine configured.
		nonLocalVoices: $config.audio.tts.engine === '' ? nonLocalVoices : undefined
	};

	saveSettings({ audio: { stt, tts } });
	dispatch('save');
}}
>
2024-11-13 06:43:18 +00:00
< div class = " space-y-3 overflow-y-scroll max-h-[28rem] lg:max-h-full" >
2024-02-10 00:00:39 +00:00
< div >
2024-03-02 20:38:51 +00:00
< div class = " mb-1 text-sm font-medium" > { $i18n . t ( 'STT Settings' )} </ div >
2024-02-10 00:00:39 +00:00
2024-06-08 03:18:48 +00:00
{ #if $config . audio . stt . engine !== 'web' }
< div class = " py-0.5 flex w-full justify-between" >
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Speech-to-Text Engine' )} </ div >
< div class = "flex items-center relative" >
< select
2025-02-16 03:27:25 +00:00
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
2024-06-08 03:18:48 +00:00
bind:value={ STTEngine }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select an engine' )}
2024-06-08 03:18:48 +00:00
>
< option value = "" > { $i18n . t ( 'Default' )} </ option >
< option value = "web" > { $i18n . t ( 'Web API' )} </ option >
< / select >
< / div >
2024-02-10 00:00:39 +00:00
< / div >
2025-05-23 20:36:30 +00:00
< div class = " py-0.5 flex w-full justify-between" >
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Language' )} </ div >
< div class = "flex items-center relative text-xs px-3" >
< Tooltip
content={ $i18n . t (
'The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency. Leave blank to automatically detect the language.'
)}
placement="top"
>
< input
type="text"
bind:value={ STTLanguage }
placeholder={ $i18n . t ( 'e.g. en' )}
class=" text-sm text-right bg-transparent dark:text-gray-300 outline-hidden"
/>
< / Tooltip >
< / div >
< / div >
2024-06-08 03:18:48 +00:00
{ /if }
2024-02-10 00:00:39 +00:00
2024-02-10 01:09:14 +00:00
< div class = " py-0.5 flex w-full justify-between" >
2024-03-02 20:38:51 +00:00
< div class = " self-center text-xs font-medium" >
2024-06-07 04:56:09 +00:00
{ $i18n . t ( 'Instant Auto-Send After Voice Transcription' )}
2024-03-02 20:38:51 +00:00
< / div >
2024-02-10 00:00:39 +00:00
< button
2025-02-16 03:27:25 +00:00
class="p-1 px-3 text-xs flex rounded-sm transition"
2024-02-10 00:00:39 +00:00
on:click={() => {
toggleSpeechAutoSend();
}}
type="button"
>
{ #if speechAutoSend === true }
2024-03-02 20:38:51 +00:00
< span class = "ml-2 self-center" > { $i18n . t ( 'On' )} </ span >
2024-02-10 00:00:39 +00:00
{ : else }
2024-03-02 20:38:51 +00:00
< span class = "ml-2 self-center" > { $i18n . t ( 'Off' )} </ span >
2024-02-10 00:00:39 +00:00
{ /if }
< / button >
< / div >
2024-02-11 10:12:49 +00:00
< / div >
< div >
2024-03-02 20:38:51 +00:00
< div class = " mb-1 text-sm font-medium" > { $i18n . t ( 'TTS Settings' )} </ div >
2024-02-11 10:12:49 +00:00
2025-02-10 07:42:27 +00:00
< div class = " py-0.5 flex w-full justify-between" >
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Text-to-Speech Engine' )} </ div >
< div class = "flex items-center relative" >
< select
2025-02-16 03:27:25 +00:00
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
2025-02-10 07:42:27 +00:00
bind:value={ TTSEngine }
2025-08-14 00:15:16 +00:00
placeholder={ $i18n . t ( 'Select an engine' )}
2025-02-10 07:42:27 +00:00
>
< option value = "" > { $i18n . t ( 'Default' )} </ option >
< option value = "browser-kokoro" > { $i18n . t ( 'Kokoro.js (Browser)' )} </ option >
< / select >
< / div >
< / div >
{#if TTSEngine === 'browser-kokoro'}
	<!-- dtype forwarded to the Kokoro.js model loader; shown only for the Kokoro engine. -->
	<div class=" py-0.5 flex w-full justify-between">
		<div class=" self-center text-xs font-medium">{$i18n.t('Kokoro.js Dtype')}</div>
		<div class="flex items-center relative">
			<select
				class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
				bind:value={TTSEngineConfig.dtype}
				placeholder={$i18n.t('Select dtype')}
			>
				<!-- Placeholder entry; translated with the same key as the placeholder attribute. -->
				<option value="" disabled selected>{$i18n.t('Select dtype')}</option>
				<option value="fp32">fp32</option>
				<option value="fp16">fp16</option>
				<option value="q8">q8</option>
				<option value="q4">q4</option>
			</select>
		</div>
	</div>
{/if}
2024-02-10 00:00:39 +00:00
< div class = " py-0.5 flex w-full justify-between" >
2024-03-02 20:38:51 +00:00
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Auto-playback response' )} </ div >
2024-02-10 00:00:39 +00:00
< button
2025-02-16 03:27:25 +00:00
class="p-1 px-3 text-xs flex rounded-sm transition"
2024-02-10 00:00:39 +00:00
on:click={() => {
toggleResponseAutoPlayback();
2024-02-06 05:36:03 +00:00
}}
2024-02-10 00:00:39 +00:00
type="button"
2024-02-06 05:36:03 +00:00
>
2024-02-10 00:00:39 +00:00
{ #if responseAutoPlayback === true }
2024-03-02 20:38:51 +00:00
< span class = "ml-2 self-center" > { $i18n . t ( 'On' )} </ span >
2024-02-10 00:00:39 +00:00
{ : else }
2024-03-02 20:38:51 +00:00
< span class = "ml-2 self-center" > { $i18n . t ( 'Off' )} </ span >
2024-02-10 00:00:39 +00:00
{ /if }
< / button >
2024-02-06 05:36:03 +00:00
< / div >
2024-09-19 01:22:55 +00:00
< div class = " py-0.5 flex w-full justify-between" >
2024-09-21 13:42:39 +00:00
< div class = " self-center text-xs font-medium" > { $i18n . t ( 'Speech Playback Speed' )} </ div >
2024-09-19 01:22:55 +00:00
2025-05-23 20:36:30 +00:00
< div class = "flex items-center relative text-xs px-3" >
2025-05-23 16:10:16 +00:00
< input
type="number"
min="0"
step="0.01"
2024-09-21 13:42:39 +00:00
bind:value={ playbackRate }
2025-05-23 16:10:16 +00:00
class=" text-sm text-right bg-transparent dark:text-gray-300 outline-hidden"
/>
x
2024-09-19 01:22:55 +00:00
< / div >
< / div >
2024-02-06 05:36:03 +00:00
< / div >
2025-02-16 03:27:25 +00:00
< hr class = " border-gray-100 dark:border-gray-850" / >
2024-02-06 05:36:03 +00:00
2025-02-10 07:42:27 +00:00
{ #if TTSEngine === 'browser-kokoro' }
{ #if TTSModel }
<!-- Voice picker for the loaded Kokoro model: free-text input with suggestions from `voices`. -->
<div>
	<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
	<div class="flex w-full">
		<div class="flex-1">
			<input
				list="voice-list"
				class="w-full text-sm bg-transparent dark:text-gray-300 outline-hidden"
				bind:value={voice}
				placeholder={$i18n.t('Select a voice')}
			/>
			<datalist id="voice-list">
				<!-- Destructured so the loop item does not shadow the bound `voice`. -->
				{#each voices as { id, name }}
					<option value={id}>{name}</option>
				{/each}
			</datalist>
		</div>
	</div>
</div>
{ : else }
< div >
< div class = " mb-2.5 text-sm font-medium flex gap-2 items-center" >
< Spinner className = "size-4" / >
< div class = " text-sm font-medium shimmer" >
{ $i18n . t ( 'Loading Kokoro.js...' )}
{ TTSModelProgress && TTSModelProgress . status === 'progress'
? `(${ Math . round ( TTSModelProgress . progress * 10 ) / 10 } %)`
: ''}
< / div >
< / div >
< div class = "text-xs text-gray-500" >
{ $i18n . t ( 'Please do not close the settings page while loading the model.' )}
< / div >
< / div >
{ /if }
{ :else if $config . audio . tts . engine === '' }
2024-02-06 05:05:38 +00:00
<!-- Browser voices: rendered when the server has no TTS engine configured. -->
<div>
	<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
	<div class="flex w-full">
		<div class="flex-1">
			<select
				class="w-full text-sm bg-transparent dark:text-gray-300 outline-hidden"
				bind:value={voice}
			>
				<!-- "Default" stands for the empty voice, so it is selected exactly when no voice is set. -->
				<option value="" selected={voice === ''}>{$i18n.t('Default')}</option>
				<!-- Non-local voices are listed only when the switch below allows them. -->
				{#each voices.filter((v) => nonLocalVoices || v.localService === true) as _voice}
					<option
						value={_voice.name}
						class="bg-gray-100 dark:bg-gray-700"
						selected={voice === _voice.name}>{_voice.name}</option
					>
				{/each}
			</select>
		</div>
	</div>
	<div class="flex items-center justify-between my-1.5">
		<div class="text-xs">
			{$i18n.t('Allow non-local voices')}
		</div>
		<div class="mt-1">
			<Switch bind:state={nonLocalVoices} />
		</div>
	</div>
</div>
2024-07-19 11:30:36 +00:00
{ :else if $config . audio . tts . engine !== '' }
2024-02-06 06:51:08 +00:00
<!-- Voice picker for a server-configured TTS engine: free-text input with suggestions from `voices`. -->
<div>
	<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
	<div class="flex w-full">
		<div class="flex-1">
			<input
				list="voice-list"
				class="w-full text-sm bg-transparent dark:text-gray-300 outline-hidden"
				bind:value={voice}
				placeholder={$i18n.t('Select a voice')}
			/>
			<datalist id="voice-list">
				<!-- Destructured so the loop item does not shadow the bound `voice`. -->
				{#each voices as { id, name }}
					<option value={id}>{name}</option>
				{/each}
			</datalist>
		</div>
	</div>
</div>
2024-02-06 05:36:03 +00:00
{ /if }
2024-02-06 05:05:38 +00:00
< / div >
2024-05-15 22:55:13 +00:00
< div class = "flex justify-end text-sm font-medium" >
2024-02-06 05:05:38 +00:00
< button
2024-10-21 07:05:27 +00:00
class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
2024-02-06 05:05:38 +00:00
type="submit"
>
2024-03-04 08:53:56 +00:00
{ $i18n . t ( 'Save' )}
2024-02-06 05:05:38 +00:00
< / button >
< / div >
< / form >