diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index fc60455e3e..f17b9f94b9 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -3361,6 +3361,23 @@ AUDIO_TTS_OPENAI_API_KEY = PersistentConfig(
     os.getenv("AUDIO_TTS_OPENAI_API_KEY", OPENAI_API_KEY),
 )
 
+audio_tts_openai_params = os.getenv("AUDIO_TTS_OPENAI_PARAMS", "")
+try:
+    audio_tts_openai_params = json.loads(audio_tts_openai_params)
+except json.JSONDecodeError:
+    audio_tts_openai_params = {}
+if not isinstance(audio_tts_openai_params, dict):
+    # Valid JSON that is not an object (e.g. a list or a string) cannot be
+    # unpacked into the TTS request payload, so fall back to no extra params.
+    audio_tts_openai_params = {}
+
+AUDIO_TTS_OPENAI_PARAMS = PersistentConfig(
+    "AUDIO_TTS_OPENAI_PARAMS",
+    "audio.tts.openai.params",
+    audio_tts_openai_params,
+)
+
+
 AUDIO_TTS_API_KEY = PersistentConfig(
     "AUDIO_TTS_API_KEY",
     "audio.tts.api_key",
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 5dad3d7904..a68c943966 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -175,13 +175,14 @@ from open_webui.config import (
     AUDIO_STT_AZURE_LOCALES,
     AUDIO_STT_AZURE_BASE_URL,
     AUDIO_STT_AZURE_MAX_SPEAKERS,
-    AUDIO_TTS_API_KEY,
     AUDIO_TTS_ENGINE,
     AUDIO_TTS_MODEL,
+    AUDIO_TTS_VOICE,
     AUDIO_TTS_OPENAI_API_BASE_URL,
     AUDIO_TTS_OPENAI_API_KEY,
+    AUDIO_TTS_OPENAI_PARAMS,
+    AUDIO_TTS_API_KEY,
     AUDIO_TTS_SPLIT_ON,
-    AUDIO_TTS_VOICE,
     AUDIO_TTS_AZURE_SPEECH_REGION,
     AUDIO_TTS_AZURE_SPEECH_BASE_URL,
     AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT,
@@ -1096,11 +1097,15 @@ app.state.config.AUDIO_STT_AZURE_LOCALES = AUDIO_STT_AZURE_LOCALES
 app.state.config.AUDIO_STT_AZURE_BASE_URL = AUDIO_STT_AZURE_BASE_URL
 app.state.config.AUDIO_STT_AZURE_MAX_SPEAKERS = AUDIO_STT_AZURE_MAX_SPEAKERS
 
-app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
-app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
 app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
+
 app.state.config.TTS_MODEL = AUDIO_TTS_MODEL
 app.state.config.TTS_VOICE = AUDIO_TTS_VOICE
+
+app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
+app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
+app.state.config.TTS_OPENAI_PARAMS = AUDIO_TTS_OPENAI_PARAMS
+
 app.state.config.TTS_API_KEY = AUDIO_TTS_API_KEY
 app.state.config.TTS_SPLIT_ON = AUDIO_TTS_SPLIT_ON
 
diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py
index e0aee2f726..cb7a57b5b7 100644
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@@ -154,6 +154,7 @@ def set_faster_whisper_model(model: str, auto_update: bool = False):
 class TTSConfigForm(BaseModel):
     OPENAI_API_BASE_URL: str
     OPENAI_API_KEY: str
+    OPENAI_PARAMS: Optional[dict] = None
     API_KEY: str
     ENGINE: str
     MODEL: str
@@ -190,6 +191,7 @@ async def get_audio_config(request: Request, user=Depends(get_admin_user)):
         "tts": {
             "OPENAI_API_BASE_URL": request.app.state.config.TTS_OPENAI_API_BASE_URL,
             "OPENAI_API_KEY": request.app.state.config.TTS_OPENAI_API_KEY,
+            "OPENAI_PARAMS": request.app.state.config.TTS_OPENAI_PARAMS,
             "API_KEY": request.app.state.config.TTS_API_KEY,
             "ENGINE": request.app.state.config.TTS_ENGINE,
             "MODEL": request.app.state.config.TTS_MODEL,
@@ -222,6 +224,7 @@ async def update_audio_config(
 ):
     request.app.state.config.TTS_OPENAI_API_BASE_URL = form_data.tts.OPENAI_API_BASE_URL
     request.app.state.config.TTS_OPENAI_API_KEY = form_data.tts.OPENAI_API_KEY
+    request.app.state.config.TTS_OPENAI_PARAMS = form_data.tts.OPENAI_PARAMS
     request.app.state.config.TTS_API_KEY = form_data.tts.API_KEY
     request.app.state.config.TTS_ENGINE = form_data.tts.ENGINE
     request.app.state.config.TTS_MODEL = form_data.tts.MODEL
@@ -262,12 +265,13 @@ async def update_audio_config(
 
     return {
         "tts": {
-            "OPENAI_API_BASE_URL": request.app.state.config.TTS_OPENAI_API_BASE_URL,
-            "OPENAI_API_KEY": request.app.state.config.TTS_OPENAI_API_KEY,
-            "API_KEY": request.app.state.config.TTS_API_KEY,
             "ENGINE": request.app.state.config.TTS_ENGINE,
             "MODEL": request.app.state.config.TTS_MODEL,
             "VOICE": request.app.state.config.TTS_VOICE,
+            "OPENAI_API_BASE_URL": request.app.state.config.TTS_OPENAI_API_BASE_URL,
+            "OPENAI_API_KEY": request.app.state.config.TTS_OPENAI_API_KEY,
+            "OPENAI_PARAMS": request.app.state.config.TTS_OPENAI_PARAMS,
+            "API_KEY": request.app.state.config.TTS_API_KEY,
             "SPLIT_ON": request.app.state.config.TTS_SPLIT_ON,
             "AZURE_SPEECH_REGION": request.app.state.config.TTS_AZURE_SPEECH_REGION,
             "AZURE_SPEECH_BASE_URL": request.app.state.config.TTS_AZURE_SPEECH_BASE_URL,
@@ -337,6 +341,12 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             async with aiohttp.ClientSession(
                 timeout=timeout, trust_env=True
             ) as session:
+                # Merge last so the admin-configured params override request fields.
+                payload = {
+                    **payload,
+                    **(request.app.state.config.TTS_OPENAI_PARAMS or {}),
+                }
+
                 r = await session.post(
                     url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech",
                     json=payload,
diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte
index 10c8a6adb2..588b6bed7c 100644
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -19,6 +19,7 @@
 
 	import type { Writable } from 'svelte/store';
 	import type { i18n as i18nType } from 'i18next';
+	import Textarea from '$lib/components/common/Textarea.svelte';
 
 	const i18n = getContext<Writable<i18nType>>('i18n');
 
@@ -31,6 +32,7 @@
 	let TTS_ENGINE = '';
 	let TTS_MODEL = '';
 	let TTS_VOICE = '';
+	let TTS_OPENAI_PARAMS = '';
 	let TTS_SPLIT_ON: TTS_RESPONSE_SPLIT = TTS_RESPONSE_SPLIT.PUNCTUATION;
 	let TTS_AZURE_SPEECH_REGION = '';
 	let TTS_AZURE_SPEECH_BASE_URL = '';
@@ -98,18 +100,28 @@
 	};
 
 	const updateConfigHandler = async () => {
+		let openaiParams = {};
+		try {
+			openaiParams = TTS_OPENAI_PARAMS ? JSON.parse(TTS_OPENAI_PARAMS) : {};
+			TTS_OPENAI_PARAMS = JSON.stringify(openaiParams, null, 2);
+		} catch (e) {
+			toast.error($i18n.t('Invalid JSON format for Parameters'));
+			return;
+		}
+
 		const res = await updateAudioConfig(localStorage.token, {
 			tts: {
 				OPENAI_API_BASE_URL: TTS_OPENAI_API_BASE_URL,
 				OPENAI_API_KEY: TTS_OPENAI_API_KEY,
+				OPENAI_PARAMS: openaiParams,
 				API_KEY: TTS_API_KEY,
 				ENGINE: TTS_ENGINE,
 				MODEL: TTS_MODEL,
 				VOICE: TTS_VOICE,
-				SPLIT_ON: TTS_SPLIT_ON,
 				AZURE_SPEECH_REGION: TTS_AZURE_SPEECH_REGION,
 				AZURE_SPEECH_BASE_URL: TTS_AZURE_SPEECH_BASE_URL,
-				AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT
+				AZURE_SPEECH_OUTPUT_FORMAT: TTS_AZURE_SPEECH_OUTPUT_FORMAT,
+				SPLIT_ON: TTS_SPLIT_ON
 			},
 			stt: {
 				OPENAI_API_BASE_URL: STT_OPENAI_API_BASE_URL,
@@ -146,6 +158,7 @@
 			console.log(res);
 			TTS_OPENAI_API_BASE_URL = res.tts.OPENAI_API_BASE_URL;
 			TTS_OPENAI_API_KEY = res.tts.OPENAI_API_KEY;
+			TTS_OPENAI_PARAMS = JSON.stringify(res?.tts?.OPENAI_PARAMS ?? {}, null, 2);
 			TTS_API_KEY = res.tts.API_KEY;
 
 			TTS_ENGINE = res.tts.ENGINE;
@@ -612,6 +625,22 @@
+
+
+
+
{$i18n.t('Additional Parameters')}
+
+
+