{$i18n.t('Speech-to-Text')}

{#if STT_ENGINE !== 'web'}

{$i18n.t('Supported MIME Types')}

{/if}

{$i18n.t('Speech-to-Text Engine')}

{#if STT_ENGINE === 'openai'}

{$i18n.t('STT Model')}

{:else if STT_ENGINE === 'deepgram'}

{$i18n.t('STT Model')}

{$i18n.t('Leave model field empty to use the default model.')} {$i18n.t('Click here to see available models.')}

{:else if STT_ENGINE === 'azure'}

{$i18n.t('Azure Region')}

{$i18n.t('Language Locales')}

{$i18n.t('Endpoint URL')}

{$i18n.t('Max Speakers')}

{:else if STT_ENGINE === 'mistral'}

{$i18n.t('STT Model')}

{$i18n.t('Leave empty to use the default model (voxtral-mini-latest).')} {$i18n.t('Learn more about Voxtral transcription.')}

{$i18n.t('Use Chat Completions API')}

{$i18n.t( 'Use /v1/chat/completions endpoint instead of /v1/audio/transcriptions for potentially better accuracy.' )}

{:else if STT_ENGINE === ''}

{$i18n.t('STT Model')}

{$i18n.t(`Open WebUI uses faster-whisper internally.`)} {$i18n.t( `Click here to learn more about faster-whisper and see the available models.` )}

{/if}

{$i18n.t('Text-to-Speech')}

{$i18n.t('Text-to-Speech Engine')}

{#if TTS_ENGINE === 'openai'}

{:else if TTS_ENGINE === 'elevenlabs'}

{:else if TTS_ENGINE === 'azure'}

{$i18n.t('Azure Region')}

{$i18n.t('Endpoint URL')}

{/if}

{#if TTS_ENGINE === ''}

{$i18n.t('TTS Voice')}

{:else if TTS_ENGINE === 'transformers'}

{$i18n.t('TTS Model')}

{$i18n.t(`Open WebUI uses SpeechT5 and CMU Arctic speaker embeddings.`)} To learn more about SpeechT5, {$i18n.t(`click here`, { name: 'SpeechT5' })}. To see the available CMU Arctic speaker embeddings, {$i18n.t(`click here`)}.

{:else if TTS_ENGINE === 'openai'}

{$i18n.t('TTS Voice')}

{#each voices as voice} {/each}

{$i18n.t('TTS Model')}

{#each models as model}

{$i18n.t('Additional Parameters')}

</div>
								</div>
							</div>
						</div>
					{:else if TTS_ENGINE === 'elevenlabs'}
						<div class=" flex gap-2">
							<div class="w-full">
								<div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
								<div class="flex w-full">
									<div class="flex-1">
										<input
											list="voice-list"
											class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
											bind:value={TTS_VOICE}
											placeholder={$i18n.t('Select a voice')}
										/>

<datalist id="voice-list">
											{#each voices as voice}
												<option value={voice.id}>{voice.name}</option>
											{/each}
										</datalist>
									</div>
								</div>
							</div>
							<div class="w-full">
								<div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Model')}</div>
								<div class="flex w-full">
									<div class="flex-1">
										<input
											list="tts-model-list"
											class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
											bind:value={TTS_MODEL}
											placeholder={$i18n.t('Select a model')}
										/>

<datalist id="tts-model-list">
											{#each models as model}
												<option value={model.id} class="bg-gray-50 dark:bg-gray-700" />
											{/each}
										</datalist>
									</div>
								</div>
							</div>
						</div>
					{:else if TTS_ENGINE === 'azure'}
						<div class=" flex gap-2">
							<div class="w-full">
								<div class=" mb-1.5 text-xs font-medium">{$i18n.t('TTS Voice')}</div>
								<div class="flex w-full">
									<div class="flex-1">
										<input
											list="voice-list"
											class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
											bind:value={TTS_VOICE}
											placeholder={$i18n.t('Select a voice')}
										/>

<datalist id="voice-list">
											{#each voices as voice}
												<option value={voice.id}>{voice.name}</option>
											{/each}
										</datalist>
									</div>
								</div>
							</div>
							<div class="w-full">
								<div class=" mb-1.5 text-xs font-medium">
									{$i18n.t('Output format')}
									<a
										href="https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-text-to-speech?tabs=streaming#audio-outputs"
										target="_blank"
									>
										<small>{$i18n.t('Available list')}</small>
									</a>
								</div>
								<div class="flex w-full">
									<div class="flex-1">
										<input
											list="tts-model-list"
											class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
											bind:value={TTS_AZURE_SPEECH_OUTPUT_FORMAT}
											placeholder={$i18n.t('Select an output format')}
										/>
									</div>
								</div>
							</div>
						</div>
					{/if}
				</div>

<div class="pt-0.5 flex w-full justify-between">
					<div class="self-center text-xs font-medium">{$i18n.t('Response splitting')}</div>
					<div class="flex items-center relative">
						<select
							class="dark:bg-gray-900 w-fit pr-8 cursor-pointer rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
							aria-label={$i18n.t('Select how to split message text for TTS requests')}
							bind:value={TTS_SPLIT_ON}
						>
							{#each Object.values(TTS_RESPONSE_SPLIT) as split}
								<option value={split}
									>{$i18n.t(split.charAt(0).toUpperCase() + split.slice(1))}</option
								>
							{/each}
						</select>
					</div>
				</div>
				<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
					{$i18n.t(
						"Control how message text is split for TTS requests. 'Punctuation' splits into sentences, 'paragraphs' splits into paragraphs, and 'none' keeps the message as a single string."
					)}
				</div>
			</div>
		</div>
	</div>
	<div class="flex justify-end text-sm font-medium">
		<button
			class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
			type="submit"
		>
			{$i18n.t('Save')}
		</button>
	</div>
</form>