From 485896753d319be6d90059286f4140e149ab1a1f Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Thu, 20 Nov 2025 22:43:22 +0100 Subject: [PATCH] feat: Add user header information for TTS/STT requests (#93) (#19323) Resolves #19312 Co-authored-by: Claude --- backend/open_webui/routers/audio.py | 41 ++++++++++++++--------------- backend/open_webui/routers/files.py | 2 +- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index 45b4f1e692..1edf31fa9c 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -35,6 +35,7 @@ from pydantic import BaseModel from open_webui.utils.auth import get_admin_user, get_verified_user +from open_webui.utils.headers import include_user_info_headers from open_webui.config import ( WHISPER_MODEL_AUTO_UPDATE, WHISPER_MODEL_DIR, @@ -364,23 +365,17 @@ async def speech(request: Request, user=Depends(get_verified_user)): **(request.app.state.config.TTS_OPENAI_PARAMS or {}), } + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {request.app.state.config.TTS_OPENAI_API_KEY}", + } + if ENABLE_FORWARD_USER_INFO_HEADERS: + headers = include_user_info_headers(headers, user) + r = await session.post( url=f"{request.app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech", json=payload, - headers={ - "Content-Type": "application/json", - "Authorization": f"Bearer {request.app.state.config.TTS_OPENAI_API_KEY}", - **( - { - "X-OpenWebUI-User-Name": quote(user.name, safe=" "), - "X-OpenWebUI-User-Id": user.id, - "X-OpenWebUI-User-Email": user.email, - "X-OpenWebUI-User-Role": user.role, - } - if ENABLE_FORWARD_USER_INFO_HEADERS - else {} - ), - }, + headers=headers, ssl=AIOHTTP_CLIENT_SESSION_SSL, ) @@ -570,7 +565,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): return FileResponse(file_path) -def transcription_handler(request, file_path, metadata): +def transcription_handler(request, file_path, metadata, user=None): filename = os.path.basename(file_path) file_dir = os.path.dirname(file_path) id = filename.split(".")[0] @@ -621,11 +616,15 @@ def transcription_handler(request, file_path, metadata): if language: payload["language"] = language + headers = { + "Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}" + } + if user and ENABLE_FORWARD_USER_INFO_HEADERS: + headers = include_user_info_headers(headers, user) + r = requests.post( url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", - headers={ - "Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}" - }, + headers=headers, files={"file": (filename, open(file_path, "rb"))}, data=payload, ) @@ -1027,7 +1026,7 @@ def transcription_handler(request, file_path, metadata): ) -def transcribe(request: Request, file_path: str, metadata: Optional[dict] = None): +def transcribe(request: Request, file_path: str, metadata: Optional[dict] = None, user=None): log.info(f"transcribe: {file_path} {metadata}") if is_audio_conversion_required(file_path): @@ -1054,7 +1053,7 @@ def transcribe(request: Request, file_path: str, metadata: Optional[dict] = None with ThreadPoolExecutor() as executor: # Submit tasks for each chunk_path futures = [ - executor.submit(transcription_handler, request, chunk_path, metadata) + executor.submit(transcription_handler, request, chunk_path, metadata, user) for chunk_path in chunk_paths ] # Gather results as they complete @@ -1189,7 +1188,7 @@ def transcription( if language: metadata = {"language": language} - result = transcribe(request, file_path, metadata) + result = transcribe(request, file_path, metadata, user) return { **result, diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 2a5c3e5bb1..54084941fe 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -102,7 +102,7 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us ) ): file_path = Storage.get_file(file_path) - result = transcribe(request, file_path, file_metadata) + result = transcribe(request, file_path, file_metadata, user) process_file( request,