From d3acc2be35d2615dbf1ce89f9854212e619bbe0e Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 19:48:26 +0200 Subject: [PATCH 01/57] Update Knowledge directory sync process --- backend/open_webui/routers/knowledge.py | 171 ++++++++++++++++++ src/lib/apis/knowledge/index.ts | 34 ++++ .../workspace/Knowledge/KnowledgeBase.svelte | 50 +++-- 3 files changed, 237 insertions(+), 18 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 71722d706e..1b296fb1eb 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -441,6 +441,177 @@ def add_file_to_knowledge_by_id( ) +@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse]) +def sync_file_to_knowledge_by_id( + request: Request, + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_verified_user), +): + """ + Sync a single file into a knowledge base by filename with hash comparison: + - If a file with the same name exists and hashes match: skip (discard the new upload). + - If a file with the same name exists and hashes differ: replace old with new. + - If no same-named file exists: add new. + """ + log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}") + knowledge = Knowledges.get_knowledge_by_id(id=id) + + if not knowledge: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + if ( + knowledge.user_id != user.id + and not has_access(user.id, "write", knowledge.access_control) + and user.role != "admin" + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + + new_file = Files.get_file_by_id(form_data.file_id) + if not new_file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + # Ensure the new file is processed so that hash/content exist + if not (new_file.hash and new_file.data and new_file.data.get("content")): + try: + process_file( + request, + ProcessFileForm(file_id=form_data.file_id), + user=user, + ) + new_file = Files.get_file_by_id(form_data.file_id) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + existing_files = Files.get_files_by_ids(file_ids) if file_ids else [] + same_name_file = next( + (f for f in existing_files if f.filename == new_file.filename), None + ) + + if same_name_file: + # If hashes match, skip (discard the new upload) and keep existing + if ( + same_name_file.hash + and new_file.hash + and same_name_file.hash == new_file.hash + ): + try: + # Cleanup new file's vector collection if exists + try: + VECTOR_DB_CLIENT.delete_collection( + collection_name=f"file-{new_file.id}" + ) + except Exception as e: + log.debug(e) + try: + if new_file.path: + Storage.delete_file(new_file.path) + except Exception as e: + log.debug(e) + Files.delete_file_by_id(new_file.id) + except Exception as e: + log.debug(e) + + log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}") + files = Files.get_file_metadatas_by_ids(file_ids) + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + + # Hash is different: replace old with new + try: + # Remove old file's embeddings from KB collection + try: + VECTOR_DB_CLIENT.delete( + collection_name=knowledge.id, filter={"file_id": same_name_file.id} + ) + except Exception as e: + log.debug(e) + + # Remove old file's own collection and DB record + try: + if VECTOR_DB_CLIENT.has_collection( + collection_name=f"file-{same_name_file.id}" + ): + VECTOR_DB_CLIENT.delete_collection( + collection_name=f"file-{same_name_file.id}" + ) + except Exception as e: + log.debug(e) + try: + if same_name_file.path: + Storage.delete_file(same_name_file.path) + except Exception as e: + log.debug(e) + Files.delete_file_by_id(same_name_file.id) + + # Add new file to KB collection + process_file( + request, + ProcessFileForm(file_id=new_file.id, collection_name=id), + user=user, + ) + log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} new_id={new_file.id} name={new_file.filename}") + + # Replace old id with new id in knowledge + file_ids = [fid for fid in file_ids if fid != same_name_file.id] + if new_file.id not in file_ids: + file_ids.append(new_file.id) + data["file_ids"] = file_ids + knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) + + files = Files.get_file_metadatas_by_ids(file_ids) + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + else: + # No same-named file: add new + try: + process_file( + request, + ProcessFileForm(file_id=new_file.id, collection_name=id), + user=user, + ) + log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}") + if new_file.id not in file_ids: + file_ids.append(new_file.id) + data["file_ids"] = file_ids + knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) + + files = Files.get_file_metadatas_by_ids(file_ids) + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) def update_file_from_knowledge_by_id( request: Request, diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index c01c986a2a..f436cb86bb 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -212,6 +212,40 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId: return res; }; +export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.error(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { let error = null; diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 3c494e7609..d66971d6ee 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -29,9 +29,9 @@ getKnowledgeById, getKnowledgeBases, removeFileFromKnowledgeById, - resetKnowledgeById, updateFileFromKnowledgeById, - updateKnowledgeById + updateKnowledgeById, + syncFileToKnowledgeById } from '$lib/apis/knowledge'; import { blobToFile } from '$lib/utils'; @@ -78,6 +78,7 @@ let showAccessControlModal = false; let inputFiles = null; + let syncMode = false; let filteredItems = []; $: if (knowledge && knowledge.files) { @@ -199,7 +200,11 @@ delete item.itemId; return item; }); - await addFileHandler(uploadedFile.id); + if (syncMode) { + await syncFileHandler(uploadedFile.id); + } else { + await addFileHandler(uploadedFile.id); + } } else { toast.error($i18n.t('Failed to upload file.')); } @@ -382,20 +387,12 @@ // Helper function to maintain file paths within zip const syncDirectoryHandler = async () => { - if ((knowledge?.files ?? []).length > 0) { - const res = await resetKnowledgeById(localStorage.token, id).catch((e) => { - toast.error(`${e}`); - }); - - if (res) { - knowledge = res; - toast.success($i18n.t('Knowledge reset successfully.')); - - // Upload directory - uploadDirectoryHandler(); - } - } else { - uploadDirectoryHandler(); + syncMode = true; + try { + await uploadDirectoryHandler(); + toast.success($i18n.t('Directory sync completed.')); + } finally { + syncMode = false; } }; @@ -416,6 +413,23 @@ } }; + const syncFileHandler = async (fileId) => { + const updatedKnowledge = await syncFileToKnowledgeById(localStorage.token, id, fileId).catch( + (e) => { + toast.error(`${e}`); + return null; + } + ); + + if (updatedKnowledge) { + knowledge = updatedKnowledge; + toast.success($i18n.t('File synced successfully.')); + } else { + toast.error($i18n.t('Failed to sync file.')); + knowledge.files = knowledge.files.filter((file) => file.id !== fileId); + } + }; + const deleteFileHandler = async (fileId) => { try { console.log('Starting file deletion process for:', fileId); @@ -637,7 +651,7 @@ { syncDirectoryHandler(); From 5ec74504c495d26e6484de45604a396ef467a356 Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 22:07:00 +0200 Subject: [PATCH 02/57] Update sync confirmation text --- src/lib/components/workspace/Knowledge/KnowledgeBase.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index d66971d6ee..7be697c96f 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -651,7 +651,7 @@ { syncDirectoryHandler(); From 950a859e55fae97b75ca9ad89b1822ed2c91efab Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 22:38:38 +0200 Subject: [PATCH 03/57] Update endpoint name --- backend/open_webui/routers/knowledge.py | 2 +- src/lib/apis/knowledge/index.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 1b296fb1eb..63e0ff8d3e 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -441,7 +441,7 @@ def add_file_to_knowledge_by_id( ) -@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse]) +@router.post("/{id}/sync", response_model=Optional[KnowledgeFilesResponse]) def sync_file_to_knowledge_by_id( request: Request, id: str, diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index f436cb86bb..0af04c3604 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -215,7 +215,7 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId: export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/sync`, { method: 'POST', headers: { Accept: 'application/json', From 2c5bec6f76eaf68d532798f6e83127e6a6ab9fef Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 22:56:57 +0200 Subject: [PATCH 04/57] Revert endpoint name --- backend/open_webui/routers/knowledge.py | 2 +- src/lib/apis/knowledge/index.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 63e0ff8d3e..1b296fb1eb 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -441,7 +441,7 @@ def add_file_to_knowledge_by_id( ) -@router.post("/{id}/sync", response_model=Optional[KnowledgeFilesResponse]) +@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse]) def sync_file_to_knowledge_by_id( request: Request, id: str, diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index 0af04c3604..f436cb86bb 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -215,7 +215,7 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId: export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/sync`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, { method: 'POST', headers: { Accept: 'application/json', From 735619f1065eb9f9b951115280034c001a249db2 Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 23:05:12 +0200 Subject: [PATCH 05/57] Reformat long log line --- backend/open_webui/routers/knowledge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 1b296fb1eb..3eae210e14 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -567,7 +567,8 @@ def sync_file_to_knowledge_by_id( ProcessFileForm(file_id=new_file.id, collection_name=id), user=user, ) - log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} new_id={new_file.id} name={new_file.filename}") + log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} " + f"new_id={new_file.id} name={new_file.filename}") # Replace old id with new id in knowledge file_ids = [fid for fid in file_ids if fid != same_name_file.id] From 60a8a6ebbb3eb8e151972a010b02ea3a8ee7036e Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Mon, 27 Oct 2025 23:09:49 +0200 Subject: [PATCH 06/57] Reformat log line using black/pre-commit --- backend/open_webui/routers/knowledge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 3eae210e14..6c08c9b0bc 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -567,8 +567,10 @@ def sync_file_to_knowledge_by_id( ProcessFileForm(file_id=new_file.id, collection_name=id), user=user, ) - log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} " - f"new_id={new_file.id} name={new_file.filename}") + log.info( + f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} " + f"new_id={new_file.id} name={new_file.filename}" + ) # Replace old id with new id in knowledge file_ids = [fid for fid in file_ids if fid != same_name_file.id] From aaef7538b2406cc9f971978d58fc70d52636796f Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 28 Oct 2025 09:53:35 +0200 Subject: [PATCH 07/57] Reformat --- .../workspace/Knowledge/KnowledgeBase.svelte | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index c1a95dae21..34122f3558 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -199,12 +199,12 @@ toast.warning(uploadedFile.error); knowledge.files = knowledge.files.filter((file) => file.id !== uploadedFile.id); } else { - if (syncMode) { - await syncFileHandler(uploadedFile.id); - } else { - await addFileHandler(uploadedFile.id); - } - } + if (syncMode) { + await syncFileHandler(uploadedFile.id); + } else { + await addFileHandler(uploadedFile.id); + } + } } else { toast.error($i18n.t('Failed to upload file.')); } From 40f38da6281bd075836529d5af0224c240ef7308 Mon Sep 17 00:00:00 2001 From: htulipe <1255457+htulipe@users.noreply.github.com> Date: Fri, 24 Oct 2025 19:28:21 +0200 Subject: [PATCH 08/57] Update translation.json --- src/lib/i18n/locales/fr-FR/translation.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/i18n/locales/fr-FR/translation.json b/src/lib/i18n/locales/fr-FR/translation.json index 6836db111f..67ca786991 100644 --- a/src/lib/i18n/locales/fr-FR/translation.json +++ b/src/lib/i18n/locales/fr-FR/translation.json @@ -1370,7 +1370,7 @@ "Select": "Choisir", "Select a base model": "Sélectionnez un modèle de base", "Select a base model (e.g. llama3, gpt-4o)": "Chosir un modèle de base (ex : lamma3, gpt-4o)", - "Select a conversation to preview": "Choisir une converation pour la prévisualiser", + "Select a conversation to preview": "Choisir une conversation pour la prévisualiser", "Select a engine": "Sélectionnez un moteur", "Select a function": "Sélectionnez une fonction", "Select a group": "Sélectionner un groupe", From b0a16eb47648790a5578d1436db86c23709190cd Mon Sep 17 00:00:00 2001 From: Pavel Garaev Date: Fri, 24 Oct 2025 20:27:34 +0400 Subject: [PATCH 09/57] refac: update spacing in UserMenu dropdown items --- src/lib/components/layout/Sidebar/UserMenu.svelte | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/components/layout/Sidebar/UserMenu.svelte b/src/lib/components/layout/Sidebar/UserMenu.svelte index ce95120007..b4385fc39f 100644 --- a/src/lib/components/layout/Sidebar/UserMenu.svelte +++ b/src/lib/components/layout/Sidebar/UserMenu.svelte @@ -155,7 +155,7 @@ { show = false; @@ -170,7 +170,7 @@ { show = false; @@ -183,7 +183,7 @@ {/if} { show = false; From 594925219e016fd53d56d0bc8472e14282e9a559 Mon Sep 17 00:00:00 2001 From: Pavel Garaev Date: Fri, 24 Oct 2025 19:26:31 +0400 Subject: [PATCH 10/57] fix: add stable scrollbar gutter to Modal component --- src/lib/components/common/Modal.svelte | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/components/common/Modal.svelte b/src/lib/components/common/Modal.svelte index 23006c1762..16c36fa403 100644 --- a/src/lib/components/common/Modal.svelte +++ b/src/lib/components/common/Modal.svelte @@ -92,6 +92,7 @@ aria-modal="true" role="dialog" class="modal fixed top-0 right-0 left-0 bottom-0 bg-black/30 dark:bg-black/60 w-full h-screen max-h-[100dvh] {containerClassName} flex justify-center z-9999 overflow-y-auto overscroll-contain" + style="scrollbar-gutter: stable;" in:fade={{ duration: 10 }} on:mousedown={() => { show = false; From d9aa8a5ce4fcac11aea362940df40f3993ef6d6f Mon Sep 17 00:00:00 2001 From: silentoplayz Date: Fri, 24 Oct 2025 08:53:51 -0400 Subject: [PATCH 11/57] fix: prevent UI freeze by initializing distances array Fixes a bug where the UI would freeze when processing citation sources with mixed distance metrics. The `Citations.svelte` component was attempting to call `.push()` on an `undefined` `distances` array. This happened when the first document for a source had no distance value, but a subsequent document for the same source did. This patch ensures the `distances` array is always initialized as an empty array `[]` instead of `undefined`, preventing the `TypeError` and resolving the UI freeze. --- src/lib/components/chat/Messages/Citations.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/Citations.svelte b/src/lib/components/chat/Messages/Citations.svelte index 8fe7d2dd27..0da0e9c0aa 100644 --- a/src/lib/components/chat/Messages/Citations.svelte +++ b/src/lib/components/chat/Messages/Citations.svelte @@ -108,7 +108,7 @@ source: _source, document: [document], metadata: metadata ? [metadata] : [], - distances: distance !== undefined ? [distance] : undefined + distances: distance !== undefined ? [distance] : [] }); } }); From 9d11aa82f9d47956c1d62a78f45fd16deed4065e Mon Sep 17 00:00:00 2001 From: Classic298 <27028174+Classic298@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:46:15 +0200 Subject: [PATCH 12/57] fix: Modify ActionsSelector to handle global action states Updated checkbox behavior to account for global actions. --- .../workspace/Models/ActionsSelector.svelte | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/lib/components/workspace/Models/ActionsSelector.svelte b/src/lib/components/workspace/Models/ActionsSelector.svelte index 4b3b52d4e8..1cfb5ed991 100644 --- a/src/lib/components/workspace/Models/ActionsSelector.svelte +++ b/src/lib/components/workspace/Models/ActionsSelector.svelte @@ -34,11 +34,18 @@
{ - _actions[action].selected = e.detail === 'checked'; - selectedActionIds = Object.keys(_actions).filter((t) => _actions[t].selected); - }} + state={_actions[action].is_global + ? 'checked' + : _actions[action].selected + ? 'checked' + : 'unchecked'} + disabled={_actions[action].is_global} + on:change={(e) => { + if (!_actions[action].is_global) { + _actions[action].selected = e.detail === 'checked'; + selectedActionIds = Object.keys(_actions).filter((t) => _actions[t].selected); + } + }} />
From d53a9304109c41cdbc2e91abce1fa4eedb264375 Mon Sep 17 00:00:00 2001 From: Pavel Garaev Date: Fri, 24 Oct 2025 19:56:01 +0400 Subject: [PATCH 13/57] fix: conditionally render system instructions in Chat component --- src/lib/components/playground/Chat.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/playground/Chat.svelte b/src/lib/components/playground/Chat.svelte index 5b4d179429..c395c6fe6a 100644 --- a/src/lib/components/playground/Chat.svelte +++ b/src/lib/components/playground/Chat.svelte @@ -224,7 +224,7 @@ {$i18n.t('System Instructions')}
- {#if !showSystem} + {#if !showSystem && system.trim()}
{system}
From 29d4f971d8a6d0009371e944790e0d1e25979a31 Mon Sep 17 00:00:00 2001 From: Pavel Garaev Date: Fri, 24 Oct 2025 02:18:26 +0400 Subject: [PATCH 14/57] fix: validate folder and channel names before creation --- src/lib/components/layout/Sidebar.svelte | 9 ++++++++- src/lib/i18n/locales/en-US/translation.json | 1 + src/lib/i18n/locales/ru-RU/translation.json | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar.svelte b/src/lib/components/layout/Sidebar.svelte index 2714ab6d45..e9e9d4ae1e 100644 --- a/src/lib/components/layout/Sidebar.svelte +++ b/src/lib/components/layout/Sidebar.svelte @@ -129,7 +129,8 @@ }; const createFolder = async ({ name, data }) => { - if (name === '') { + name = name?.trim(); + if (!name) { toast.error($i18n.t('Folder name cannot be empty.')); return; } @@ -479,6 +480,12 @@ { + name = name?.trim(); + if (!name) { + toast.error($i18n.t('Channel name cannot be empty.')); + return; + } + const res = await createNewChannel(localStorage.token, { name: name, access_control: access_control diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index 5977953945..0f9ba6a329 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -221,6 +221,7 @@ "Channel": "", "Channel deleted successfully": "", "Channel Name": "", + "Channel name cannot be empty.": "", "Channel updated successfully": "", "Channels": "", "Character": "", diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json index 16d68dcd02..bba612d950 100644 --- a/src/lib/i18n/locales/ru-RU/translation.json +++ b/src/lib/i18n/locales/ru-RU/translation.json @@ -221,6 +221,7 @@ "Channel": "Канал", "Channel deleted successfully": "Канал успешно удалён", "Channel Name": "Название канала", + "Channel name cannot be empty.": "Название канала не может быть пустым.", "Channel updated successfully": "Канал успешно обновлён", "Channels": "Каналы", "Character": "Символ", From 1f06ae2d60da08da3dd18d330c658cbf7ccf48c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Ol=C3=A1h?= Date: Fri, 24 Oct 2025 08:42:28 +0200 Subject: [PATCH 15/57] feat: add OAUTH_ROLES_SEPARATOR env var This allows changing the separator for the `OAUTH_ALLOWED_ROLES` and `OAUTH_ADMIN_ROLES` env vars, from the default comma (,) to something that is not present in the role name. The intended audience is folks with LDAP-syntax groups/roles, e.g. `cn=webui_admin,ou=it_department,o=my_org` instead of just `webui_admin`. --- backend/open_webui/config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index f7926abe85..466adc4f1b 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -576,19 +576,21 @@ OAUTH_ROLES_CLAIM = PersistentConfig( os.environ.get("OAUTH_ROLES_CLAIM", "roles"), ) +SEP = os.environ.get("OAUTH_ROLES_SEPARATOR", ",") + OAUTH_ALLOWED_ROLES = PersistentConfig( "OAUTH_ALLOWED_ROLES", "oauth.allowed_roles", [ role.strip() - for role in os.environ.get("OAUTH_ALLOWED_ROLES", "user,admin").split(",") + for role in os.environ.get("OAUTH_ALLOWED_ROLES", f"user{SEP}admin").split(SEP) ], ) OAUTH_ADMIN_ROLES = PersistentConfig( "OAUTH_ADMIN_ROLES", "oauth.admin_roles", - [role.strip() for role in os.environ.get("OAUTH_ADMIN_ROLES", "admin").split(",")], + [role.strip() for role in os.environ.get("OAUTH_ADMIN_ROLES", "admin").split(SEP)], ) OAUTH_ALLOWED_DOMAINS = PersistentConfig( From e85192e84d5e36ad01fb705bd6483811678c339c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Ol=C3=A1h?= Date: Fri, 24 Oct 2025 08:48:57 +0200 Subject: [PATCH 16/57] fix: exclude empty roles This is a minor tweak that allows using whitespace as a separator, without it having to be exactly one space. Convenient for using YAML text fold syntax in Helm charts when providing long lists of roles. --- backend/open_webui/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 466adc4f1b..ae174a65ab 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -584,13 +584,14 @@ OAUTH_ALLOWED_ROLES = PersistentConfig( [ role.strip() for role in os.environ.get("OAUTH_ALLOWED_ROLES", f"user{SEP}admin").split(SEP) + if role ], ) OAUTH_ADMIN_ROLES = PersistentConfig( "OAUTH_ADMIN_ROLES", "oauth.admin_roles", - [role.strip() for role in os.environ.get("OAUTH_ADMIN_ROLES", "admin").split(SEP)], + [role.strip() for role in os.environ.get("OAUTH_ADMIN_ROLES", "admin").split(SEP) if role], ) OAUTH_ALLOWED_DOMAINS = PersistentConfig( From 057ca6a339e8a6ee3d161ba33d8090e33201e014 Mon Sep 17 00:00:00 2001 From: Omar Aburub Date: Thu, 23 Oct 2025 15:34:47 +0300 Subject: [PATCH 17/57] fix: prevent cancellation scope corruption by exitting in LIFO and handling exceptions --- backend/open_webui/main.py | 8 +++--- backend/open_webui/utils/mcp/client.py | 37 +++++++++++++++----------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 9998af0e73..76cb9d7e07 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -1556,11 +1556,13 @@ async def chat_completion( log.info("Chat processing was cancelled") try: event_emitter = get_event_emitter(metadata) - await event_emitter( + await asyncio.shield(event_emitter( {"type": "chat:tasks:cancel"}, - ) + )) except Exception as e: pass + finally: + raise # re-raise to ensure proper task cancellation handling except Exception as e: log.debug(f"Error processing chat payload: {e}") if metadata.get("chat_id") and metadata.get("message_id"): @@ -1591,7 +1593,7 @@ async def chat_completion( finally: try: if mcp_clients := metadata.get("mcp_clients"): - for client in mcp_clients.values(): + for client in reversed(mcp_clients.values()): await client.disconnect() except Exception as e: log.debug(f"Error cleaning up: {e}") diff --git a/backend/open_webui/utils/mcp/client.py b/backend/open_webui/utils/mcp/client.py index 01df38886c..67903b94d8 100644 --- a/backend/open_webui/utils/mcp/client.py +++ b/backend/open_webui/utils/mcp/client.py @@ -2,35 +2,40 @@ import asyncio from typing import Optional from contextlib import AsyncExitStack +import anyio + from mcp import ClientSession from mcp.client.auth import OAuthClientProvider, TokenStorage from mcp.client.streamable_http import streamablehttp_client from mcp.shared.auth import OAuthClientInformationFull, OAuthClientMetadata, OAuthToken - class MCPClient: def __init__(self): self.session: Optional[ClientSession] = None - self.exit_stack = AsyncExitStack() + self.exit_stack = None async def connect(self, url: str, headers: Optional[dict] = None): - try: - self._streams_context = streamablehttp_client(url, headers=headers) + async with AsyncExitStack() as exit_stack: + try: + self._streams_context = streamablehttp_client(url, headers=headers) - transport = await self.exit_stack.enter_async_context(self._streams_context) - read_stream, write_stream, _ = transport + transport = await exit_stack.enter_async_context(self._streams_context) + read_stream, write_stream, _ = transport - self._session_context = ClientSession( - read_stream, write_stream - ) # pylint: disable=W0201 + self._session_context = ClientSession( + read_stream, write_stream + ) # pylint: disable=W0201 - self.session = await self.exit_stack.enter_async_context( - self._session_context - ) - await self.session.initialize() - except Exception as e: - await self.disconnect() - raise e + self.session = await exit_stack.enter_async_context( + self._session_context + ) + with anyio.fail_after(10): + await self.session.initialize() + self.exit_stack = exit_stack.pop_all() + except Exception as e: + await asyncio.shield(self.disconnect()) + raise e + async def list_tool_specs(self) -> Optional[dict]: if not self.session: From 1df9305bc723b5f80758a2494518d8245bb1e8af Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 25 Oct 2025 23:01:13 -0700 Subject: [PATCH 18/57] refac --- backend/open_webui/utils/payload.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/open_webui/utils/payload.py b/backend/open_webui/utils/payload.py index 4a431dcab3..bf372e0e76 100644 --- a/backend/open_webui/utils/payload.py +++ b/backend/open_webui/utils/payload.py @@ -297,6 +297,10 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict: if "tools" in openai_payload: ollama_payload["tools"] = openai_payload["tools"] + if "max_tokens" in openai_payload: + ollama_payload["num_predict"] = openai_payload["max_tokens"] + del openai_payload["max_tokens"] + # If there are advanced parameters in the payload, format them in Ollama's options field if openai_payload.get("options"): ollama_payload["options"] = openai_payload["options"] From 62b722db89686cb9b8d5c9c897c0a71b738d16ea Mon Sep 17 00:00:00 2001 From: _00_ <131402327+rgaricano@users.noreply.github.com> Date: Sat, 25 Oct 2025 13:35:58 +0200 Subject: [PATCH 19/57] FIX:style_dark_mode_select_boxes ### UPD_Styles: Add dark mode styles for select elements and options. Actually some select "boxes" have css dark theme support, but other not. This PR add CSS for dark theme selects. --- src/app.css | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/app.css b/src/app.css index e8f4ee137b..7727f3fe9f 100644 --- a/src/app.css +++ b/src/app.css @@ -152,6 +152,16 @@ select { -webkit-appearance: none; } +.dark select { + background-color: rgb(23, 23, 23); /* gray-900 */ + color: rgb(209, 213, 219); /* gray-300 */ +} + +.dark select option { + background-color: rgb(38, 38, 38); /* gray-850 */ + color: rgb(255, 255, 255); +} + @keyframes shimmer { 0% { background-position: 200% 0; From 808a5aea78d1babac4fcc4c4487265ae914c077b Mon Sep 17 00:00:00 2001 From: _00_ <131402327+rgaricano@users.noreply.github.com> Date: Sun, 26 Oct 2025 09:40:05 +0100 Subject: [PATCH 20/57] UPD: Refactor dark select styles using Tailwind CSS classes ### UPD_Styles: Add dark mode styles for select elements and options. Actually some select "boxes" have css dark theme support, but other not. This PR add CSS for dark theme selects. --- src/app.css | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/app.css b/src/app.css index 7727f3fe9f..c8c0b0470e 100644 --- a/src/app.css +++ b/src/app.css @@ -153,13 +153,11 @@ select { } .dark select { - background-color: rgb(23, 23, 23); /* gray-900 */ - color: rgb(209, 213, 219); /* gray-300 */ + @apply bg-gray-900 text-gray-300; } - + .dark select option { - background-color: rgb(38, 38, 38); /* gray-850 */ - color: rgb(255, 255, 255); + @apply bg-gray-850 text-white; } @keyframes shimmer { From 09a506d286e476535af10af46dba15a180c4806d Mon Sep 17 00:00:00 2001 From: _00_ <131402327+rgaricano@users.noreply.github.com> Date: Sun, 26 Oct 2025 15:05:42 +0100 Subject: [PATCH 21/57] Refactor dark mode select styles-more specific Refactor dark mode select styles to be more specific and avoid interference with already classed select elements. --- src/app.css | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/app.css b/src/app.css index c8c0b0470e..cf5d0360cc 100644 --- a/src/app.css +++ b/src/app.css @@ -152,12 +152,12 @@ select { -webkit-appearance: none; } -.dark select { - @apply bg-gray-900 text-gray-300; -} +.dark select:not([class*="bg-transparent"]) { + @apply bg-gray-900 text-gray-300; +} -.dark select option { - @apply bg-gray-850 text-white; +.dark select option { + @apply bg-gray-850 text-white; } @keyframes shimmer { From 2db1ae471aeab56acdfd3689fdd22b8421a9ec46 Mon Sep 17 00:00:00 2001 From: wei840222 Date: Sun, 26 Oct 2025 15:00:37 +0800 Subject: [PATCH 22/57] refactor: replace requests with Firecrawl SDK in search and requests Firecrawl SDK in scrape rather than langchain_community FireCrawlLoader --- backend/open_webui/retrieval/web/firecrawl.py | 27 ++-- backend/open_webui/retrieval/web/utils.py | 119 +++++++++++------- backend/requirements.txt | 2 +- pyproject.toml | 2 +- 4 files changed, 87 insertions(+), 63 deletions(-) diff --git a/backend/open_webui/retrieval/web/firecrawl.py b/backend/open_webui/retrieval/web/firecrawl.py index a85fc51fbd..acad014d70 100644 --- a/backend/open_webui/retrieval/web/firecrawl.py +++ b/backend/open_webui/retrieval/web/firecrawl.py @@ -1,11 +1,11 @@ import logging from typing import Optional, List -from urllib.parse import urljoin -import requests from open_webui.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS +from firecrawl import Firecrawl + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -18,27 +18,18 @@ def search_firecrawl( filter_list: Optional[List[str]] = None, ) -> List[SearchResult]: try: - firecrawl_search_url = urljoin(firecrawl_url, "/v1/search") - response = requests.post( - firecrawl_search_url, - headers={ - "User-Agent": "Open WebUI (https://github.com/open-webui/open-webui) RAG Bot", - "Authorization": f"Bearer {firecrawl_api_key}", - }, - json={ - "query": query, - "limit": count, - }, + firecrawl = Firecrawl(api_key=firecrawl_api_key, api_url=firecrawl_url) + response = firecrawl.search( + query=query, limit=count, ignore_invalid_urls=True, timeout=count * 3 ) - response.raise_for_status() - results = response.json().get("data", []) + results = response.web if filter_list: results = get_filtered_results(results, filter_list) results = [ SearchResult( - link=result.get("url"), - title=result.get("title"), - snippet=result.get("description"), + link=result.url, + title=result.title, + snippet=result.description, ) for result in results[:count] ] diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 61356adb56..f5c89b4b58 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -4,7 +4,6 @@ import socket import ssl import urllib.parse import urllib.request -from collections import defaultdict from datetime import datetime, time, timedelta from typing import ( Any, @@ -21,7 +20,6 @@ import aiohttp import certifi import validators from langchain_community.document_loaders import PlaywrightURLLoader, WebBaseLoader -from langchain_community.document_loaders.firecrawl import FireCrawlLoader from langchain_community.document_loaders.base import BaseLoader from langchain_core.documents import Document from open_webui.retrieval.loaders.tavily import TavilyLoader @@ -39,7 +37,9 @@ from open_webui.config import ( EXTERNAL_WEB_LOADER_URL, EXTERNAL_WEB_LOADER_API_KEY, ) -from open_webui.env import SRC_LOG_LEVELS, AIOHTTP_CLIENT_SESSION_SSL +from open_webui.env import SRC_LOG_LEVELS + +from firecrawl import Firecrawl log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -189,13 +189,12 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin): (uses FIRE_CRAWL_API_KEY environment variable if not provided). api_url: Base URL for FireCrawl API. Defaults to official API endpoint. mode: Operation mode selection: - - 'crawl': Website crawling mode (default) - - 'scrape': Direct page scraping + - 'crawl': Website crawling mode + - 'scrape': Direct page scraping (default) - 'map': Site map generation proxy: Proxy override settings for the FireCrawl API. params: The parameters to pass to the Firecrawl API. - Examples include crawlerOptions. - For more details, visit: https://github.com/mendableai/firecrawl-py + For more details, visit: https://docs.firecrawl.dev/sdks/python#batch-scrape """ proxy_server = proxy.get("server") if proxy else None if trust_env and not proxy_server: @@ -215,50 +214,84 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin): self.api_key = api_key self.api_url = api_url self.mode = mode - self.params = params + self.params = params or {} def lazy_load(self) -> Iterator[Document]: - """Load documents concurrently using FireCrawl.""" - for url in self.web_paths: - try: - self._safe_process_url_sync(url) - loader = FireCrawlLoader( - url=url, - api_key=self.api_key, - api_url=self.api_url, - mode=self.mode, - params=self.params, + """Load documents using FireCrawl batch_scrape.""" + log.debug( + "Starting FireCrawl batch scrape for %d URLs, mode: %s, params: %s", + len(self.web_paths), + self.mode, + self.params, + ) + try: + firecrawl = Firecrawl(api_key=self.api_key, api_url=self.api_url) + result = firecrawl.batch_scrape( + self.web_paths, + formats=["markdown"], + skip_tls_verification=not self.verify_ssl, + ignore_invalid_urls=True, + remove_base64_images=True, + max_age=300000, # 5 minutes https://docs.firecrawl.dev/features/fast-scraping#common-maxage-values + wait_timeout=len(self.web_paths) * 3, + **self.params, + ) + + if result.status != "completed": + raise RuntimeError( + f"FireCrawl batch scrape did not complete successfully. result: {result}" ) - for document in loader.lazy_load(): - if not document.metadata.get("source"): - document.metadata["source"] = document.metadata.get("sourceURL") - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(f"Error loading {url}: {e}") - continue + + for data in result.data: + metadata = data.metadata or {} + yield Document( + page_content=data.markdown or "", + metadata={"source": metadata.url or metadata.source_url or ""}, + ) + + except Exception as e: + if self.continue_on_failure: + log.exception(f"Error extracting content from URLs: {e}") + else: raise e async def alazy_load(self): """Async version of lazy_load.""" - for url in self.web_paths: - try: - await self._safe_process_url(url) - loader = FireCrawlLoader( - url=url, - api_key=self.api_key, - api_url=self.api_url, - mode=self.mode, - params=self.params, + log.debug( + "Starting FireCrawl batch scrape for %d URLs, mode: %s, params: %s", + len(self.web_paths), + self.mode, + self.params, + ) + try: + firecrawl = Firecrawl(api_key=self.api_key, api_url=self.api_url) + result = firecrawl.batch_scrape( + self.web_paths, + formats=["markdown"], + skip_tls_verification=not self.verify_ssl, + ignore_invalid_urls=True, + remove_base64_images=True, + max_age=300000, # 5 minutes https://docs.firecrawl.dev/features/fast-scraping#common-maxage-values + wait_timeout=len(self.web_paths) * 3, + **self.params, + ) + + if result.status != "completed": + raise RuntimeError( + f"FireCrawl batch scrape did not complete successfully. result: {result}" ) - async for document in loader.alazy_load(): - if not document.metadata.get("source"): - document.metadata["source"] = document.metadata.get("sourceURL") - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(f"Error loading {url}: {e}") - continue + + for data in result.data: + metadata = data.metadata or {} + yield Document( + page_content=data.markdown or "", + metadata={"source": metadata.url or metadata.source_url or ""}, + ) + + except Exception as e: + if self.continue_on_failure: + log.exception(f"Error extracting content from URLs: {e}") + else: raise e diff --git a/backend/requirements.txt b/backend/requirements.txt index 9e7ff206d5..0fdcb618fd 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -133,7 +133,7 @@ pytest-docker~=3.1.1 ldap3==2.9.1 ## Firecrawl -firecrawl-py==1.12.0 +firecrawl-py==4.5.0 ## Trace opentelemetry-api==1.37.0 diff --git a/pyproject.toml b/pyproject.toml index 1f50f8783d..87e88a1b06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,7 +151,7 @@ all = [ "oracledb==3.2.0", "colbert-ai==0.2.21", - "firecrawl-py==1.12.0", + "firecrawl-py==4.5.0", ] [project.scripts] From 902246cdb0b74d47038253b50aa7d50a18769a00 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 26 Oct 2025 17:22:23 -0700 Subject: [PATCH 23/57] refac --- backend/open_webui/retrieval/utils.py | 71 ++++++++++++++------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 69aee29ac2..08dcde34da 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -668,46 +668,51 @@ def get_sources_from_items( collection_names.append(f"file-{item['id']}") elif item.get("type") == "collection": - if ( - item.get("context") == "full" - or request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + # Manual Full Mode Toggle for Collection + knowledge_base = Knowledges.get_knowledge_by_id(item.get("id")) + + if knowledge_base and ( + user.role == "admin" + or knowledge_base.user_id == user.id + or has_access(user.id, "read", knowledge_base.access_control) ): - # Manual Full Mode Toggle for Collection - knowledge_base = Knowledges.get_knowledge_by_id(item.get("id")) - - if knowledge_base and ( - user.role == "admin" - or knowledge_base.user_id == user.id - or has_access(user.id, "read", knowledge_base.access_control) + if ( + item.get("context") == "full" + or request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL ): + if knowledge_base and ( + user.role == "admin" + or knowledge_base.user_id == user.id + or has_access(user.id, "read", knowledge_base.access_control) + ): - file_ids = knowledge_base.data.get("file_ids", []) + file_ids = knowledge_base.data.get("file_ids", []) - documents = [] - metadatas = [] - for file_id in file_ids: - file_object = Files.get_file_by_id(file_id) + documents = [] + metadatas = [] + for file_id in file_ids: + file_object = Files.get_file_by_id(file_id) - if file_object: - documents.append(file_object.data.get("content", "")) - metadatas.append( - { - "file_id": file_id, - "name": file_object.filename, - "source": file_object.filename, - } - ) + if file_object: + documents.append(file_object.data.get("content", "")) + metadatas.append( + { + "file_id": file_id, + "name": file_object.filename, + "source": file_object.filename, + } + ) - query_result = { - "documents": [documents], - "metadatas": [metadatas], - } - else: - # Fallback to collection names - if item.get("legacy"): - collection_names = item.get("collection_names", []) + query_result = { + "documents": [documents], + "metadatas": [metadatas], + } else: - collection_names.append(item["id"]) + # Fallback to collection names + if item.get("legacy"): + collection_names = item.get("collection_names", []) + else: + collection_names.append(item["id"]) elif item.get("docs"): # BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL From ea1f27638612f494ceaaee96ce3376c6d4bed63e Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 26 Oct 2025 19:00:27 -0700 Subject: [PATCH 24/57] enh: sidebar models collapsible --- src/lib/components/layout/Sidebar.svelte | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar.svelte b/src/lib/components/layout/Sidebar.svelte index e9e9d4ae1e..14e73f2046 100644 --- a/src/lib/components/layout/Sidebar.svelte +++ b/src/lib/components/layout/Sidebar.svelte @@ -891,7 +891,14 @@ {#if ($models ?? []).length > 0 && ($settings?.pinnedModels ?? []).length > 0} - + + + {/if} {#if $config?.features?.enable_channels && ($user?.role === 'admin' || $channels.length > 0)} From d68ba284db85670fe6cd4659a53dee2b19d7fc34 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sun, 26 Oct 2025 19:23:55 -0700 Subject: [PATCH 25/57] refac: chat navbar menu --- src/lib/components/chat/Artifacts.svelte | 133 +++---------------- src/lib/components/chat/Chat.svelte | 69 +++++++++- src/lib/components/layout/Navbar/Menu.svelte | 33 ++--- src/lib/stores/index.ts | 4 +- src/lib/utils/index.ts | 62 ++++++++- 5 files changed, 165 insertions(+), 136 deletions(-) diff --git a/src/lib/components/chat/Artifacts.svelte b/src/lib/components/chat/Artifacts.svelte index 188b476839..bf0681dc19 100644 --- a/src/lib/components/chat/Artifacts.svelte +++ b/src/lib/components/chat/Artifacts.svelte @@ -4,7 +4,14 @@ const i18n = getContext('i18n'); const dispatch = createEventDispatcher(); - import { artifactCode, chatId, settings, showArtifacts, showControls } from '$lib/stores'; + import { + artifactCode, + chatId, + settings, + showArtifacts, + showControls, + artifactContents + } from '$lib/stores'; import { copyToClipboard, createMessagesList } from '$lib/utils'; import XMark from '../icons/XMark.svelte'; @@ -15,8 +22,6 @@ import Download from '../icons/Download.svelte'; export let overlay = false; - export let history; - let messages = []; let contents: Array<{ type: string; content: string }> = []; let selectedContentIdx = 0; @@ -24,121 +29,11 @@ let copied = false; let iframeElement: HTMLIFrameElement; - $: if (history) { - messages = createMessagesList(history, history.currentId); - getContents(); - } else { - messages = []; - getContents(); - } - - const getContents = () => { - contents = []; - messages.forEach((message) => { - if (message?.role !== 'user' && message?.content) { - const codeBlockContents = message.content.match(/```[\s\S]*?```/g); - let codeBlocks = []; - - let htmlContent = ''; - let cssContent = ''; - let jsContent = ''; - - if (codeBlockContents) { - codeBlockContents.forEach((block) => { - const lang = block.split('\n')[0].replace('```', '').trim().toLowerCase(); - const code = block.replace(/```[\s\S]*?\n/, '').replace(/```$/, ''); - codeBlocks.push({ lang, code }); - }); - - codeBlocks.forEach((block) => { - const { lang, code } = block; - - if (lang === 'html') { - htmlContent += code + '\n'; - } else if (lang === 'css') { - cssContent += code + '\n'; - } else if (lang === 'javascript' || lang === 'js') { - jsContent += code + '\n'; - } - }); - } else { - const inlineHtml = message.content.match(/[\s\S]*?<\/html>/gi); - const inlineCss = message.content.match(/ \ No newline at end of file + From 7229c6f1d4eca29d72032138b7af82979259a6fe Mon Sep 17 00:00:00 2001 From: silentoplayz Date: Thu, 23 Oct 2025 05:40:31 -0400 Subject: [PATCH 28/57] fix: display correct keys for international keyboards Updates the ShortcutsModal to dynamically display the correct physical keys for users with non-US keyboard layouts. The `ShortcutItem` component now uses `navigator.keyboard.getLayoutMap()` to resolve `KeyboardEvent.code` values (e.g., "Slash") to the character they produce on the user's active keyboard layout (e.g., "-"). This ensures the displayed shortcuts match the keys the user needs to press. A fallback is included for older browsers that do not support this API. --- src/lib/components/chat/ShortcutItem.svelte | 37 +++++++++++++++++---- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src/lib/components/chat/ShortcutItem.svelte b/src/lib/components/chat/ShortcutItem.svelte index 494631a984..aaa4dd7de5 100644 --- a/src/lib/components/chat/ShortcutItem.svelte +++ b/src/lib/components/chat/ShortcutItem.svelte @@ -1,5 +1,5 @@ -
+
-
+
@@ -377,7 +377,7 @@ Based on the user's instruction, update and enhance the existing notes or select
{#if selectedContent} -
+
{selectedContent?.text} diff --git a/src/lib/components/notes/NoteEditor/Controls.svelte b/src/lib/components/notes/NoteEditor/Controls.svelte index df988c28d9..675056ee41 100644 --- a/src/lib/components/notes/NoteEditor/Controls.svelte +++ b/src/lib/components/notes/NoteEditor/Controls.svelte @@ -17,7 +17,7 @@ }; -
+
-
+
{#if files.length > 0} -
{$i18n.t('Files')}
+
{$i18n.t('Files')}
{#each files.filter((file) => file.type !== 'image') as file, fileIdx} diff --git a/src/lib/components/notes/NotePanel.svelte b/src/lib/components/notes/NotePanel.svelte index 676d86b83d..96e687854e 100644 --- a/src/lib/components/notes/NotePanel.svelte +++ b/src/lib/components/notes/NotePanel.svelte @@ -99,7 +99,7 @@ {#if show}
From 7c344d31fea46442494693e5f1f32f09f02567e8 Mon Sep 17 00:00:00 2001 From: sinejespersen Date: Mon, 27 Oct 2025 09:11:31 +0100 Subject: [PATCH 37/57] add danish translation --- src/lib/i18n/locales/da-DK/translation.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/i18n/locales/da-DK/translation.json b/src/lib/i18n/locales/da-DK/translation.json index 34e6b381eb..d07fba2fa1 100644 --- a/src/lib/i18n/locales/da-DK/translation.json +++ b/src/lib/i18n/locales/da-DK/translation.json @@ -1043,7 +1043,7 @@ "New Folder": "Ny mappe", "New Function": "Ny funktion", "New Knowledge": "", - "New Model": "", + "New Model": "Ny model", "New Note": "Ny note", "New Password": "Ny adgangskode", "New Prompt": "", From 655161840165d829e34ab5fbd69082427233fd17 Mon Sep 17 00:00:00 2001 From: Taylor Wilsdon Date: Sat, 18 Oct 2025 13:43:51 -0400 Subject: [PATCH 38/57] Add more granular information to oauth failure messages --- backend/open_webui/utils/oauth.py | 55 ++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index e0bf7582c6..4fa493b1d5 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -74,6 +74,8 @@ from mcp.shared.auth import ( OAuthMetadata, ) +from authlib.oauth2.rfc6749.errors import OAuth2Error + class OAuthClientInformationFull(OAuthClientMetadata): issuer: Optional[str] = None # URL of the OAuth server that issued this client @@ -150,6 +152,37 @@ def decrypt_data(data: str): raise +def _build_oauth_callback_error_message(exc: Exception) -> str: + """ + Produce a user-facing callback error string with actionable context. + Keeps the message short and strips newlines for safe redirect usage. + """ + if isinstance(exc, OAuth2Error): + parts = [p for p in [exc.error, exc.description] if p] + detail = " - ".join(parts) + elif isinstance(exc, HTTPException): + detail = exc.detail if isinstance(exc.detail, str) else str(exc.detail) + elif isinstance(exc, aiohttp.ClientResponseError): + detail = f"Upstream provider returned {exc.status}: {exc.message}" + elif isinstance(exc, aiohttp.ClientError): + detail = str(exc) + elif isinstance(exc, KeyError): + missing = str(exc).strip("'") + if missing.lower() == "state": + detail = "Missing state parameter in callback (session may have expired)" + else: + detail = f"Missing expected key '{missing}' in OAuth response" + else: + detail = str(exc) + + detail = detail.replace("\n", " ").strip() + if not detail: + detail = exc.__class__.__name__ + + message = f"OAuth callback failed: {detail}" + return message[:197] + "..." if len(message) > 200 else message + + def is_in_blocked_groups(group_name: str, groups: list) -> bool: """ Check if a group name matches any blocked pattern. @@ -621,8 +654,14 @@ class OAuthClientManager: error_message = "Failed to obtain OAuth token" log.warning(error_message) except Exception as e: - error_message = "OAuth callback error" - log.warning(f"OAuth callback error: {e}") + error_message = _build_oauth_callback_error_message(e) + log.warning( + "OAuth callback error for user_id=%s client_id=%s: %s", + user_id, + client_id, + error_message, + exc_info=True, + ) redirect_url = ( str(request.app.state.config.WEBUI_URL or request.base_url) @@ -630,7 +669,9 @@ class OAuthClientManager: if error_message: log.debug(error_message) - redirect_url = f"{redirect_url}/?error={error_message}" + redirect_url = ( + f"{redirect_url}/?error={urllib.parse.quote_plus(error_message)}" + ) return RedirectResponse(url=redirect_url, headers=response.headers) response = RedirectResponse(url=redirect_url, headers=response.headers) @@ -1104,7 +1145,13 @@ class OAuthManager: try: token = await client.authorize_access_token(request) except Exception as e: - log.warning(f"OAuth callback error: {e}") + detailed_error = _build_oauth_callback_error_message(e) + log.warning( + "OAuth callback error during authorize_access_token for provider %s: %s", + provider, + detailed_error, + exc_info=True, + ) raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED) # Try to get userinfo from the token first, some providers include it there From 2c0e76beeb6c5d000c8862c39cffd3e61a6f4e3b Mon Sep 17 00:00:00 2001 From: Taylor Wilsdon Date: Sat, 18 Oct 2025 14:00:46 -0400 Subject: [PATCH 39/57] Added a targeted utility to wipe all OAuth sessions for a provider so the cleanup can remove stale access tokens across every user when a connection is updated --- backend/open_webui/models/oauth_sessions.py | 11 +++++ backend/open_webui/routers/configs.py | 51 ++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/models/oauth_sessions.py b/backend/open_webui/models/oauth_sessions.py index 81ce220384..b0e465dbe7 100644 --- a/backend/open_webui/models/oauth_sessions.py +++ b/backend/open_webui/models/oauth_sessions.py @@ -262,5 +262,16 @@ class OAuthSessionTable: log.error(f"Error deleting OAuth sessions by user ID: {e}") return False + def delete_sessions_by_provider(self, provider: str) -> bool: + """Delete all OAuth sessions for a provider""" + try: + with get_db() as db: + db.query(OAuthSession).filter_by(provider=provider).delete() + db.commit() + return True + except Exception as e: + log.error(f"Error deleting OAuth sessions by provider {provider}: {e}") + return False + OAuthSessions = OAuthSessionTable() diff --git a/backend/open_webui/routers/configs.py b/backend/open_webui/routers/configs.py index e7fa13d1ff..e8e876eac7 100644 --- a/backend/open_webui/routers/configs.py +++ b/backend/open_webui/routers/configs.py @@ -1,4 +1,5 @@ import logging +import copy from fastapi import APIRouter, Depends, Request, HTTPException from pydantic import BaseModel, ConfigDict import aiohttp @@ -15,6 +16,7 @@ from open_webui.utils.tools import ( set_tool_servers, ) from open_webui.utils.mcp.client import MCPClient +from open_webui.models.oauth_sessions import OAuthSessions from open_webui.env import SRC_LOG_LEVELS @@ -165,12 +167,59 @@ async def set_tool_servers_config( form_data: ToolServersConfigForm, user=Depends(get_admin_user), ): - request.app.state.config.TOOL_SERVER_CONNECTIONS = [ + old_connections = copy.deepcopy( + request.app.state.config.TOOL_SERVER_CONNECTIONS or [] + ) + + new_connections = [ connection.model_dump() for connection in form_data.TOOL_SERVER_CONNECTIONS ] + old_mcp_connections = { + conn.get("info", {}).get("id"): conn + for conn in old_connections + if conn.get("type") == "mcp" + } + new_mcp_connections = { + conn.get("info", {}).get("id"): conn + for conn in new_connections + if conn.get("type") == "mcp" + } + + purge_oauth_clients = set() + + for server_id, old_conn in old_mcp_connections.items(): + if not server_id: + continue + + old_auth_type = old_conn.get("auth_type", "none") + new_conn = new_mcp_connections.get(server_id) + + if new_conn is None: + if old_auth_type == "oauth_2.1": + purge_oauth_clients.add(server_id) + continue + + new_auth_type = new_conn.get("auth_type", "none") + + if old_auth_type == "oauth_2.1": + if ( + new_auth_type != "oauth_2.1" + or old_conn.get("url") != new_conn.get("url") + or old_conn.get("info", {}).get("oauth_client_info") + != new_conn.get("info", {}).get("oauth_client_info") + ): + purge_oauth_clients.add(server_id) + + request.app.state.config.TOOL_SERVER_CONNECTIONS = new_connections + await set_tool_servers(request) + for server_id in purge_oauth_clients: + client_key = f"mcp:{server_id}" + request.app.state.oauth_client_manager.remove_client(client_key) + OAuthSessions.delete_sessions_by_provider(client_key) + for connection in request.app.state.config.TOOL_SERVER_CONNECTIONS: server_type = connection.get("type", "openapi") if server_type == "mcp": From 1bcbe1c0a064b0a45925979671f9f796b061195c Mon Sep 17 00:00:00 2001 From: Taylor Wilsdon Date: Sat, 18 Oct 2025 14:16:10 -0400 Subject: [PATCH 40/57] complete cleanup of oauth clients --- backend/open_webui/utils/oauth.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 4fa493b1d5..85f694cb13 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -401,8 +401,19 @@ class OAuthClientManager: return self.clients[client_id] def remove_client(self, client_id): + removed = False if client_id in self.clients: del self.clients[client_id] + removed = True + if hasattr(self.oauth, "_clients"): + if client_id in self.oauth._clients: + self.oauth._clients.pop(client_id, None) + removed = True + if hasattr(self.oauth, "_registry"): + if client_id in self.oauth._registry: + self.oauth._registry.pop(client_id, None) + removed = True + if removed: log.info(f"Removed OAuth client {client_id}") return True From 3247f3521b7480688fe7bfefd4c80c8504aea61e Mon Sep 17 00:00:00 2001 From: Taylor Wilsdon Date: Sat, 18 Oct 2025 16:53:44 -0400 Subject: [PATCH 41/57] Added a preflight authorize check that automatically re-registers MCP OAuth clients when the stored client ID no longer exists on the server, so the browser flow never hits the stale-ID failure --- backend/open_webui/utils/oauth.py | 206 +++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 85f694cb13..d24a379ede 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -1,4 +1,5 @@ import base64 +import copy import hashlib import logging import mimetypes @@ -417,6 +418,205 @@ class OAuthClientManager: log.info(f"Removed OAuth client {client_id}") return True + def _find_mcp_connection(self, request, client_id: str): + try: + connections = request.app.state.config.TOOL_SERVER_CONNECTIONS or [] + except Exception: + connections = [] + + normalized_client_id = client_id.split(":")[-1] + + for idx, connection in enumerate(connections): + if not isinstance(connection, dict): + continue + if connection.get("type") != "mcp": + continue + + info = connection.get("info") or {} + server_id = info.get("id") + if not server_id: + continue + + normalized_server_id = server_id.split(":")[-1] + if normalized_server_id == normalized_client_id: + return idx, connection + + return None, None + + async def _preflight_authorization_url( + self, client, client_info: OAuthClientInformationFull + ) -> bool: + # Only perform preflight checks for Starlette OAuth clients + if not hasattr(client, "create_authorization_url"): + return True + + redirect_uri = None + if client_info.redirect_uris: + redirect_uri = str(client_info.redirect_uris[0]) + + try: + auth_data = await client.create_authorization_url(redirect_uri=redirect_uri) + authorize_url = auth_data.get("url") + if not authorize_url: + return True + except Exception as e: + log.debug( + "Skipping OAuth preflight for client %s: %s", + client_info.client_id, + e, + ) + return True + + try: + async with aiohttp.ClientSession(trust_env=True) as session: + async with session.get( + authorize_url, + allow_redirects=False, + ssl=AIOHTTP_CLIENT_SESSION_SSL, + ) as resp: + if resp.status < 400: + return True + + body_text = await resp.text() + error = None + error_description = "" + content_type = resp.headers.get("content-type", "") + + if "application/json" in content_type: + try: + payload = json.loads(body_text) + error = payload.get("error") + error_description = payload.get( + "error_description", "" + ) + except json.JSONDecodeError: + error = None + error_description = "" + else: + error_description = body_text + + combined = f"{error or ''} {error_description}".lower() + if "invalid_client" in combined or "invalid client" in combined or "client id" in combined: + log.warning( + "OAuth client preflight detected invalid registration for %s: %s %s", + client_info.client_id, + error, + error_description, + ) + return False + except Exception as e: + log.debug( + "Skipping OAuth preflight network check for client %s: %s", + client_info.client_id, + e, + ) + + return True + + async def _re_register_client(self, request, client_id: str) -> bool: + idx, connection = self._find_mcp_connection(request, client_id) + if idx is None or connection is None: + log.warning( + "Unable to locate MCP tool server configuration for client %s during re-registration", + client_id, + ) + return False + + server_url = connection.get("url") + oauth_server_key = (connection.get("config") or {}).get("oauth_server_key") + + try: + oauth_client_info = ( + await get_oauth_client_info_with_dynamic_client_registration( + request, + client_id, + server_url, + oauth_server_key, + ) + ) + except Exception as e: + log.error( + "Dynamic client re-registration failed for %s: %s", + client_id, + e, + ) + return False + + encrypted_info = encrypt_data(oauth_client_info.model_dump(mode="json")) + + updated_connections = copy.deepcopy( + request.app.state.config.TOOL_SERVER_CONNECTIONS or [] + ) + if idx >= len(updated_connections): + log.error( + "MCP tool server index %s out of range during OAuth client re-registration for %s", + idx, + client_id, + ) + return False + + updated_connection = copy.deepcopy(connection) + updated_connection.setdefault("info", {}) + updated_connection["info"]["oauth_client_info"] = encrypted_info + updated_connections[idx] = updated_connection + + try: + request.app.state.config.TOOL_SERVER_CONNECTIONS = updated_connections + except Exception as e: + log.error( + "Failed to persist updated OAuth client info for %s: %s", + client_id, + e, + ) + return False + + self.remove_client(client_id) + self.add_client(client_id, oauth_client_info) + OAuthSessions.delete_sessions_by_provider(client_id) + + log.info("Re-registered OAuth client %s for MCP tool server", client_id) + return True + + async def _ensure_valid_client_registration( + self, request, client_id: str + ) -> None: + if not client_id.startswith("mcp:"): + return + + client = self.get_client(client_id) + client_info = self.get_client_info(client_id) + if client is None or client_info is None: + raise HTTPException(status.HTTP_404_NOT_FOUND) + + is_valid = await self._preflight_authorization_url(client, client_info) + if is_valid: + return + + log.info( + "Detected invalid OAuth client %s; attempting re-registration", + client_id, + ) + re_registered = await self._re_register_client(request, client_id) + if not re_registered: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to re-register OAuth client", + ) + + client = self.get_client(client_id) + client_info = self.get_client_info(client_id) + if client is None or client_info is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="OAuth client unavailable after re-registration", + ) + + if not await self._preflight_authorization_url(client, client_info): + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="OAuth client registration is still invalid after re-registration", + ) + def get_client(self, client_id): client = self.clients.get(client_id) return client["client"] if client else None @@ -602,10 +802,11 @@ class OAuthClientManager: return None async def handle_authorize(self, request, client_id: str) -> RedirectResponse: + await self._ensure_valid_client_registration(request, client_id) + client = self.get_client(client_id) if client is None: raise HTTPException(404) - client_info = self.get_client_info(client_id) if client_info is None: raise HTTPException(404) @@ -613,7 +814,8 @@ class OAuthClientManager: redirect_uri = ( client_info.redirect_uris[0] if client_info.redirect_uris else None ) - return await client.authorize_redirect(request, str(redirect_uri)) + redirect_uri_str = str(redirect_uri) if redirect_uri else None + return await client.authorize_redirect(request, redirect_uri_str) async def handle_callback(self, request, client_id: str, user_id: str, response): client = self.get_client(client_id) From f0bf0e3074ce1aa814b3c952b2eb70dec1664d69 Mon Sep 17 00:00:00 2001 From: Taylor Wilsdon Date: Sun, 19 Oct 2025 16:58:09 -0400 Subject: [PATCH 42/57] black fmt --- backend/open_webui/utils/oauth.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index d24a379ede..34fb441679 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -486,9 +486,7 @@ class OAuthClientManager: try: payload = json.loads(body_text) error = payload.get("error") - error_description = payload.get( - "error_description", "" - ) + error_description = payload.get("error_description", "") except json.JSONDecodeError: error = None error_description = "" @@ -496,7 +494,11 @@ class OAuthClientManager: error_description = body_text combined = f"{error or ''} {error_description}".lower() - if "invalid_client" in combined or "invalid client" in combined or "client id" in combined: + if ( + "invalid_client" in combined + or "invalid client" in combined + or "client id" in combined + ): log.warning( "OAuth client preflight detected invalid registration for %s: %s %s", client_info.client_id, @@ -577,9 +579,7 @@ class OAuthClientManager: log.info("Re-registered OAuth client %s for MCP tool server", client_id) return True - async def _ensure_valid_client_registration( - self, request, client_id: str - ) -> None: + async def _ensure_valid_client_registration(self, request, client_id: str) -> None: if not client_id.startswith("mcp:"): return From ec9359e360437c01a28878ff2194f46e76a57e1c Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 15:31:25 -0700 Subject: [PATCH 43/57] refac --- backend/open_webui/routers/configs.py | 62 +++++++-------------------- backend/open_webui/utils/oauth.py | 37 ++++++++-------- 2 files changed, 32 insertions(+), 67 deletions(-) diff --git a/backend/open_webui/routers/configs.py b/backend/open_webui/routers/configs.py index e8e876eac7..43ef73f29b 100644 --- a/backend/open_webui/routers/configs.py +++ b/backend/open_webui/routers/configs.py @@ -167,59 +167,27 @@ async def set_tool_servers_config( form_data: ToolServersConfigForm, user=Depends(get_admin_user), ): - old_connections = copy.deepcopy( - request.app.state.config.TOOL_SERVER_CONNECTIONS or [] - ) + mcp_server_ids = [ + conn.get("info", {}).get("id") + for conn in form_data.TOOL_SERVER_CONNECTIONS + if conn.get("type") == "mcp" + ] - new_connections = [ + for server_id in mcp_server_ids: + # Remove existing OAuth clients for MCP tool servers that are no longer present + client_key = f"mcp:{server_id}" + try: + request.app.state.oauth_client_manager.remove_client(client_key) + except: + pass + + # Set new tool server connections + request.app.state.config.TOOL_SERVER_CONNECTIONS = [ connection.model_dump() for connection in form_data.TOOL_SERVER_CONNECTIONS ] - old_mcp_connections = { - conn.get("info", {}).get("id"): conn - for conn in old_connections - if conn.get("type") == "mcp" - } - new_mcp_connections = { - conn.get("info", {}).get("id"): conn - for conn in new_connections - if conn.get("type") == "mcp" - } - - purge_oauth_clients = set() - - for server_id, old_conn in old_mcp_connections.items(): - if not server_id: - continue - - old_auth_type = old_conn.get("auth_type", "none") - new_conn = new_mcp_connections.get(server_id) - - if new_conn is None: - if old_auth_type == "oauth_2.1": - purge_oauth_clients.add(server_id) - continue - - new_auth_type = new_conn.get("auth_type", "none") - - if old_auth_type == "oauth_2.1": - if ( - new_auth_type != "oauth_2.1" - or old_conn.get("url") != new_conn.get("url") - or old_conn.get("info", {}).get("oauth_client_info") - != new_conn.get("info", {}).get("oauth_client_info") - ): - purge_oauth_clients.add(server_id) - - request.app.state.config.TOOL_SERVER_CONNECTIONS = new_connections - await set_tool_servers(request) - for server_id in purge_oauth_clients: - client_key = f"mcp:{server_id}" - request.app.state.oauth_client_manager.remove_client(client_key) - OAuthSessions.delete_sessions_by_provider(client_key) - for connection in request.app.state.config.TOOL_SERVER_CONNECTIONS: server_type = connection.get("type", "openapi") if server_type == "mcp": diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 34fb441679..30939eb20a 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -153,32 +153,32 @@ def decrypt_data(data: str): raise -def _build_oauth_callback_error_message(exc: Exception) -> str: +def _build_oauth_callback_error_message(e: Exception) -> str: """ Produce a user-facing callback error string with actionable context. Keeps the message short and strips newlines for safe redirect usage. """ - if isinstance(exc, OAuth2Error): - parts = [p for p in [exc.error, exc.description] if p] + if isinstance(e, OAuth2Error): + parts = [p for p in [e.error, e.description] if p] detail = " - ".join(parts) - elif isinstance(exc, HTTPException): - detail = exc.detail if isinstance(exc.detail, str) else str(exc.detail) - elif isinstance(exc, aiohttp.ClientResponseError): - detail = f"Upstream provider returned {exc.status}: {exc.message}" - elif isinstance(exc, aiohttp.ClientError): - detail = str(exc) - elif isinstance(exc, KeyError): - missing = str(exc).strip("'") + elif isinstance(e, HTTPException): + detail = e.detail if isinstance(e.detail, str) else str(e.detail) + elif isinstance(e, aiohttp.ClientResponseError): + detail = f"Upstream provider returned {e.status}: {e.message}" + elif isinstance(e, aiohttp.ClientError): + detail = str(e) + elif isinstance(e, KeyError): + missing = str(e).strip("'") if missing.lower() == "state": detail = "Missing state parameter in callback (session may have expired)" else: detail = f"Missing expected key '{missing}' in OAuth response" else: - detail = str(exc) + detail = str(e) detail = detail.replace("\n", " ").strip() if not detail: - detail = exc.__class__.__name__ + detail = e.__class__.__name__ message = f"OAuth callback failed: {detail}" return message[:197] + "..." if len(message) > 200 else message @@ -402,20 +402,18 @@ class OAuthClientManager: return self.clients[client_id] def remove_client(self, client_id): - removed = False if client_id in self.clients: del self.clients[client_id] - removed = True + log.info(f"Removed OAuth client {client_id}") + if hasattr(self.oauth, "_clients"): if client_id in self.oauth._clients: self.oauth._clients.pop(client_id, None) - removed = True + if hasattr(self.oauth, "_registry"): if client_id in self.oauth._registry: self.oauth._registry.pop(client_id, None) - removed = True - if removed: - log.info(f"Removed OAuth client {client_id}") + return True def _find_mcp_connection(self, request, client_id: str): @@ -574,7 +572,6 @@ class OAuthClientManager: self.remove_client(client_id) self.add_client(client_id, oauth_client_info) - OAuthSessions.delete_sessions_by_provider(client_id) log.info("Re-registered OAuth client %s for MCP tool server", client_id) return True From c6cbb05b84483b25f55c01bef6b0339552707ff4 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 15:38:59 -0700 Subject: [PATCH 44/57] refac --- backend/open_webui/main.py | 1 + backend/open_webui/routers/configs.py | 25 +++++++++++++------------ backend/open_webui/utils/oauth.py | 4 +++- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 1405a43061..da89fd7de4 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -1941,6 +1941,7 @@ if len(app.state.config.TOOL_SERVER_CONNECTIONS) > 0: if tool_server_connection.get("type", "openapi") == "mcp": server_id = tool_server_connection.get("info", {}).get("id") auth_type = tool_server_connection.get("auth_type", "none") + if server_id and auth_type == "oauth_2.1": oauth_client_info = tool_server_connection.get("info", {}).get( "oauth_client_info", "" diff --git a/backend/open_webui/routers/configs.py b/backend/open_webui/routers/configs.py index 43ef73f29b..5c08fded23 100644 --- a/backend/open_webui/routers/configs.py +++ b/backend/open_webui/routers/configs.py @@ -167,19 +167,19 @@ async def set_tool_servers_config( form_data: ToolServersConfigForm, user=Depends(get_admin_user), ): - mcp_server_ids = [ - conn.get("info", {}).get("id") - for conn in form_data.TOOL_SERVER_CONNECTIONS - if conn.get("type") == "mcp" - ] + for connection in request.app.state.config.TOOL_SERVER_CONNECTIONS: + server_type = connection.get("type", "openapi") + auth_type = connection.get("auth_type", "none") - for server_id in mcp_server_ids: - # Remove existing OAuth clients for MCP tool servers that are no longer present - client_key = f"mcp:{server_id}" - try: - request.app.state.oauth_client_manager.remove_client(client_key) - except: - pass + if auth_type == "oauth_2.1": + # Remove existing OAuth clients for tool servers + server_id = connection.get("info", {}).get("id") + client_key = f"{server_type}:{server_id}" + + try: + request.app.state.oauth_client_manager.remove_client(client_key) + except: + pass # Set new tool server connections request.app.state.config.TOOL_SERVER_CONNECTIONS = [ @@ -193,6 +193,7 @@ async def set_tool_servers_config( if server_type == "mcp": server_id = connection.get("info", {}).get("id") auth_type = connection.get("auth_type", "none") + if auth_type == "oauth_2.1" and server_id: try: oauth_client_info = connection.get("info", {}).get( diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 30939eb20a..03f7337774 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -582,6 +582,7 @@ class OAuthClientManager: client = self.get_client(client_id) client_info = self.get_client_info(client_id) + if client is None or client_info is None: raise HTTPException(status.HTTP_404_NOT_FOUND) @@ -593,6 +594,7 @@ class OAuthClientManager: "Detected invalid OAuth client %s; attempting re-registration", client_id, ) + re_registered = await self._re_register_client(request, client_id) if not re_registered: raise HTTPException( @@ -799,7 +801,7 @@ class OAuthClientManager: return None async def handle_authorize(self, request, client_id: str) -> RedirectResponse: - await self._ensure_valid_client_registration(request, client_id) + # await self._ensure_valid_client_registration(request, client_id) client = self.get_client(client_id) if client is None: From db658a730c32dcdb804d34beba07459caeda53f0 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 16:46:04 -0700 Subject: [PATCH 45/57] refac --- backend/open_webui/main.py | 97 ++++++++++++++++- backend/open_webui/utils/oauth.py | 175 ++++-------------------------- 2 files changed, 116 insertions(+), 156 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index da89fd7de4..14ee4dc870 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -482,9 +482,11 @@ from open_webui.utils.auth import ( ) from open_webui.utils.plugin import install_tool_and_function_dependencies from open_webui.utils.oauth import ( + get_oauth_client_info_with_dynamic_client_registration, + encrypt_data, + decrypt_data, OAuthManager, OAuthClientManager, - decrypt_data, OAuthClientInformationFull, ) from open_webui.utils.security_headers import SecurityHeadersMiddleware @@ -1987,6 +1989,64 @@ except Exception as e: ) +async def register_client(self, request, client_id: str) -> bool: + server_type, server_id = client_id.split(":", 1) + + connection = None + connection_idx = None + + for idx, conn in enumerate(request.app.state.config.TOOL_SERVER_CONNECTIONS or []): + if conn.get("type", "openapi") == server_type: + info = conn.get("info", {}) + if info.get("id") == server_id: + connection = conn + connection_idx = idx + break + + if connection is None or connection_idx is None: + log.warning( + f"Unable to locate MCP tool server configuration for client {client_id} during re-registration" + ) + return False + + server_url = connection.get("url") + oauth_server_key = (connection.get("config") or {}).get("oauth_server_key") + + try: + oauth_client_info = ( + await get_oauth_client_info_with_dynamic_client_registration( + request, + client_id, + server_url, + oauth_server_key, + ) + ) + except Exception as e: + log.error(f"Dynamic client re-registration failed for {client_id}: {e}") + return False + + try: + request.app.state.config.TOOL_SERVER_CONNECTIONS[connection_idx] = { + **connection, + "info": { + **connection.get("info", {}), + "oauth_client_info": encrypt_data( + oauth_client_info.model_dump(mode="json") + ), + }, + } + except Exception as e: + log.error( + f"Failed to persist updated OAuth client info for tool server {client_id}: {e}" + ) + return False + + oauth_client_manager.remove_client(client_id) + oauth_client_manager.add_client(client_id, oauth_client_info) + log.info(f"Re-registered OAuth client {client_id} for tool server") + return True + + @app.get("/oauth/clients/{client_id}/authorize") async def oauth_client_authorize( client_id: str, @@ -1994,6 +2054,41 @@ async def oauth_client_authorize( response: Response, user=Depends(get_verified_user), ): + # ensure_valid_client_registration + client = oauth_client_manager.get_client(client_id) + client_info = oauth_client_manager.get_client_info(client_id) + if client is None or client_info is None: + raise HTTPException(status.HTTP_404_NOT_FOUND) + + if not await oauth_client_manager._preflight_authorization_url(client, client_info): + log.info( + "Detected invalid OAuth client %s; attempting re-registration", + client_id, + ) + + re_registered = await register_client(request, client_id) + if not re_registered: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Failed to re-register OAuth client", + ) + + client = oauth_client_manager.get_client(client_id) + client_info = oauth_client_manager.get_client_info(client_id) + if client is None or client_info is None: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="OAuth client unavailable after re-registration", + ) + + if not await oauth_client_manager._preflight_authorization_url( + client, client_info + ): + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="OAuth client registration is still invalid after re-registration", + ) + return await oauth_client_manager.handle_authorize(request, client_id=client_id) diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 03f7337774..6889f377bc 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -416,34 +416,10 @@ class OAuthClientManager: return True - def _find_mcp_connection(self, request, client_id: str): - try: - connections = request.app.state.config.TOOL_SERVER_CONNECTIONS or [] - except Exception: - connections = [] - - normalized_client_id = client_id.split(":")[-1] - - for idx, connection in enumerate(connections): - if not isinstance(connection, dict): - continue - if connection.get("type") != "mcp": - continue - - info = connection.get("info") or {} - server_id = info.get("id") - if not server_id: - continue - - normalized_server_id = server_id.split(":")[-1] - if normalized_server_id == normalized_client_id: - return idx, connection - - return None, None - async def _preflight_authorization_url( self, client, client_info: OAuthClientInformationFull ) -> bool: + # TODO: Replace this logic with a more robust OAuth client registration validation # Only perform preflight checks for Starlette OAuth clients if not hasattr(client, "create_authorization_url"): return True @@ -454,168 +430,59 @@ class OAuthClientManager: try: auth_data = await client.create_authorization_url(redirect_uri=redirect_uri) - authorize_url = auth_data.get("url") - if not authorize_url: + authorization_url = auth_data.get("url") + + if not authorization_url: return True except Exception as e: log.debug( - "Skipping OAuth preflight for client %s: %s", - client_info.client_id, - e, + f"Skipping OAuth preflight for client {client_info.client_id}: {e}", ) return True try: async with aiohttp.ClientSession(trust_env=True) as session: async with session.get( - authorize_url, + authorization_url, allow_redirects=False, ssl=AIOHTTP_CLIENT_SESSION_SSL, ) as resp: if resp.status < 400: return True + response_text = await resp.text() - body_text = await resp.text() error = None error_description = "" - content_type = resp.headers.get("content-type", "") + content_type = resp.headers.get("content-type", "") if "application/json" in content_type: try: - payload = json.loads(body_text) + payload = json.loads(response_text) error = payload.get("error") error_description = payload.get("error_description", "") - except json.JSONDecodeError: - error = None - error_description = "" + except: + pass else: - error_description = body_text + error_description = response_text - combined = f"{error or ''} {error_description}".lower() - if ( - "invalid_client" in combined - or "invalid client" in combined - or "client id" in combined + error_message = f"{error or ''} {error_description or ''}".lower() + + if any( + keyword in error_message + for keyword in ("invalid_client", "invalid client", "client id") ): log.warning( - "OAuth client preflight detected invalid registration for %s: %s %s", - client_info.client_id, - error, - error_description, + f"OAuth client preflight detected invalid registration for {client_info.client_id}: {error} {error_description}" ) + return False except Exception as e: log.debug( - "Skipping OAuth preflight network check for client %s: %s", - client_info.client_id, - e, + f"Skipping OAuth preflight network check for client {client_info.client_id}: {e}" ) return True - async def _re_register_client(self, request, client_id: str) -> bool: - idx, connection = self._find_mcp_connection(request, client_id) - if idx is None or connection is None: - log.warning( - "Unable to locate MCP tool server configuration for client %s during re-registration", - client_id, - ) - return False - - server_url = connection.get("url") - oauth_server_key = (connection.get("config") or {}).get("oauth_server_key") - - try: - oauth_client_info = ( - await get_oauth_client_info_with_dynamic_client_registration( - request, - client_id, - server_url, - oauth_server_key, - ) - ) - except Exception as e: - log.error( - "Dynamic client re-registration failed for %s: %s", - client_id, - e, - ) - return False - - encrypted_info = encrypt_data(oauth_client_info.model_dump(mode="json")) - - updated_connections = copy.deepcopy( - request.app.state.config.TOOL_SERVER_CONNECTIONS or [] - ) - if idx >= len(updated_connections): - log.error( - "MCP tool server index %s out of range during OAuth client re-registration for %s", - idx, - client_id, - ) - return False - - updated_connection = copy.deepcopy(connection) - updated_connection.setdefault("info", {}) - updated_connection["info"]["oauth_client_info"] = encrypted_info - updated_connections[idx] = updated_connection - - try: - request.app.state.config.TOOL_SERVER_CONNECTIONS = updated_connections - except Exception as e: - log.error( - "Failed to persist updated OAuth client info for %s: %s", - client_id, - e, - ) - return False - - self.remove_client(client_id) - self.add_client(client_id, oauth_client_info) - - log.info("Re-registered OAuth client %s for MCP tool server", client_id) - return True - - async def _ensure_valid_client_registration(self, request, client_id: str) -> None: - if not client_id.startswith("mcp:"): - return - - client = self.get_client(client_id) - client_info = self.get_client_info(client_id) - - if client is None or client_info is None: - raise HTTPException(status.HTTP_404_NOT_FOUND) - - is_valid = await self._preflight_authorization_url(client, client_info) - if is_valid: - return - - log.info( - "Detected invalid OAuth client %s; attempting re-registration", - client_id, - ) - - re_registered = await self._re_register_client(request, client_id) - if not re_registered: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Failed to re-register OAuth client", - ) - - client = self.get_client(client_id) - client_info = self.get_client_info(client_id) - if client is None or client_info is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="OAuth client unavailable after re-registration", - ) - - if not await self._preflight_authorization_url(client, client_info): - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="OAuth client registration is still invalid after re-registration", - ) - def get_client(self, client_id): client = self.clients.get(client_id) return client["client"] if client else None @@ -801,8 +668,6 @@ class OAuthClientManager: return None async def handle_authorize(self, request, client_id: str) -> RedirectResponse: - # await self._ensure_valid_client_registration(request, client_id) - client = self.get_client(client_id) if client is None: raise HTTPException(404) From 21ca649ee4a3c986650a38d9ae074638b1a2a28f Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 16:46:49 -0700 Subject: [PATCH 46/57] refac --- backend/open_webui/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 14ee4dc870..105fc5c337 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -2066,8 +2066,8 @@ async def oauth_client_authorize( client_id, ) - re_registered = await register_client(request, client_id) - if not re_registered: + registered = await register_client(request, client_id) + if not registered: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to re-register OAuth client", From 5400e72fccce6b91c183968310af5a1f97dea777 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 23:42:00 -0700 Subject: [PATCH 47/57] refac --- .../chat/Messages/Markdown/MarkdownInlineTokens.svelte | 2 +- .../Messages/Markdown/MarkdownInlineTokens/TextToken.svelte | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte index 4abcbd3246..85ba8740c1 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens.svelte @@ -24,7 +24,7 @@ export let onSourceClick: Function = () => {}; -{#each tokens as token} +{#each tokens as token, tokenIdx (tokenIdx)} {#if token.type === 'escape'} {unescapeHtml(token.text)} {:else if token.type === 'html'} diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens/TextToken.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens/TextToken.svelte index d5ae387afe..e7efd10064 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens/TextToken.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownInlineTokens/TextToken.svelte @@ -13,7 +13,7 @@ {:else} {#each texts as text} - {text} + {text}{' '} {/each} {/if} From 4445c91d6da8740fa71adec8d03e647d3f34ee7e Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 27 Oct 2025 23:49:26 -0700 Subject: [PATCH 48/57] refac --- src/lib/components/workspace/Prompts.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/workspace/Prompts.svelte b/src/lib/components/workspace/Prompts.svelte index 079ce85bdb..8cac0d5298 100644 --- a/src/lib/components/workspace/Prompts.svelte +++ b/src/lib/components/workspace/Prompts.svelte @@ -33,7 +33,7 @@ let promptsImportInputElement: HTMLInputElement; let loaded = false; - let importFiles = ''; + let importFiles = null; let query = ''; let prompts = []; From 84daae984d94ac8600f3e4ad0c191654e7e030a8 Mon Sep 17 00:00:00 2001 From: Wang Weixuan Date: Tue, 28 Oct 2025 04:58:00 +0800 Subject: [PATCH 49/57] fix: use trusted env in web search loader Signed-off-by: Wang Weixuan --- backend/open_webui/retrieval/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 08dcde34da..da570330b3 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -71,6 +71,7 @@ def get_loader(request, url: str): url, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, + trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, ) From 5732ee70219b928921e878b6e5a7a8ee79e881f0 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 28 Oct 2025 00:06:52 -0700 Subject: [PATCH 50/57] enh: ELEVENLABS_API_BASE_URL env var --- backend/open_webui/config.py | 4 ++++ backend/open_webui/routers/audio.py | 9 +++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index c794065974..d0a76e0238 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -3343,6 +3343,10 @@ DEEPGRAM_API_KEY = PersistentConfig( os.getenv("DEEPGRAM_API_KEY", ""), ) +# ElevenLabs configuration +ELEVENLABS_API_BASE_URL = os.getenv( + "ELEVENLABS_API_BASE_URL", "https://api.elevenlabs.io" +) AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig( "AUDIO_STT_OPENAI_API_BASE_URL", diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index cb7a57b5b7..1213ffbd05 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -39,13 +39,14 @@ from open_webui.config import ( WHISPER_MODEL_DIR, CACHE_DIR, WHISPER_LANGUAGE, + ELEVENLABS_API_BASE_URL, ) from open_webui.constants import ERROR_MESSAGES from open_webui.env import ( + ENV, AIOHTTP_CLIENT_SESSION_SSL, AIOHTTP_CLIENT_TIMEOUT, - ENV, SRC_LOG_LEVELS, DEVICE_TYPE, ENABLE_FORWARD_USER_INFO_HEADERS, @@ -413,7 +414,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): timeout=timeout, trust_env=True ) as session: async with session.post( - f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}", + f"{ELEVENLABS_API_BASE_URL}/v1/text-to-speech/{voice_id}", json={ "text": payload["input"], "model_id": request.app.state.config.TTS_MODEL, @@ -1037,7 +1038,7 @@ def get_available_models(request: Request) -> list[dict]: elif request.app.state.config.TTS_ENGINE == "elevenlabs": try: response = requests.get( - "https://api.elevenlabs.io/v1/models", + f"{ELEVENLABS_API_BASE_URL}/v1/models", headers={ "xi-api-key": request.app.state.config.TTS_API_KEY, "Content-Type": "application/json", @@ -1141,7 +1142,7 @@ def get_elevenlabs_voices(api_key: str) -> dict: try: # TODO: Add retries response = requests.get( - "https://api.elevenlabs.io/v1/voices", + f"{ELEVENLABS_API_BASE_URL}/v1/voices", headers={ "xi-api-key": api_key, "Content-Type": "application/json", From 8dce54f3fc3f35ad8fc30433a7076c75c1135541 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 28 Oct 2025 00:34:53 -0700 Subject: [PATCH 51/57] refac/fix: kb image upload handling --- backend/open_webui/routers/files.py | 4 ++++ src/lib/apis/files/index.ts | 4 ++++ .../workspace/Knowledge/KnowledgeBase.svelte | 19 ++++++++++--------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 84d8f841cf..2a5c3e5bb1 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -115,6 +115,10 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us request.app.state.config.CONTENT_EXTRACTION_ENGINE == "external" ): process_file(request, ProcessFileForm(file_id=file_item.id), user=user) + else: + raise Exception( + f"File type {file.content_type} is not supported for processing" + ) else: log.info( f"File type {file.content_type} is not provided, but trying to process anyway" diff --git a/src/lib/apis/files/index.ts b/src/lib/apis/files/index.ts index 6a1763edb8..8351393e3c 100644 --- a/src/lib/apis/files/index.ts +++ b/src/lib/apis/files/index.ts @@ -63,6 +63,10 @@ export const uploadFile = async (token: string, file: File, metadata?: object | console.error(data.error); res.error = data.error; } + + if (res?.data) { + res.data = data; + } } } } diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 7be697c96f..86ed49e420 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -185,12 +185,6 @@ if (uploadedFile) { console.log(uploadedFile); - - if (uploadedFile.error) { - console.warn('File upload warning:', uploadedFile.error); - toast.warning(uploadedFile.error); - } - knowledge.files = knowledge.files.map((item) => { if (item.itemId === tempItemId) { item.id = uploadedFile.id; @@ -200,10 +194,17 @@ delete item.itemId; return item; }); - if (syncMode) { - await syncFileHandler(uploadedFile.id); + + if (uploadedFile.error) { + console.warn('File upload warning:', uploadedFile.error); + toast.warning(uploadedFile.error); + knowledge.files = knowledge.files.filter((file) => file.id !== uploadedFile.id); } else { - await addFileHandler(uploadedFile.id); + if (syncMode) { + await syncFileHandler(uploadedFile.id); + } else { + await addFileHandler(uploadedFile.id); + } } } else { toast.error($i18n.t('Failed to upload file.')); From 23634d43737fa2840fec7ec21c85d99741b8fa4c Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Thu, 30 Oct 2025 12:53:01 +0200 Subject: [PATCH 52/57] Update Directory sync process in Knowledge --- src/lib/apis/knowledge/index.ts | 12 +++++-- .../workspace/Knowledge/KnowledgeBase.svelte | 36 ++++++++++++++++++- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index f436cb86bb..dfbd671413 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -246,6 +246,7 @@ export const syncFileToKnowledgeById = async (token: string, id: string, fileId: return res; }; + export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { let error = null; @@ -281,10 +282,17 @@ export const updateFileFromKnowledgeById = async (token: string, id: string, fil return res; }; -export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { +export const removeFileFromKnowledgeById = async ( + token: string, + id: string, + fileId: string, + deleteFile: boolean = true +) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, { + const res = await fetch( + `${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove?delete_file=${deleteFile}`, + { method: 'POST', headers: { Accept: 'application/json', diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 86ed49e420..508428cd7d 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -79,6 +79,7 @@ let inputFiles = null; let syncMode = false; + let syncCollectedNames: Set = new Set(); let filteredItems = []; $: if (knowledge && knowledge.files) { @@ -129,6 +130,16 @@ const uploadFileHandler = async (file) => { console.log(file); + // When syncing a directory, remember each file's relative name used on upload. + if (syncMode) { + try { + // Track only base names to match server-side storage + const baseName = file.name?.split(/[\\/]/).pop() ?? file.name; + syncCollectedNames.add(baseName); + } catch (_) { + // no-op + } + } const tempItemId = uuidv4(); const fileItem = { @@ -389,8 +400,31 @@ // Helper function to maintain file paths within zip const syncDirectoryHandler = async () => { syncMode = true; + syncCollectedNames = new Set(); try { await uploadDirectoryHandler(); + + // After uploading and per-file syncs, remove KB files that are not present in the directory + const dirNames = new Set(Array.from(syncCollectedNames)); + const currentFiles = knowledge?.files ?? []; + const toRemove = currentFiles.filter((f) => !dirNames.has(f?.meta?.name ?? f?.filename)); + + for (const f of toRemove) { + // First remove from knowledge (and KB vectors) but keep file record + const updated = await removeFileFromKnowledgeById(localStorage.token, id, f.id, false).catch((e) => { + toast.error(`${e}`); + return null; + }); + if (updated) { + knowledge = updated; + } + + // Then delete the actual file (removes per-file vectors and storage) + await deleteFileById(localStorage.token, f.id).catch((e) => { + console.error(e); + }); + } + toast.success($i18n.t('Directory sync completed.')); } finally { syncMode = false; @@ -652,7 +686,7 @@ { syncDirectoryHandler(); From 4af97da3f7d612749b64b9666ecf5b9fd27c6742 Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 12:21:10 +0200 Subject: [PATCH 53/57] Console Hygiene and fix Serial Blocking Logic --- .../workspace/Knowledge/KnowledgeBase.svelte | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index f6089bc00e..1d856b0b99 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -124,7 +124,6 @@ const blob = new Blob([content], { type: 'text/plain' }); const file = blobToFile(blob, `${name}.txt`); - console.log(file); return file; }; @@ -408,20 +407,27 @@ const currentFiles = knowledge?.files ?? []; const toRemove = currentFiles.filter((f) => !dirNames.has(f?.meta?.name ?? f?.filename)); - for (const f of toRemove) { - // First remove from knowledge (and KB vectors) but keep file record - const updated = await removeFileFromKnowledgeById(localStorage.token, id, f.id, false).catch((e) => { - toast.error(`${e}`); - return null; - }); - if (updated) { - knowledge = updated; - } + await Promise.all( + toRemove.map(async (f) => { + // First remove from knowledge (and KB vectors) but keep file record + await removeFileFromKnowledgeById(localStorage.token, id, f.id, false).catch((e) => { + toast.error(`${e}`); + return null; + }); + // Then delete the actual file (removes per-file vectors and storage) + await deleteFileById(localStorage.token, f.id).catch((e) => { + console.error(e); + }); + }) + ); - // Then delete the actual file (removes per-file vectors and storage) - await deleteFileById(localStorage.token, f.id).catch((e) => { - console.error(e); - }); + // Refresh knowledge to ensure consistent state after concurrent operations + const refreshed = await getKnowledgeById(localStorage.token, id).catch((e) => { + toast.error(`${e}`); + return null; + }); + if (refreshed) { + knowledge = refreshed; } toast.success($i18n.t('Directory sync completed.')); @@ -466,7 +472,6 @@ const deleteFileHandler = async (fileId) => { try { - console.log('Starting file deletion process for:', fileId); // Remove from knowledge base only const updatedKnowledge = await removeFileFromKnowledgeById(localStorage.token, id, fileId); From f888904cb8d27ea0f6a1da19a4d2e7c0e06e628a Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 13:36:35 +0200 Subject: [PATCH 54/57] Create file batch sync endpoint --- backend/open_webui/routers/knowledge.py | 59 +++++ backend/open_webui/utils/knowledge_sync.py | 225 ++++++++++++++++++ src/lib/apis/knowledge/index.ts | 31 +++ .../workspace/Knowledge/KnowledgeBase.svelte | 18 +- 4 files changed, 330 insertions(+), 3 deletions(-) create mode 100644 backend/open_webui/utils/knowledge_sync.py diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 6c08c9b0bc..6b9da553e9 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -18,6 +18,7 @@ from open_webui.routers.retrieval import ( BatchProcessFilesForm, ) from open_webui.storage.provider import Storage +from open_webui.utils.knowledge_sync import sync_files_to_knowledge from open_webui.constants import ERROR_MESSAGES from open_webui.utils.auth import get_verified_user @@ -355,6 +356,9 @@ async def update_knowledge_by_id( class KnowledgeFileIdForm(BaseModel): file_id: str +class KnowledgeFileIdsForm(BaseModel): + file_ids: List[str] + @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse]) def add_file_to_knowledge_by_id( @@ -615,6 +619,61 @@ def sync_file_to_knowledge_by_id( ) +@router.post("/{id}/file/sync/batch", response_model=Optional[KnowledgeFilesResponse]) +def sync_files_to_knowledge_batch( + request: Request, + id: str, + form_data: KnowledgeFileIdsForm, + user=Depends(get_verified_user), +): + """ + Batch sync multiple files into a knowledge base. + Performing a single atomic update of the knowledge.data.file_ids. + """ + knowledge = Knowledges.get_knowledge_by_id(id=id) + if not knowledge: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + if ( + knowledge.user_id != user.id + and not has_access(user.id, "write", knowledge.access_control) + and user.role != "admin" + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + + try: + updated_knowledge, files_meta, warnings = sync_files_to_knowledge( + request=request, + knowledge_id=id, + new_file_ids=form_data.file_ids, + user=user, + ) + except HTTPException as e: + raise e + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + if warnings: + return KnowledgeFilesResponse( + **updated_knowledge.model_dump(), + files=files_meta, + warnings=warnings, + ) + + return KnowledgeFilesResponse( + **updated_knowledge.model_dump(), + files=files_meta, + ) + @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) def update_file_from_knowledge_by_id( request: Request, diff --git a/backend/open_webui/utils/knowledge_sync.py b/backend/open_webui/utils/knowledge_sync.py new file mode 100644 index 0000000000..7435dddb4b --- /dev/null +++ b/backend/open_webui/utils/knowledge_sync.py @@ -0,0 +1,225 @@ +import logging +import time +from typing import Optional + +from fastapi import HTTPException, Request, status + +from open_webui.internal.db import get_db +from open_webui.models.knowledge import Knowledge, Knowledges, KnowledgeModel +from open_webui.models.files import FileModel, FileMetadataResponse, Files +from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT +from open_webui.routers.retrieval import ( + process_file, + ProcessFileForm, + process_files_batch, + BatchProcessFilesForm, +) +from open_webui.storage.provider import Storage + +log = logging.getLogger(__name__) + + +def _update_knowledge_file_ids_atomic( + knowledge_id: str, remove_ids: set[str], add_ids: set[str] +) -> KnowledgeModel: + """ + Lock the knowledge row and atomically update file_ids by removing and adding + the provided sets. Prevents lost updates under concurrency. + """ + with get_db() as db: + row = ( + db.query(Knowledge) + .with_for_update() # row-level lock + .filter_by(id=knowledge_id) + .first() + ) + if not row: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Knowledge not found" + ) + + data = dict(row.data or {}) + current_ids = list(data.get("file_ids", [])) + new_set = set(current_ids) + if remove_ids: + new_set.difference_update(remove_ids) + if add_ids: + new_set.update(add_ids) + + data["file_ids"] = list(new_set) + + db.query(Knowledge).filter_by(id=knowledge_id).update( + {"data": data, "updated_at": int(time.time())} + ) + db.commit() + + # Return fresh model after commit + return Knowledges.get_knowledge_by_id(knowledge_id) + + +def sync_files_to_knowledge( + request: Request, knowledge_id: str, new_file_ids: list[str], user +) -> tuple[KnowledgeModel, list[FileMetadataResponse], Optional[dict]]: + """ + Batch sync a list of uploaded files into a knowledge base, handling: + - skip if same-named file with identical hash already present + - replace if same-named file with different hash exists + - add if no same-named file exists + + Steps: + 1) Ensure each incoming file is processed to compute hash/content. + 2) Compute skip/replace/add sets based on filename + hash comparison. + 3) Cleanup (vectors, storage, db) for skipped new files and replaced old files. + 4) Batch process embeddings for new additions (add + replace targets). + 5) Atomically update knowledge.data.file_ids under a row lock. + + Returns: (updated_knowledge_model, files_metadata, optional_warnings) + """ + knowledge = Knowledges.get_knowledge_by_id(id=knowledge_id) + if not knowledge: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Knowledge not found" + ) + + # Deduplicate incoming list by preserving order + seen: set[str] = set() + incoming_ids: list[str] = [] + for fid in new_file_ids: + if fid not in seen: + seen.add(fid) + incoming_ids.append(fid) + + existing_ids = (knowledge.data or {}).get("file_ids", []) + existing_files: list[FileModel] = ( + Files.get_files_by_ids(existing_ids) if existing_ids else [] + ) + + # Build lookup by filename for existing KB files + existing_by_name: dict[str, FileModel] = {} + for f in existing_files: + if f and f.filename: + existing_by_name[f.filename] = f + + to_skip_new_ids: set[str] = set() # identical by hash -> delete uploaded + to_replace_old_to_new: dict[str, str] = {} # old_id -> new_id + to_add_ids: set[str] = set() + + errors: list[str] = [] + + # Ensure each incoming file is processed enough to have hash/content + for fid in incoming_ids: + new_file = Files.get_file_by_id(fid) + if not new_file: + errors.append(f"File {fid} not found") + continue + + if not (new_file.hash and new_file.data and new_file.data.get("content")): + try: + # Process without specifying collection to generate content/hash + process_file(request, ProcessFileForm(file_id=new_file.id), user=user) + new_file = Files.get_file_by_id(new_file.id) # refresh + except Exception as e: + log.debug(e) + errors.append(f"Failed to process file {new_file.id}: {e}") + continue + + same_name_file = existing_by_name.get(new_file.filename) + + if same_name_file: + # If hashes match, skip (discard the new upload) + if ( + same_name_file.hash + and new_file.hash + and same_name_file.hash == new_file.hash + ): + to_skip_new_ids.add(new_file.id) + else: + # Hash differs -> replace old with new + to_replace_old_to_new[same_name_file.id] = new_file.id + else: + # No existing file with same name -> add + to_add_ids.add(new_file.id) + + # Clean up skipped new files (remove their own vectors/collections, storage, db) + for new_id in list(to_skip_new_ids): + try: + try: + VECTOR_DB_CLIENT.delete_collection(collection_name=f"file-{new_id}") + except Exception as ve: + log.debug(ve) + new_file = Files.get_file_by_id(new_id) + if new_file and new_file.path: + try: + Storage.delete_file(new_file.path) + except Exception as se: + log.debug(se) + Files.delete_file_by_id(new_id) + except Exception as e: + log.debug(e) + errors.append(f"Failed cleanup for skipped file {new_id}: {e}") + + # For replacements, remove old file's embeddings, collections, storage, and db record + for old_id, new_id in list(to_replace_old_to_new.items()): + try: + try: + VECTOR_DB_CLIENT.delete( + collection_name=knowledge_id, filter={"file_id": old_id} + ) + except Exception as ve: + log.debug(ve) + try: + if VECTOR_DB_CLIENT.has_collection( + collection_name=f"file-{old_id}" + ): + VECTOR_DB_CLIENT.delete_collection( + collection_name=f"file-{old_id}" + ) + except Exception as ce: + log.debug(ce) + + old_file = Files.get_file_by_id(old_id) + if old_file and old_file.path: + try: + Storage.delete_file(old_file.path) + except Exception as se: + log.debug(se) + Files.delete_file_by_id(old_id) + except Exception as e: + log.debug(e) + errors.append(f"Failed replace cleanup for old file {old_id}: {e}") + + # Process embeddings for additions (to_add + replace targets) into KB collection + add_targets: set[str] = set(to_add_ids) | set(to_replace_old_to_new.values()) + if add_targets: + add_files: list[FileModel] = Files.get_files_by_ids(list(add_targets)) + try: + process_files_batch( + request=request, + form_data=BatchProcessFilesForm( + files=add_files, collection_name=knowledge_id + ), + user=user, + ) + except Exception as e: + log.error(f"Batch processing failed: {e}") + errors.append(f"Batch processing failed: {e}") + + # Atomically update knowledge.data.file_ids under lock + updated_knowledge = _update_knowledge_file_ids_atomic( + knowledge_id=knowledge_id, + remove_ids=set(to_replace_old_to_new.keys()), + add_ids=add_targets, + ) + + # Prepare response files + final_ids = (updated_knowledge.data or {}).get("file_ids", []) + files_meta: list[FileMetadataResponse] = Files.get_file_metadatas_by_ids(final_ids) + + warnings = None + if errors: + warnings = { + "message": "Some sync operations encountered errors", + "errors": errors, + } + + return updated_knowledge, files_meta, warnings diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index dfbd671413..8a470b1a87 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -388,6 +388,37 @@ export const deleteKnowledgeById = async (token: string, id: string) => { return res; }; +export const syncFilesToKnowledgeByIdBatch = async (token: string, id: string, fileIds: string[]) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync/batch`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_ids: fileIds + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.error(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const reindexKnowledgeFiles = async (token: string) => { let error = null; diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 1d856b0b99..1f40b4aa3b 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -31,7 +31,8 @@ removeFileFromKnowledgeById, updateFileFromKnowledgeById, updateKnowledgeById, - syncFileToKnowledgeById + syncFileToKnowledgeById, + syncFilesToKnowledgeByIdBatch } from '$lib/apis/knowledge'; import { blobToFile } from '$lib/utils'; @@ -80,6 +81,7 @@ let inputFiles = null; let syncMode = false; let syncCollectedNames: Set = new Set(); + let syncCollectedIds: string[] = []; let filteredItems = []; $: if (knowledge && knowledge.files) { @@ -210,7 +212,7 @@ knowledge.files = knowledge.files.filter((file) => file.id !== uploadedFile.id); } else { if (syncMode) { - await syncFileHandler(uploadedFile.id); + syncCollectedIds.push(uploadedFile.id); } else { await addFileHandler(uploadedFile.id); } @@ -399,10 +401,20 @@ const syncDirectoryHandler = async () => { syncMode = true; syncCollectedNames = new Set(); + syncCollectedIds = []; try { await uploadDirectoryHandler(); - // After uploading and per-file syncs, remove KB files that are not present in the directory + // After uploading, sync all new/updated files in one batch + const batchRes = await syncFilesToKnowledgeByIdBatch(localStorage.token, id, syncCollectedIds).catch((e) => { + toast.error(`${e}`); + return null; + }); + if (batchRes) { + knowledge = batchRes; + } + + // After batch sync, remove KB files that are not present in the directory const dirNames = new Set(Array.from(syncCollectedNames)); const currentFiles = knowledge?.files ?? []; const toRemove = currentFiles.filter((f) => !dirNames.has(f?.meta?.name ?? f?.filename)); From a940f3e10d4dd194ea640d12fcbd569c8115fa0a Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 15:50:43 +0200 Subject: [PATCH 55/57] Remove single file sync endpoint --- backend/open_webui/routers/knowledge.py | 171 ------------------ src/lib/apis/knowledge/index.ts | 33 ---- .../workspace/Knowledge/KnowledgeBase.svelte | 15 +- 3 files changed, 1 insertion(+), 218 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 6b9da553e9..4699394c8f 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -445,178 +445,7 @@ def add_file_to_knowledge_by_id( ) -@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse]) -def sync_file_to_knowledge_by_id( - request: Request, - id: str, - form_data: KnowledgeFileIdForm, - user=Depends(get_verified_user), -): - """ - Sync a single file into a knowledge base by filename with hash comparison: - - If a file with the same name exists and hashes match: skip (discard the new upload). - - If a file with the same name exists and hashes differ: replace old with new. - - If no same-named file exists: add new. - """ - log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}") - knowledge = Knowledges.get_knowledge_by_id(id=id) - if not knowledge: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.NOT_FOUND, - ) - - if ( - knowledge.user_id != user.id - and not has_access(user.id, "write", knowledge.access_control) - and user.role != "admin" - ): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) - - new_file = Files.get_file_by_id(form_data.file_id) - if not new_file: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.NOT_FOUND, - ) - - # Ensure the new file is processed so that hash/content exist - if not (new_file.hash and new_file.data and new_file.data.get("content")): - try: - process_file( - request, - ProcessFileForm(file_id=form_data.file_id), - user=user, - ) - new_file = Files.get_file_by_id(form_data.file_id) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) - - data = knowledge.data or {} - file_ids = data.get("file_ids", []) - - existing_files = Files.get_files_by_ids(file_ids) if file_ids else [] - same_name_file = next( - (f for f in existing_files if f.filename == new_file.filename), None - ) - - if same_name_file: - # If hashes match, skip (discard the new upload) and keep existing - if ( - same_name_file.hash - and new_file.hash - and same_name_file.hash == new_file.hash - ): - try: - # Cleanup new file's vector collection if exists - try: - VECTOR_DB_CLIENT.delete_collection( - collection_name=f"file-{new_file.id}" - ) - except Exception as e: - log.debug(e) - try: - if new_file.path: - Storage.delete_file(new_file.path) - except Exception as e: - log.debug(e) - Files.delete_file_by_id(new_file.id) - except Exception as e: - log.debug(e) - - log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}") - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - - # Hash is different: replace old with new - try: - # Remove old file's embeddings from KB collection - try: - VECTOR_DB_CLIENT.delete( - collection_name=knowledge.id, filter={"file_id": same_name_file.id} - ) - except Exception as e: - log.debug(e) - - # Remove old file's own collection and DB record - try: - if VECTOR_DB_CLIENT.has_collection( - collection_name=f"file-{same_name_file.id}" - ): - VECTOR_DB_CLIENT.delete_collection( - collection_name=f"file-{same_name_file.id}" - ) - except Exception as e: - log.debug(e) - try: - if same_name_file.path: - Storage.delete_file(same_name_file.path) - except Exception as e: - log.debug(e) - Files.delete_file_by_id(same_name_file.id) - - # Add new file to KB collection - process_file( - request, - ProcessFileForm(file_id=new_file.id, collection_name=id), - user=user, - ) - log.info( - f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} " - f"new_id={new_file.id} name={new_file.filename}" - ) - - # Replace old id with new id in knowledge - file_ids = [fid for fid in file_ids if fid != same_name_file.id] - if new_file.id not in file_ids: - file_ids.append(new_file.id) - data["file_ids"] = file_ids - knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) - - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) - else: - # No same-named file: add new - try: - process_file( - request, - ProcessFileForm(file_id=new_file.id, collection_name=id), - user=user, - ) - log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}") - if new_file.id not in file_ids: - file_ids.append(new_file.id) - data["file_ids"] = file_ids - knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) - - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) @router.post("/{id}/file/sync/batch", response_model=Optional[KnowledgeFilesResponse]) diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index 8a470b1a87..7f21eb416c 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -212,39 +212,6 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId: return res; }; -export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => { - let error = null; - - const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - file_id: fileId - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .then((json) => { - return json; - }) - .catch((err) => { - error = err.detail; - console.error(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 1f40b4aa3b..6191d8f5ba 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -31,7 +31,6 @@ removeFileFromKnowledgeById, updateFileFromKnowledgeById, updateKnowledgeById, - syncFileToKnowledgeById, syncFilesToKnowledgeByIdBatch } from '$lib/apis/knowledge'; import { blobToFile } from '$lib/utils'; @@ -130,7 +129,6 @@ }; const uploadFileHandler = async (file) => { - console.log(file); // When syncing a directory, remember each file's relative name used on upload. if (syncMode) { try { @@ -164,10 +162,6 @@ ($config?.file?.max_size ?? null) !== null && file.size > ($config?.file?.max_size ?? 0) * 1024 * 1024 ) { - console.log('File exceeds max size limit:', { - fileSize: file.size, - maxSize: ($config?.file?.max_size ?? 0) * 1024 * 1024 - }); toast.error( $i18n.t(`File size should not exceed {{maxSize}} MB.`, { maxSize: $config?.file?.max_size @@ -196,7 +190,6 @@ }); if (uploadedFile) { - console.log(uploadedFile); knowledge.files = knowledge.files.map((item) => { if (item.itemId === tempItemId) { item.id = uploadedFile.id; @@ -314,8 +307,6 @@ if (totalFiles > 0) { await processDirectory(dirHandle); - } else { - console.log('No files to upload.'); } }; @@ -466,7 +457,7 @@ }; const syncFileHandler = async (fileId) => { - const updatedKnowledge = await syncFileToKnowledgeById(localStorage.token, id, fileId).catch( + const updatedKnowledge = await syncFilesToKnowledgeByIdBatch(localStorage.token, id, [fileId]).catch( (e) => { toast.error(`${e}`); return null; @@ -488,7 +479,6 @@ // Remove from knowledge base only const updatedKnowledge = await removeFileFromKnowledgeById(localStorage.token, id, fileId); - console.log('Knowledge base updated:', updatedKnowledge); if (updatedKnowledge) { knowledge = updatedKnowledge; @@ -502,7 +492,6 @@ const updateFileContentHandler = async () => { if (isSaving) { - console.log('Save operation already in progress, skipping...'); return; } isSaving = true; @@ -533,7 +522,6 @@ }; const changeDebounceHandler = () => { - console.log('debounce'); if (debounceTimeout) { clearTimeout(debounceTimeout); } @@ -1010,7 +998,6 @@ selectedFileId = selectedFileId === e.detail ? null : e.detail; }} on:delete={(e) => { - console.log(e.detail); selectedFileId = null; deleteFileHandler(e.detail); From 62311e4b5afdb17a2be978f03d7976ca60745177 Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 16:11:44 +0200 Subject: [PATCH 56/57] Fix Python styling --- backend/open_webui/routers/knowledge.py | 4 +--- backend/open_webui/utils/knowledge_sync.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index e1228a72b5..b8ac0de2d2 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -358,6 +358,7 @@ async def update_knowledge_by_id( class KnowledgeFileIdForm(BaseModel): file_id: str + class KnowledgeFileIdsForm(BaseModel): file_ids: List[str] @@ -447,9 +448,6 @@ def add_file_to_knowledge_by_id( ) - - - @router.post("/{id}/file/sync/batch", response_model=Optional[KnowledgeFilesResponse]) def sync_files_to_knowledge_batch( request: Request, diff --git a/backend/open_webui/utils/knowledge_sync.py b/backend/open_webui/utils/knowledge_sync.py index 7435dddb4b..3dcfa51e63 100644 --- a/backend/open_webui/utils/knowledge_sync.py +++ b/backend/open_webui/utils/knowledge_sync.py @@ -168,12 +168,8 @@ def sync_files_to_knowledge( except Exception as ve: log.debug(ve) try: - if VECTOR_DB_CLIENT.has_collection( - collection_name=f"file-{old_id}" - ): - VECTOR_DB_CLIENT.delete_collection( - collection_name=f"file-{old_id}" - ) + if VECTOR_DB_CLIENT.has_collection(collection_name=f"file-{old_id}"): + VECTOR_DB_CLIENT.delete_collection(collection_name=f"file-{old_id}") except Exception as ce: log.debug(ce) From 895a058fd5084e69cf863729e2acba77142bf18a Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 16:13:32 +0200 Subject: [PATCH 57/57] Fix Python styling --- backend/open_webui/routers/knowledge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index b8ac0de2d2..8412383e21 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -503,6 +503,7 @@ def sync_files_to_knowledge_batch( files=files_meta, ) + @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) def update_file_from_knowledge_by_id( request: Request,