From a940f3e10d4dd194ea640d12fcbd569c8115fa0a Mon Sep 17 00:00:00 2001 From: Stoyan Zlatev Date: Tue, 25 Nov 2025 15:50:43 +0200 Subject: [PATCH] Remove single file sync endpoint --- backend/open_webui/routers/knowledge.py | 171 ------------------ src/lib/apis/knowledge/index.ts | 33 ---- .../workspace/Knowledge/KnowledgeBase.svelte | 15 +- 3 files changed, 1 insertion(+), 218 deletions(-) diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 6b9da553e9..4699394c8f 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -445,178 +445,7 @@ def add_file_to_knowledge_by_id( ) -@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse]) -def sync_file_to_knowledge_by_id( - request: Request, - id: str, - form_data: KnowledgeFileIdForm, - user=Depends(get_verified_user), -): - """ - Sync a single file into a knowledge base by filename with hash comparison: - - If a file with the same name exists and hashes match: skip (discard the new upload). - - If a file with the same name exists and hashes differ: replace old with new. - - If no same-named file exists: add new. - """ - log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}") - knowledge = Knowledges.get_knowledge_by_id(id=id) - if not knowledge: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.NOT_FOUND, - ) - - if ( - knowledge.user_id != user.id - and not has_access(user.id, "write", knowledge.access_control) - and user.role != "admin" - ): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) - - new_file = Files.get_file_by_id(form_data.file_id) - if not new_file: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.NOT_FOUND, - ) - - # Ensure the new file is processed so that hash/content exist - if not (new_file.hash and new_file.data and new_file.data.get("content")): - try: - process_file( - request, - ProcessFileForm(file_id=form_data.file_id), - user=user, - ) - new_file = Files.get_file_by_id(form_data.file_id) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) - - data = knowledge.data or {} - file_ids = data.get("file_ids", []) - - existing_files = Files.get_files_by_ids(file_ids) if file_ids else [] - same_name_file = next( - (f for f in existing_files if f.filename == new_file.filename), None - ) - - if same_name_file: - # If hashes match, skip (discard the new upload) and keep existing - if ( - same_name_file.hash - and new_file.hash - and same_name_file.hash == new_file.hash - ): - try: - # Cleanup new file's vector collection if exists - try: - VECTOR_DB_CLIENT.delete_collection( - collection_name=f"file-{new_file.id}" - ) - except Exception as e: - log.debug(e) - try: - if new_file.path: - Storage.delete_file(new_file.path) - except Exception as e: - log.debug(e) - Files.delete_file_by_id(new_file.id) - except Exception as e: - log.debug(e) - - log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}") - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - - # Hash is different: replace old with new - try: - # Remove old file's embeddings from KB collection - try: - VECTOR_DB_CLIENT.delete( - collection_name=knowledge.id, filter={"file_id": same_name_file.id} - ) - except Exception as e: - log.debug(e) - - # Remove old file's own collection and DB record - try: - if VECTOR_DB_CLIENT.has_collection( - collection_name=f"file-{same_name_file.id}" - ): - VECTOR_DB_CLIENT.delete_collection( - collection_name=f"file-{same_name_file.id}" - ) - except Exception as e: - log.debug(e) - try: - if same_name_file.path: - Storage.delete_file(same_name_file.path) - except Exception as e: - log.debug(e) - Files.delete_file_by_id(same_name_file.id) - - # Add new file to KB collection - process_file( - request, - ProcessFileForm(file_id=new_file.id, collection_name=id), - user=user, - ) - log.info( - f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} " - f"new_id={new_file.id} name={new_file.filename}" - ) - - # Replace old id with new id in knowledge - file_ids = [fid for fid in file_ids if fid != same_name_file.id] - if new_file.id not in file_ids: - file_ids.append(new_file.id) - data["file_ids"] = file_ids - knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) - - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) - else: - # No same-named file: add new - try: - process_file( - request, - ProcessFileForm(file_id=new_file.id, collection_name=id), - user=user, - ) - log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}") - if new_file.id not in file_ids: - file_ids.append(new_file.id) - data["file_ids"] = file_ids - knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) - - files = Files.get_file_metadatas_by_ids(file_ids) - return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=files, - ) - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e), - ) @router.post("/{id}/file/sync/batch", response_model=Optional[KnowledgeFilesResponse]) diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts index 8a470b1a87..7f21eb416c 100644 --- a/src/lib/apis/knowledge/index.ts +++ b/src/lib/apis/knowledge/index.ts @@ -212,39 +212,6 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId: return res; }; -export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => { - let error = null; - - const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - file_id: fileId - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .then((json) => { - return json; - }) - .catch((err) => { - error = err.detail; - console.error(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index 1f40b4aa3b..6191d8f5ba 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -31,7 +31,6 @@ removeFileFromKnowledgeById, updateFileFromKnowledgeById, updateKnowledgeById, - syncFileToKnowledgeById, syncFilesToKnowledgeByIdBatch } from '$lib/apis/knowledge'; import { blobToFile } from '$lib/utils'; @@ -130,7 +129,6 @@ }; const uploadFileHandler = async (file) => { - console.log(file); // When syncing a directory, remember each file's relative name used on upload. if (syncMode) { try { @@ -164,10 +162,6 @@ ($config?.file?.max_size ?? null) !== null && file.size > ($config?.file?.max_size ?? 0) * 1024 * 1024 ) { - console.log('File exceeds max size limit:', { - fileSize: file.size, - maxSize: ($config?.file?.max_size ?? 0) * 1024 * 1024 - }); toast.error( $i18n.t(`File size should not exceed {{maxSize}} MB.`, { maxSize: $config?.file?.max_size @@ -196,7 +190,6 @@ }); if (uploadedFile) { - console.log(uploadedFile); knowledge.files = knowledge.files.map((item) => { if (item.itemId === tempItemId) { item.id = uploadedFile.id; @@ -314,8 +307,6 @@ if (totalFiles > 0) { await processDirectory(dirHandle); - } else { - console.log('No files to upload.'); } }; @@ -466,7 +457,7 @@ }; const syncFileHandler = async (fileId) => { - const updatedKnowledge = await syncFileToKnowledgeById(localStorage.token, id, fileId).catch( + const updatedKnowledge = await syncFilesToKnowledgeByIdBatch(localStorage.token, id, [fileId]).catch( (e) => { toast.error(`${e}`); return null; @@ -488,7 +479,6 @@ // Remove from knowledge base only const updatedKnowledge = await removeFileFromKnowledgeById(localStorage.token, id, fileId); - console.log('Knowledge base updated:', updatedKnowledge); if (updatedKnowledge) { knowledge = updatedKnowledge; @@ -502,7 +492,6 @@ const updateFileContentHandler = async () => { if (isSaving) { - console.log('Save operation already in progress, skipping...'); return; } isSaving = true; @@ -533,7 +522,6 @@ }; const changeDebounceHandler = () => { - console.log('debounce'); if (debounceTimeout) { clearTimeout(debounceTimeout); } @@ -1010,7 +998,6 @@ selectedFileId = selectedFileId === e.detail ? null : e.detail; }} on:delete={(e) => { - console.log(e.detail); selectedFileId = null; deleteFileHandler(e.detail);