diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py
index 71722d706e..1b296fb1eb 100644
--- a/backend/open_webui/routers/knowledge.py
+++ b/backend/open_webui/routers/knowledge.py
@@ -441,6 +441,180 @@ def add_file_to_knowledge_by_id(
         )
 
 
+def _knowledge_files_response(knowledge, file_ids):
+    """Return `knowledge` together with the metadata of the files in `file_ids`."""
+    return KnowledgeFilesResponse(
+        **knowledge.model_dump(),
+        files=Files.get_file_metadatas_by_ids(file_ids),
+    )
+
+
+def _purge_file(file, knowledge_id=None):
+    """
+    Best-effort removal of everything stored for `file`.
+
+    Removes the file's embeddings from the `knowledge_id` collection (when
+    given), its own `file-<id>` vector collection, its stored blob and its
+    database record. Every step is attempted independently; failures are
+    logged and swallowed so one failing backend does not block the others.
+    """
+    if knowledge_id is not None:
+        try:
+            VECTOR_DB_CLIENT.delete(
+                collection_name=knowledge_id, filter={"file_id": file.id}
+            )
+        except Exception as e:
+            log.warning(f"[KB Sync] cleanup failed (kb embeddings) file_id={file.id}: {e}")
+
+    try:
+        if VECTOR_DB_CLIENT.has_collection(collection_name=f"file-{file.id}"):
+            VECTOR_DB_CLIENT.delete_collection(collection_name=f"file-{file.id}")
+    except Exception as e:
+        log.warning(f"[KB Sync] cleanup failed (file collection) file_id={file.id}: {e}")
+
+    try:
+        if file.path:
+            Storage.delete_file(file.path)
+    except Exception as e:
+        log.warning(f"[KB Sync] cleanup failed (storage) file_id={file.id}: {e}")
+
+    try:
+        Files.delete_file_by_id(file.id)
+    except Exception as e:
+        log.warning(f"[KB Sync] cleanup failed (file record) file_id={file.id}: {e}")
+
+
+@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse])
+def sync_file_to_knowledge_by_id(
+    request: Request,
+    id: str,
+    form_data: KnowledgeFileIdForm,
+    user=Depends(get_verified_user),
+):
+    """
+    Sync a single file into a knowledge base by filename with hash comparison:
+    - If the file is already attached to the knowledge base: nothing to do.
+    - If a file with the same name exists and hashes match: skip (discard the new upload).
+    - If a file with the same name exists and hashes differ: replace old with new.
+    - If no same-named file exists: add new.
+
+    Returns the knowledge base with the metadata of its files. Raises
+    HTTPException (400) when the knowledge base or the file does not exist,
+    the user has no write access, or processing the file fails.
+    """
+    log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}")
+    knowledge = Knowledges.get_knowledge_by_id(id=id)
+
+    if not knowledge:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    if (
+        knowledge.user_id != user.id
+        and not has_access(user.id, "write", knowledge.access_control)
+        and user.role != "admin"
+    ):
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+        )
+
+    # NOTE(review): only access to the knowledge base is checked; whether `user`
+    # may use (and, on a hash match, delete) this file is not verified here.
+    new_file = Files.get_file_by_id(form_data.file_id)
+    if not new_file:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=ERROR_MESSAGES.NOT_FOUND,
+        )
+
+    data = knowledge.data or {}
+    file_ids = list(data.get("file_ids") or [])
+
+    # Re-syncing a file that is already attached must be a no-op. Without this
+    # guard the file would match itself by name and hash below and be deleted
+    # while the knowledge base still references it.
+    if new_file.id in file_ids:
+        log.info(f"[KB Sync] skip (already attached) kb_id={id} name={new_file.filename}")
+        return _knowledge_files_response(knowledge, file_ids)
+
+    # Ensure the new file is processed so that hash/content exist
+    if not (new_file.hash and new_file.data and new_file.data.get("content")):
+        try:
+            process_file(
+                request,
+                ProcessFileForm(file_id=form_data.file_id),
+                user=user,
+            )
+        except HTTPException:
+            raise
+        except Exception as e:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=str(e),
+            )
+        new_file = Files.get_file_by_id(form_data.file_id)
+        if not new_file:
+            # The record vanished while it was being processed
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=ERROR_MESSAGES.NOT_FOUND,
+            )
+
+    existing_files = Files.get_files_by_ids(file_ids) if file_ids else []
+    same_name_file = next(
+        (f for f in existing_files if f.filename == new_file.filename), None
+    )
+
+    # If hashes match, skip (discard the new upload) and keep existing
+    if (
+        same_name_file
+        and same_name_file.hash
+        and new_file.hash
+        and same_name_file.hash == new_file.hash
+    ):
+        _purge_file(new_file)
+        log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}")
+        return _knowledge_files_response(knowledge, file_ids)
+
+    try:
+        # Index the new file first: if this fails the knowledge base and any
+        # file it replaces are still intact.
+        process_file(
+            request,
+            ProcessFileForm(file_id=new_file.id, collection_name=id),
+            user=user,
+        )
+
+        if same_name_file:
+            # Hash is different: replace old id with new id in knowledge
+            file_ids = [fid for fid in file_ids if fid != same_name_file.id]
+            log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} new_id={new_file.id} name={new_file.filename}")
+        else:
+            # No same-named file: add new
+            log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}")
+        file_ids.append(new_file.id)
+        data["file_ids"] = file_ids
+        knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
+        response = _knowledge_files_response(knowledge, file_ids)
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e),
+        )
+
+    if same_name_file:
+        # Only now that the knowledge base points at the new file is it safe
+        # to drop the old one and its embeddings.
+        _purge_file(same_name_file, knowledge_id=id)
+
+    return response
+
+
 @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
 def update_file_from_knowledge_by_id(
     request: Request,
diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts
index c01c986a2a..f436cb86bb 100644
--- a/src/lib/apis/knowledge/index.ts
+++ b/src/lib/apis/knowledge/index.ts
@@ -212,6 +212,45 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId:
 	return res;
 };
 
+/**
+ * Sync an uploaded file into a knowledge base (POST /knowledge/{id}/file/sync).
+ * Resolves with the parsed response body, or null when a failure carries no
+ * `detail`; otherwise the `detail` of the failed request is thrown.
+ */
+export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
+	let error = null;
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, {
+		method: 'POST',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		},
+		body: JSON.stringify({
+			file_id: fileId
+		})
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
 	let error = null;
 
diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte
index 3c494e7609..d66971d6ee 100644
--- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte
+++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte
@@ -29,9 +29,9 @@
 		getKnowledgeById,
 		getKnowledgeBases,
 		removeFileFromKnowledgeById,
-		resetKnowledgeById,
 		updateFileFromKnowledgeById,
-		updateKnowledgeById
+		updateKnowledgeById,
+		syncFileToKnowledgeById
 	} from '$lib/apis/knowledge';
 	import { blobToFile } from '$lib/utils';
 
@@ -78,6 +78,7 @@
 	let showAccessControlModal = false;
 
 	let inputFiles = null;
+	let syncMode = false;
 
 	let filteredItems = [];
 	$: if (knowledge && knowledge.files) {
@@ -199,7 +200,11 @@
 					delete item.itemId;
 					return item;
 				});
-				await addFileHandler(uploadedFile.id);
+				if (syncMode) {
+					await syncFileHandler(uploadedFile.id);
+				} else {
+					await addFileHandler(uploadedFile.id);
+				}
 			} else {
 				toast.error($i18n.t('Failed to upload file.'));
 			}
@@ -382,20 +387,13 @@
 
 	// Helper function to maintain file paths within zip
 	const syncDirectoryHandler = async () => {
-		if ((knowledge?.files ?? []).length > 0) {
-			const res = await resetKnowledgeById(localStorage.token, id).catch((e) => {
-				toast.error(`${e}`);
-			});
-
-			if (res) {
-				knowledge = res;
-				toast.success($i18n.t('Knowledge reset successfully.'));
-
-				// Upload directory
-				uploadDirectoryHandler();
-			}
-		} else {
-			uploadDirectoryHandler();
+		// While set, the upload handler routes each uploaded file through syncFileHandler
+		syncMode = true;
+		try {
+			await uploadDirectoryHandler();
+			toast.success($i18n.t('Directory sync completed.'));
+		} finally {
+			syncMode = false;
 		}
 	};
 
@@ -416,6 +414,24 @@
 		}
 	};
 
+	// Sync one uploaded file into the knowledge base; the server decides skip / replace / add
+	const syncFileHandler = async (fileId) => {
+		try {
+			const updatedKnowledge = await syncFileToKnowledgeById(localStorage.token, id, fileId);
+			if (!updatedKnowledge) {
+				// The API helper resolves to null when the error carries no `detail`
+				throw $i18n.t('Failed to sync file.');
+			}
+
+			knowledge = updatedKnowledge;
+			toast.success($i18n.t('File synced successfully.'));
+		} catch (e) {
+			// Report the failure once and drop the uploaded file's entry from the list
+			toast.error(`${e}`);
+			knowledge.files = knowledge.files.filter((file) => file.id !== fileId);
+		}
+	};
+
 	const deleteFileHandler = async (fileId) => {
 		try {
 			console.log('Starting file deletion process for:', fileId);
@@ -637,7 +653,7 @@ { syncDirectoryHandler();