Update Knowledge directory sync process

This commit is contained in:
Stoyan Zlatev 2025-10-27 19:48:26 +02:00
parent 267794638c
commit d3acc2be35
3 changed files with 237 additions and 18 deletions

View file

@ -441,6 +441,177 @@ def add_file_to_knowledge_by_id(
) )
@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse])
def sync_file_to_knowledge_by_id(
    request: Request,
    id: str,
    form_data: KnowledgeFileIdForm,
    user=Depends(get_verified_user),
):
    """
    Sync a single uploaded file into a knowledge base, matching by filename and
    comparing hashes:
    - If the file id is already listed in the knowledge base: return the current
      state untouched (the file must never be treated as its own duplicate).
    - If a file with the same name exists and hashes match: skip (discard the new upload).
    - If a file with the same name exists and hashes differ: replace old with new.
    - If no same-named file exists: add new.

    Returns a KnowledgeFilesResponse built from the (possibly updated) knowledge
    record plus the metadata of its files.

    Raises HTTPException (400) when the knowledge base or file is not found, when
    the user is neither owner, writer nor admin, or when processing fails.
    """
    log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}")

    knowledge = Knowledges.get_knowledge_by_id(id=id)
    if not knowledge:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    if (
        knowledge.user_id != user.id
        and not has_access(user.id, "write", knowledge.access_control)
        and user.role != "admin"
    ):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
        )

    new_file = Files.get_file_by_id(form_data.file_id)
    if not new_file:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # Ensure the new file is processed so that hash/content exist
    if not (new_file.hash and new_file.data and new_file.data.get("content")):
        try:
            process_file(
                request,
                ProcessFileForm(file_id=form_data.file_id),
                user=user,
            )
            new_file = Files.get_file_by_id(form_data.file_id)
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=str(e),
            )

    data = knowledge.data or {}
    file_ids = data.get("file_ids", [])

    # The submitted file is already a member of this knowledge base. Without this
    # guard the same-name lookup below could match the file itself, see identical
    # hashes, and delete the knowledge base's own copy while file_ids still
    # references it.
    if new_file.id in file_ids:
        log.info(f"[KB Sync] skip (already in kb) kb_id={id} name={new_file.filename}")
        files = Files.get_file_metadatas_by_ids(file_ids)
        return KnowledgeFilesResponse(
            **knowledge.model_dump(),
            files=files,
        )

    existing_files = Files.get_files_by_ids(file_ids) if file_ids else []
    same_name_file = next(
        (f for f in existing_files if f.filename == new_file.filename), None
    )

    if same_name_file:
        # If hashes match, skip (discard the new upload) and keep existing
        if (
            same_name_file.hash
            and new_file.hash
            and same_name_file.hash == new_file.hash
        ):
            # Best-effort cleanup of the redundant upload; a failure here must not
            # fail the sync, so errors are only logged.
            try:
                # Cleanup new file's vector collection if exists
                try:
                    VECTOR_DB_CLIENT.delete_collection(
                        collection_name=f"file-{new_file.id}"
                    )
                except Exception as e:
                    log.debug(e)
                try:
                    if new_file.path:
                        Storage.delete_file(new_file.path)
                except Exception as e:
                    log.debug(e)
                Files.delete_file_by_id(new_file.id)
            except Exception as e:
                log.debug(e)

            log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}")
            files = Files.get_file_metadatas_by_ids(file_ids)
            return KnowledgeFilesResponse(
                **knowledge.model_dump(),
                files=files,
            )

        # Hash is different: replace old with new
        try:
            # Index the new file into the KB collection BEFORE touching the old
            # one, so a processing failure leaves the existing file intact
            # instead of leaving file_ids pointing at a deleted record.
            process_file(
                request,
                ProcessFileForm(file_id=new_file.id, collection_name=id),
                user=user,
            )

            # Remove old file's embeddings from KB collection
            try:
                VECTOR_DB_CLIENT.delete(
                    collection_name=knowledge.id, filter={"file_id": same_name_file.id}
                )
            except Exception as e:
                log.debug(e)

            # Remove old file's own collection and DB record
            try:
                if VECTOR_DB_CLIENT.has_collection(
                    collection_name=f"file-{same_name_file.id}"
                ):
                    VECTOR_DB_CLIENT.delete_collection(
                        collection_name=f"file-{same_name_file.id}"
                    )
            except Exception as e:
                log.debug(e)
            try:
                if same_name_file.path:
                    Storage.delete_file(same_name_file.path)
            except Exception as e:
                log.debug(e)
            Files.delete_file_by_id(same_name_file.id)

            log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} new_id={new_file.id} name={new_file.filename}")

            # Replace old id with new id in knowledge
            file_ids = [fid for fid in file_ids if fid != same_name_file.id]
            if new_file.id not in file_ids:
                file_ids.append(new_file.id)
            data["file_ids"] = file_ids
            knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)

            files = Files.get_file_metadatas_by_ids(file_ids)
            return KnowledgeFilesResponse(
                **knowledge.model_dump(),
                files=files,
            )
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=str(e),
            )
    else:
        # No same-named file: add new
        try:
            process_file(
                request,
                ProcessFileForm(file_id=new_file.id, collection_name=id),
                user=user,
            )
            log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}")

            if new_file.id not in file_ids:
                file_ids.append(new_file.id)
            data["file_ids"] = file_ids
            knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)

            files = Files.get_file_metadatas_by_ids(file_ids)
            return KnowledgeFilesResponse(
                **knowledge.model_dump(),
                files=files,
            )
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=str(e),
            )
@router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
def update_file_from_knowledge_by_id( def update_file_from_knowledge_by_id(
request: Request, request: Request,

View file

@ -212,6 +212,40 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId:
return res; return res;
}; };
/**
 * Ask the backend to sync an already-uploaded file into a knowledge base
 * via `POST /knowledge/{id}/file/sync`.
 *
 * @param token - Bearer token sent in the `authorization` header.
 * @param id - Knowledge base id used in the request path.
 * @param fileId - Id of the uploaded file, sent as `file_id` in the JSON body.
 * @returns The parsed JSON response body, or `null` when the request failed
 *          without a truthy `detail` on the caught error.
 * @throws The `detail` value of the error payload when the request fails and
 *         the caught error carries a truthy `detail`.
 */
export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
	let failureDetail = null;
	let payload = null;

	try {
		const response = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, {
			method: 'POST',
			headers: {
				Accept: 'application/json',
				'Content-Type': 'application/json',
				authorization: `Bearer ${token}`
			},
			body: JSON.stringify({
				file_id: fileId
			})
		});

		// The body is parsed for both outcomes: it is the result on success and
		// the error payload on a non-2xx status.
		const parsed = await response.json();
		if (!response.ok) throw parsed;
		payload = parsed;
		// eslint-disable-next-line @typescript-eslint/no-explicit-any -- the error is either a JSON error payload or a runtime Error
	} catch (err: any) {
		failureDetail = err.detail;
		console.error(err);
		payload = null;
	}

	if (failureDetail) {
		throw failureDetail;
	}

	return payload;
};
export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
let error = null; let error = null;

View file

@ -29,9 +29,9 @@
getKnowledgeById, getKnowledgeById,
getKnowledgeBases, getKnowledgeBases,
removeFileFromKnowledgeById, removeFileFromKnowledgeById,
resetKnowledgeById,
updateFileFromKnowledgeById, updateFileFromKnowledgeById,
updateKnowledgeById updateKnowledgeById,
syncFileToKnowledgeById
} from '$lib/apis/knowledge'; } from '$lib/apis/knowledge';
import { blobToFile } from '$lib/utils'; import { blobToFile } from '$lib/utils';
@ -78,6 +78,7 @@
let showAccessControlModal = false; let showAccessControlModal = false;
let inputFiles = null; let inputFiles = null;
let syncMode = false;
let filteredItems = []; let filteredItems = [];
$: if (knowledge && knowledge.files) { $: if (knowledge && knowledge.files) {
@ -199,7 +200,11 @@
delete item.itemId; delete item.itemId;
return item; return item;
}); });
await addFileHandler(uploadedFile.id); if (syncMode) {
await syncFileHandler(uploadedFile.id);
} else {
await addFileHandler(uploadedFile.id);
}
} else { } else {
toast.error($i18n.t('Failed to upload file.')); toast.error($i18n.t('Failed to upload file.'));
} }
@ -382,20 +387,12 @@
// Helper function to maintain file paths within zip // Helper function to maintain file paths within zip
const syncDirectoryHandler = async () => { const syncDirectoryHandler = async () => {
if ((knowledge?.files ?? []).length > 0) { syncMode = true;
const res = await resetKnowledgeById(localStorage.token, id).catch((e) => { try {
toast.error(`${e}`); await uploadDirectoryHandler();
}); toast.success($i18n.t('Directory sync completed.'));
} finally {
if (res) { syncMode = false;
knowledge = res;
toast.success($i18n.t('Knowledge reset successfully.'));
// Upload directory
uploadDirectoryHandler();
}
} else {
uploadDirectoryHandler();
} }
}; };
@ -416,6 +413,23 @@
} }
}; };
// Sync one uploaded file into the current knowledge base and reflect the
// outcome in the UI: on success the local `knowledge` is replaced with the
// server's response; on failure the file is dropped from the local file list.
const syncFileHandler = async (fileId) => {
	let syncResult = null;

	try {
		syncResult = await syncFileToKnowledgeById(localStorage.token, id, fileId);
	} catch (e) {
		// Surface the backend's error detail; the generic failure toast follows below.
		toast.error(`${e}`);
	}

	if (!syncResult) {
		toast.error($i18n.t('Failed to sync file.'));
		knowledge.files = knowledge.files.filter((file) => file.id !== fileId);
		return;
	}

	knowledge = syncResult;
	toast.success($i18n.t('File synced successfully.'));
};
const deleteFileHandler = async (fileId) => { const deleteFileHandler = async (fileId) => {
try { try {
console.log('Starting file deletion process for:', fileId); console.log('Starting file deletion process for:', fileId);
@ -637,7 +651,7 @@
<SyncConfirmDialog <SyncConfirmDialog
bind:show={showSyncConfirmModal} bind:show={showSyncConfirmModal}
message={$i18n.t( message={$i18n.t(
'This will reset the knowledge base and sync all files. Do you wish to continue?' 'This will sync a directory: all modified files will be reuploaded. Do you wish to continue?'
)} )}
on:confirm={() => { on:confirm={() => {
syncDirectoryHandler(); syncDirectoryHandler();