mirror of
https://github.com/open-webui/open-webui.git
synced 2025-12-13 21:05:19 +00:00
Update Knowledge directory sync process
This commit is contained in:
parent
267794638c
commit
d3acc2be35
3 changed files with 237 additions and 18 deletions
|
|
@ -441,6 +441,177 @@ def add_file_to_knowledge_by_id(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _purge_file_artifacts(file) -> None:
    """Best-effort removal of a file's own vector collection and stored blob.

    Failures are logged at debug level and never raised, so the caller can
    carry on with its bookkeeping. The file's DB record is NOT removed here;
    callers decide whether that deletion is allowed to fail.
    """
    try:
        collection_name = f"file-{file.id}"
        if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name):
            VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
    except Exception as e:
        log.debug(e)

    try:
        if file.path:
            Storage.delete_file(file.path)
    except Exception as e:
        log.debug(e)


def _knowledge_files_response(knowledge, file_ids):
    """Build the endpoint response for `knowledge` listing `file_ids`."""
    return KnowledgeFilesResponse(
        **knowledge.model_dump(),
        files=Files.get_file_metadatas_by_ids(file_ids),
    )


@router.post("/{id}/file/sync", response_model=Optional[KnowledgeFilesResponse])
def sync_file_to_knowledge_by_id(
    request: Request,
    id: str,
    form_data: KnowledgeFileIdForm,
    user=Depends(get_verified_user),
):
    """
    Sync a single file into a knowledge base by filename with hash comparison:
    - If the file is already attached to the knowledge base: no-op.
    - If a file with the same name exists and hashes match: skip (discard the new upload).
    - If a file with the same name exists and hashes differ: replace old with new.
    - If no same-named file exists: add new.

    Args:
        request: Incoming request, forwarded to `process_file`.
        id: Knowledge base id (also used as the KB vector collection name).
        form_data: Carries `file_id`, the id of the already-uploaded file.
        user: Verified user; must own the knowledge base, have "write"
            access to it, or be an admin.

    Returns:
        KnowledgeFilesResponse with the knowledge base and its file metadata.

    Raises:
        HTTPException (400): knowledge base or file not found, access
            prohibited, or any failure while processing/replacing the file.
    """
    log.info(f"[KB Sync] start kb_id={id} file_id={form_data.file_id}")
    knowledge = Knowledges.get_knowledge_by_id(id=id)

    if not knowledge:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    if (
        knowledge.user_id != user.id
        and not has_access(user.id, "write", knowledge.access_control)
        and user.role != "admin"
    ):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
        )

    new_file = Files.get_file_by_id(form_data.file_id)
    if not new_file:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.NOT_FOUND,
        )

    # Ensure the new file is processed so that hash/content exist
    if not (new_file.hash and new_file.data and new_file.data.get("content")):
        try:
            process_file(
                request,
                ProcessFileForm(file_id=form_data.file_id),
                user=user,
            )
            new_file = Files.get_file_by_id(form_data.file_id)
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=str(e),
            )

    data = knowledge.data or {}
    file_ids = data.get("file_ids", [])

    # The submitted file is already part of this knowledge base. Without this
    # guard the same-name lookup below would match the file against itself,
    # see identical hashes and delete it as a "redundant upload", leaving a
    # dangling id in file_ids.
    if new_file.id in file_ids:
        log.info(
            f"[KB Sync] skip (already attached) kb_id={id} name={new_file.filename}"
        )
        return _knowledge_files_response(knowledge, file_ids)

    existing_files = Files.get_files_by_ids(file_ids) if file_ids else []
    same_name_file = next(
        (f for f in existing_files if f.filename == new_file.filename), None
    )

    if same_name_file is None:
        # No same-named file: add new
        try:
            process_file(
                request,
                ProcessFileForm(file_id=new_file.id, collection_name=id),
                user=user,
            )
            log.info(f"[KB Sync] add kb_id={id} name={new_file.filename}")
            file_ids.append(new_file.id)
            data["file_ids"] = file_ids
            knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)
            return _knowledge_files_response(knowledge, file_ids)
        except Exception as e:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=str(e),
            )

    # If hashes match, skip (discard the new upload) and keep existing
    if (
        same_name_file.hash
        and new_file.hash
        and same_name_file.hash == new_file.hash
    ):
        _purge_file_artifacts(new_file)
        try:
            Files.delete_file_by_id(new_file.id)
        except Exception as e:
            # Discarding the redundant upload is best-effort only.
            log.debug(e)

        log.info(f"[KB Sync] skip (hash match) kb_id={id} name={new_file.filename}")
        return _knowledge_files_response(knowledge, file_ids)

    # Hash is different: replace old with new
    try:
        # Captured up front because `knowledge` is rebound by the update below.
        kb_collection = knowledge.id

        # Add new file to KB collection FIRST: if this fails, the old file,
        # its embeddings and the knowledge record are all still intact.
        process_file(
            request,
            ProcessFileForm(file_id=new_file.id, collection_name=id),
            user=user,
        )

        # Replace old id with new id in knowledge
        file_ids = [fid for fid in file_ids if fid != same_name_file.id]
        file_ids.append(new_file.id)
        data["file_ids"] = file_ids
        knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data)

        # Remove old file's embeddings from KB collection
        try:
            VECTOR_DB_CLIENT.delete(
                collection_name=kb_collection, filter={"file_id": same_name_file.id}
            )
        except Exception as e:
            log.debug(e)

        # Remove old file's own collection, stored blob and DB record
        _purge_file_artifacts(same_name_file)
        Files.delete_file_by_id(same_name_file.id)

        log.info(f"[KB Sync] replace kb_id={id} old_id={same_name_file.id} new_id={new_file.id} name={new_file.filename}")
        return _knowledge_files_response(knowledge, file_ids)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e),
        )
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
|
@router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
|
||||||
def update_file_from_knowledge_by_id(
|
def update_file_from_knowledge_by_id(
|
||||||
request: Request,
|
request: Request,
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,40 @@ export const addFileToKnowledgeById = async (token: string, id: string, fileId:
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * POST `{ file_id }` to `/knowledge/{id}/file/sync` and return the parsed
 * JSON body of the response.
 *
 * On a non-OK response the parsed error body is treated as the failure; its
 * `detail` field (when truthy) is re-thrown to the caller. Any failure that
 * carries no `detail` is logged and results in a `null` return value.
 */
export const syncFileToKnowledgeById = async (token: string, id: string, fileId: string) => {
	let error = null;
	let payload = null;

	try {
		const response = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/sync`, {
			method: 'POST',
			headers: {
				Accept: 'application/json',
				'Content-Type': 'application/json',
				authorization: `Bearer ${token}`
			},
			body: JSON.stringify({
				file_id: fileId
			})
		});

		// A non-OK response is surfaced through the same failure path as a
		// rejected fetch: its JSON body becomes the thrown value.
		if (!response.ok) throw await response.json();

		payload = await response.json();
	} catch (err) {
		error = err.detail;
		console.error(err);
		payload = null;
	}

	if (error) {
		throw error;
	}

	return payload;
};
|
||||||
|
|
||||||
export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
|
export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => {
|
||||||
let error = null;
|
let error = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,9 @@
|
||||||
getKnowledgeById,
|
getKnowledgeById,
|
||||||
getKnowledgeBases,
|
getKnowledgeBases,
|
||||||
removeFileFromKnowledgeById,
|
removeFileFromKnowledgeById,
|
||||||
resetKnowledgeById,
|
|
||||||
updateFileFromKnowledgeById,
|
updateFileFromKnowledgeById,
|
||||||
updateKnowledgeById
|
updateKnowledgeById,
|
||||||
|
syncFileToKnowledgeById
|
||||||
} from '$lib/apis/knowledge';
|
} from '$lib/apis/knowledge';
|
||||||
import { blobToFile } from '$lib/utils';
|
import { blobToFile } from '$lib/utils';
|
||||||
|
|
||||||
|
|
@ -78,6 +78,7 @@
|
||||||
let showAccessControlModal = false;
|
let showAccessControlModal = false;
|
||||||
|
|
||||||
let inputFiles = null;
|
let inputFiles = null;
|
||||||
|
let syncMode = false;
|
||||||
|
|
||||||
let filteredItems = [];
|
let filteredItems = [];
|
||||||
$: if (knowledge && knowledge.files) {
|
$: if (knowledge && knowledge.files) {
|
||||||
|
|
@ -199,7 +200,11 @@
|
||||||
delete item.itemId;
|
delete item.itemId;
|
||||||
return item;
|
return item;
|
||||||
});
|
});
|
||||||
|
if (syncMode) {
|
||||||
|
await syncFileHandler(uploadedFile.id);
|
||||||
|
} else {
|
||||||
await addFileHandler(uploadedFile.id);
|
await addFileHandler(uploadedFile.id);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
toast.error($i18n.t('Failed to upload file.'));
|
toast.error($i18n.t('Failed to upload file.'));
|
||||||
}
|
}
|
||||||
|
|
@ -382,20 +387,12 @@
|
||||||
|
|
||||||
// Helper function to maintain file paths within zip
|
// Helper function to maintain file paths within zip
|
||||||
const syncDirectoryHandler = async () => {
|
const syncDirectoryHandler = async () => {
|
||||||
if ((knowledge?.files ?? []).length > 0) {
|
syncMode = true;
|
||||||
const res = await resetKnowledgeById(localStorage.token, id).catch((e) => {
|
try {
|
||||||
toast.error(`${e}`);
|
await uploadDirectoryHandler();
|
||||||
});
|
toast.success($i18n.t('Directory sync completed.'));
|
||||||
|
} finally {
|
||||||
if (res) {
|
syncMode = false;
|
||||||
knowledge = res;
|
|
||||||
toast.success($i18n.t('Knowledge reset successfully.'));
|
|
||||||
|
|
||||||
// Upload directory
|
|
||||||
uploadDirectoryHandler();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
uploadDirectoryHandler();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -416,6 +413,23 @@
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Sync one uploaded file into the current knowledge base and refresh the
// local `knowledge` state. On failure the file is dropped from the locally
// displayed file list and an error toast is shown.
const syncFileHandler = async (fileId) => {
	const onRequestError = (e) => {
		toast.error(`${e}`);
		return null;
	};

	const synced = await syncFileToKnowledgeById(localStorage.token, id, fileId).catch(
		onRequestError
	);

	if (!synced) {
		toast.error($i18n.t('Failed to sync file.'));
		knowledge.files = knowledge.files.filter((file) => file.id !== fileId);
		return;
	}

	knowledge = synced;
	toast.success($i18n.t('File synced successfully.'));
};
|
||||||
|
|
||||||
const deleteFileHandler = async (fileId) => {
|
const deleteFileHandler = async (fileId) => {
|
||||||
try {
|
try {
|
||||||
console.log('Starting file deletion process for:', fileId);
|
console.log('Starting file deletion process for:', fileId);
|
||||||
|
|
@ -637,7 +651,7 @@
|
||||||
<SyncConfirmDialog
|
<SyncConfirmDialog
|
||||||
bind:show={showSyncConfirmModal}
|
bind:show={showSyncConfirmModal}
|
||||||
message={$i18n.t(
|
message={$i18n.t(
|
||||||
'This will reset the knowledge base and sync all files. Do you wish to continue?'
|
'This will sync a directory: all modified files will be reuploaded. Do you wish to continue?'
|
||||||
)}
|
)}
|
||||||
on:confirm={() => {
|
on:confirm={() => {
|
||||||
syncDirectoryHandler();
|
syncDirectoryHandler();
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue