wip: knowledge

This commit is contained in:
Timothy Jaeryang Baek 2025-08-14 16:44:52 +04:00
parent 652dcabd86
commit eb86ac7a2b
5 changed files with 52 additions and 46 deletions

View file

@ -100,7 +100,7 @@ class KnowledgeForm(BaseModel):
class KnowledgeTable: class KnowledgeTable:
def insert_new_knowledge( async def insert_new_knowledge(
self, user_id: str, form_data: KnowledgeForm self, user_id: str, form_data: KnowledgeForm
) -> Optional[KnowledgeModel]: ) -> Optional[KnowledgeModel]:
async with get_db() as db: async with get_db() as db:
@ -116,9 +116,9 @@ class KnowledgeTable:
try: try:
result = Knowledge(**knowledge.model_dump()) result = Knowledge(**knowledge.model_dump())
db.add(result) await db.add(result)
db.commit() await db.commit()
db.refresh(result) await db.refresh(result)
if result: if result:
return KnowledgeModel.model_validate(result) return KnowledgeModel.model_validate(result)
else: else:
@ -130,7 +130,7 @@ class KnowledgeTable:
async with get_db() as db: async with get_db() as db:
knowledge_bases = [] knowledge_bases = []
for knowledge in ( for knowledge in (
db.query(Knowledge).order_by(Knowledge.updated_at.desc()).all() await db.query(Knowledge).order_by(Knowledge.updated_at.desc()).all()
): ):
user = await Users.get_user_by_id(knowledge.user_id) user = await Users.get_user_by_id(knowledge.user_id)
knowledge_bases.append( knowledge_bases.append(
@ -146,7 +146,7 @@ class KnowledgeTable:
async def get_knowledge_bases_by_user_id( async def get_knowledge_bases_by_user_id(
self, user_id: str, permission: str = "write" self, user_id: str, permission: str = "write"
) -> list[KnowledgeUserModel]: ) -> list[KnowledgeUserModel]:
knowledge_bases = self.get_knowledge_bases() knowledge_bases = await self.get_knowledge_bases()
return [ return [
knowledge_base knowledge_base
for knowledge_base in knowledge_bases for knowledge_base in knowledge_bases
@ -180,38 +180,38 @@ class KnowledgeTable:
log.exception(e) log.exception(e)
return None return None
def update_knowledge_data_by_id( async def update_knowledge_data_by_id(
self, id: str, data: dict self, id: str, data: dict
) -> Optional[KnowledgeModel]: ) -> Optional[KnowledgeModel]:
try: try:
async with get_db() as db: async with get_db() as db:
knowledge = self.get_knowledge_by_id(id=id) knowledge = await self.get_knowledge_by_id(id=id)
db.query(Knowledge).filter_by(id=id).update( await db.query(Knowledge).filter_by(id=id).update(
{ {
"data": data, "data": data,
"updated_at": int(time.time()), "updated_at": int(time.time()),
} }
) )
db.commit() await db.commit()
return self.get_knowledge_by_id(id=id) return await self.get_knowledge_by_id(id=id)
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
return None return None
def delete_knowledge_by_id(self, id: str) -> bool: async def delete_knowledge_by_id(self, id: str) -> bool:
try: try:
async with get_db() as db: async with get_db() as db:
db.query(Knowledge).filter_by(id=id).delete() await db.query(Knowledge).filter_by(id=id).delete()
db.commit() await db.commit()
return True return True
except Exception: except Exception:
return False return False
def delete_all_knowledge(self) -> bool: async def delete_all_knowledge(self) -> bool:
async with get_db() as db: async with get_db() as db:
try: try:
db.query(Knowledge).delete() await db.query(Knowledge).delete()
db.commit() await db.commit()
return True return True
except Exception: except Exception:

View file

@ -577,7 +577,7 @@ def get_sources_from_items(
or request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL or request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL
): ):
# Manual Full Mode Toggle for Collection # Manual Full Mode Toggle for Collection
knowledge_base = Knowledges.get_knowledge_by_id(item.get("id")) knowledge_base = await Knowledges.get_knowledge_by_id(item.get("id"))
if knowledge_base and ( if knowledge_base and (
user.role == "admin" user.role == "admin"

View file

@ -68,7 +68,7 @@ def has_access_to_file(
knowledge_base_id = file.meta.get("collection_name") if file.meta else None knowledge_base_id = file.meta.get("collection_name") if file.meta else None
if knowledge_base_id: if knowledge_base_id:
knowledge_bases = Knowledges.get_knowledge_bases_by_user_id( knowledge_bases = await Knowledges.get_knowledge_bases_by_user_id(
user.id, access_type user.id, access_type
) )
for knowledge_base in knowledge_bases: for knowledge_base in knowledge_bases:

View file

@ -44,16 +44,18 @@ async def get_knowledge(user=Depends(get_verified_user)):
knowledge_bases = [] knowledge_bases = []
if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS:
knowledge_bases = Knowledges.get_knowledge_bases() knowledge_bases = await Knowledges.get_knowledge_bases()
else: else:
knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "read") knowledge_bases = await Knowledges.get_knowledge_bases_by_user_id(
user.id, "read"
)
# Get files for each knowledge base # Get files for each knowledge base
knowledge_with_files = [] knowledge_with_files = []
for knowledge_base in knowledge_bases: for knowledge_base in knowledge_bases:
files = [] files = []
if knowledge_base.data: if knowledge_base.data:
files = Files.get_file_metadatas_by_ids( files = await Files.get_file_metadatas_by_ids(
knowledge_base.data.get("file_ids", []) knowledge_base.data.get("file_ids", [])
) )
@ -71,11 +73,11 @@ async def get_knowledge(user=Depends(get_verified_user)):
file_ids.remove(missing_file) file_ids.remove(missing_file)
data["file_ids"] = file_ids data["file_ids"] = file_ids
Knowledges.update_knowledge_data_by_id( await Knowledges.update_knowledge_data_by_id(
id=knowledge_base.id, data=data id=knowledge_base.id, data=data
) )
files = Files.get_file_metadatas_by_ids(file_ids) files = await Files.get_file_metadatas_by_ids(file_ids)
knowledge_with_files.append( knowledge_with_files.append(
KnowledgeUserResponse( KnowledgeUserResponse(
@ -92,9 +94,11 @@ async def get_knowledge_list(user=Depends(get_verified_user)):
knowledge_bases = [] knowledge_bases = []
if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS:
knowledge_bases = Knowledges.get_knowledge_bases() knowledge_bases = await Knowledges.get_knowledge_bases()
else: else:
knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "write") knowledge_bases = await Knowledges.get_knowledge_bases_by_user_id(
user.id, "write"
)
# Get files for each knowledge base # Get files for each knowledge base
knowledge_with_files = [] knowledge_with_files = []
@ -119,11 +123,11 @@ async def get_knowledge_list(user=Depends(get_verified_user)):
file_ids.remove(missing_file) file_ids.remove(missing_file)
data["file_ids"] = file_ids data["file_ids"] = file_ids
Knowledges.update_knowledge_data_by_id( await Knowledges.update_knowledge_data_by_id(
id=knowledge_base.id, data=data id=knowledge_base.id, data=data
) )
files = Files.get_file_metadatas_by_ids(file_ids) files = await Files.get_file_metadatas_by_ids(file_ids)
knowledge_with_files.append( knowledge_with_files.append(
KnowledgeUserResponse( KnowledgeUserResponse(
@ -151,7 +155,7 @@ async def create_new_knowledge(
detail=ERROR_MESSAGES.UNAUTHORIZED, detail=ERROR_MESSAGES.UNAUTHORIZED,
) )
knowledge = Knowledges.insert_new_knowledge(user.id, form_data) knowledge = await Knowledges.insert_new_knowledge(user.id, form_data)
if knowledge: if knowledge:
return knowledge return knowledge
@ -175,7 +179,7 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
detail=ERROR_MESSAGES.UNAUTHORIZED, detail=ERROR_MESSAGES.UNAUTHORIZED,
) )
knowledge_bases = Knowledges.get_knowledge_bases() knowledge_bases = await Knowledges.get_knowledge_bases()
log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases") log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases")
@ -188,7 +192,7 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us
f"Knowledge base {knowledge_base.id} has no data or invalid data ({knowledge_base.data!r}). Deleting." f"Knowledge base {knowledge_base.id} has no data or invalid data ({knowledge_base.data!r}). Deleting."
) )
try: try:
Knowledges.delete_knowledge_by_id(id=knowledge_base.id) await Knowledges.delete_knowledge_by_id(id=knowledge_base.id)
deleted_knowledge_bases.append(knowledge_base.id) deleted_knowledge_bases.append(knowledge_base.id)
except Exception as e: except Exception as e:
log.error( log.error(
@ -254,7 +258,7 @@ class KnowledgeFilesResponse(KnowledgeResponse):
@router.get("/{id}", response_model=Optional[KnowledgeFilesResponse]) @router.get("/{id}", response_model=Optional[KnowledgeFilesResponse])
async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)): async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if knowledge: if knowledge:
@ -289,7 +293,7 @@ async def update_knowledge_by_id(
form_data: KnowledgeForm, form_data: KnowledgeForm,
user=Depends(get_verified_user), user=Depends(get_verified_user),
): ):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -306,7 +310,7 @@ async def update_knowledge_by_id(
detail=ERROR_MESSAGES.ACCESS_PROHIBITED, detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
) )
knowledge = Knowledges.update_knowledge_by_id(id=id, form_data=form_data) knowledge = await Knowledges.update_knowledge_by_id(id=id, form_data=form_data)
if knowledge: if knowledge:
file_ids = knowledge.data.get("file_ids", []) if knowledge.data else [] file_ids = knowledge.data.get("file_ids", []) if knowledge.data else []
files = Files.get_files_by_ids(file_ids) files = Files.get_files_by_ids(file_ids)
@ -338,7 +342,7 @@ def add_file_to_knowledge_by_id(
form_data: KnowledgeFileIdForm, form_data: KnowledgeFileIdForm,
user=Depends(get_verified_user), user=Depends(get_verified_user),
): ):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
@ -390,7 +394,7 @@ def add_file_to_knowledge_by_id(
file_ids.append(form_data.file_id) file_ids.append(form_data.file_id)
data["file_ids"] = file_ids data["file_ids"] = file_ids
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) knowledge = await Knowledges.update_knowledge_data_by_id(id=id, data=data)
if knowledge: if knowledge:
files = Files.get_file_metadatas_by_ids(file_ids) files = Files.get_file_metadatas_by_ids(file_ids)
@ -423,7 +427,7 @@ def update_file_from_knowledge_by_id(
form_data: KnowledgeFileIdForm, form_data: KnowledgeFileIdForm,
user=Depends(get_verified_user), user=Depends(get_verified_user),
): ):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -494,7 +498,7 @@ def remove_file_from_knowledge_by_id(
form_data: KnowledgeFileIdForm, form_data: KnowledgeFileIdForm,
user=Depends(get_verified_user), user=Depends(get_verified_user),
): ):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -549,7 +553,7 @@ def remove_file_from_knowledge_by_id(
file_ids.remove(form_data.file_id) file_ids.remove(form_data.file_id)
data["file_ids"] = file_ids data["file_ids"] = file_ids
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) knowledge = await Knowledges.update_knowledge_data_by_id(id=id, data=data)
if knowledge: if knowledge:
files = Files.get_file_metadatas_by_ids(file_ids) files = Files.get_file_metadatas_by_ids(file_ids)
@ -582,7 +586,7 @@ def remove_file_from_knowledge_by_id(
@router.delete("/{id}/delete", response_model=bool) @router.delete("/{id}/delete", response_model=bool)
async def delete_knowledge_by_id(id: str, user=Depends(get_verified_user)): async def delete_knowledge_by_id(id: str, user=Depends(get_verified_user)):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -634,7 +638,7 @@ async def delete_knowledge_by_id(id: str, user=Depends(get_verified_user)):
except Exception as e: except Exception as e:
log.debug(e) log.debug(e)
pass pass
result = Knowledges.delete_knowledge_by_id(id=id) result = await Knowledges.delete_knowledge_by_id(id=id)
return result return result
@ -645,7 +649,7 @@ async def delete_knowledge_by_id(id: str, user=Depends(get_verified_user)):
@router.post("/{id}/reset", response_model=Optional[KnowledgeResponse]) @router.post("/{id}/reset", response_model=Optional[KnowledgeResponse])
async def reset_knowledge_by_id(id: str, user=Depends(get_verified_user)): async def reset_knowledge_by_id(id: str, user=Depends(get_verified_user)):
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -668,7 +672,9 @@ async def reset_knowledge_by_id(id: str, user=Depends(get_verified_user)):
log.debug(e) log.debug(e)
pass pass
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data={"file_ids": []}) knowledge = await Knowledges.update_knowledge_data_by_id(
id=id, data={"file_ids": []}
)
return knowledge return knowledge
@ -688,7 +694,7 @@ def add_files_to_knowledge_batch(
""" """
Add multiple files to a knowledge base Add multiple files to a knowledge base
""" """
knowledge = Knowledges.get_knowledge_by_id(id=id) knowledge = await Knowledges.get_knowledge_by_id(id=id)
if not knowledge: if not knowledge:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
@ -741,7 +747,7 @@ def add_files_to_knowledge_batch(
existing_file_ids.append(file_id) existing_file_ids.append(file_id)
data["file_ids"] = existing_file_ids data["file_ids"] = existing_file_ids
knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) knowledge = await Knowledges.update_knowledge_data_by_id(id=id, data=data)
# If there were any errors, include them in the response # If there were any errors, include them in the response
if result.errors: if result.errors:

View file

@ -2196,7 +2196,7 @@ def delete_entries_from_collection(form_data: DeleteForm, user=Depends(get_admin
@router.post("/reset/db") @router.post("/reset/db")
def reset_vector_db(user=Depends(get_admin_user)): def reset_vector_db(user=Depends(get_admin_user)):
VECTOR_DB_CLIENT.reset() VECTOR_DB_CLIENT.reset()
Knowledges.delete_all_knowledge() await Knowledges.delete_all_knowledge()
@router.post("/reset/uploads") @router.post("/reset/uploads")