open-webui/backend/open_webui/models/knowledge.py

371 lines
11 KiB
Python
Raw Normal View History

2024-10-02 00:35:35 +00:00
import json
import logging
import time
from typing import Optional
2024-10-02 04:32:59 +00:00
import uuid
2024-10-02 00:35:35 +00:00
2024-12-10 08:54:13 +00:00
from open_webui.internal.db import Base, get_db
2024-10-02 00:35:35 +00:00
from open_webui.env import SRC_LOG_LEVELS
2025-12-02 15:53:32 +00:00
from open_webui.models.files import File, FileModel, FileMetadataResponse
from open_webui.models.groups import Groups
2024-12-10 08:54:13 +00:00
from open_webui.models.users import Users, UserResponse
2024-10-02 00:35:35 +00:00
from pydantic import BaseModel, ConfigDict
2025-12-02 15:53:32 +00:00
from sqlalchemy import (
BigInteger,
Column,
ForeignKey,
String,
Text,
JSON,
UniqueConstraint,
)
2024-10-02 00:35:35 +00:00
2024-11-17 00:51:55 +00:00
from open_webui.utils.access_control import has_access
2024-10-02 00:35:35 +00:00
# Module-level logger; its level is taken from the "MODELS" entry of
# SRC_LOG_LEVELS (imported from open_webui.env).
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MODELS"])
####################
# Knowledge DB Schema
####################
2024-10-02 05:45:04 +00:00
class Knowledge(Base):
    """ORM mapping for rows of the ``knowledge`` table.

    The ``access_control`` column defines the access rules for an entry:

    - ``None``: public access, available to all users with the "user" role.
    - ``{}``: private access, restricted exclusively to the owner.
    - Custom permissions: specific access control for reading and writing;
      group- or user-level restrictions can be given::

          {
              "read": {
                  "group_ids": ["group_id1", "group_id2"],
                  "user_ids": ["user_id1", "user_id2"]
              },
              "write": {
                  "group_ids": ["group_id1", "group_id2"],
                  "user_ids": ["user_id1", "user_id2"]
              }
          }
    """

    __tablename__ = "knowledge"

    # Identity and ownership.
    id = Column(Text, primary_key=True, unique=True)
    user_id = Column(Text)

    # Descriptive fields.
    name = Column(Text)
    description = Column(Text)

    # Optional JSON payloads.
    meta = Column(JSON, nullable=True)
    access_control = Column(JSON, nullable=True)  # Controls data access levels.

    # Timestamps (the Pydantic mirror documents them as epoch values).
    created_at = Column(BigInteger)
    updated_at = Column(BigInteger)
2024-10-02 05:45:04 +00:00
class KnowledgeModel(BaseModel):
    """Pydantic counterpart of a ``Knowledge`` row.

    ``from_attributes=True`` lets ``model_validate`` read the values
    directly from an ORM instance.
    """

    model_config = ConfigDict(from_attributes=True)

    id: str
    user_id: str

    name: str
    description: str

    meta: Optional[dict] = None
    access_control: Optional[dict] = None

    # Both timestamps are epoch values.
    created_at: int
    updated_at: int
2025-12-02 15:53:32 +00:00
class KnowledgeFile(Base):
    """ORM mapping for the ``knowledge_file`` table.

    Each row links one file to one knowledge entry; the unique constraint
    allows a given (knowledge_id, file_id) pair to appear only once.
    """

    __tablename__ = "knowledge_file"

    id = Column(Text, primary_key=True, unique=True)

    # Both foreign keys are declared with ON DELETE CASCADE.
    knowledge_id = Column(
        Text,
        ForeignKey("knowledge.id", ondelete="CASCADE"),
        nullable=False,
    )
    file_id = Column(
        Text,
        ForeignKey("file.id", ondelete="CASCADE"),
        nullable=False,
    )
    user_id = Column(Text, nullable=False)

    created_at = Column(BigInteger, nullable=False)
    updated_at = Column(BigInteger, nullable=False)

    __table_args__ = (
        UniqueConstraint(
            "knowledge_id",
            "file_id",
            name="uq_knowledge_file_knowledge_file",
        ),
    )
class KnowledgeFileModel(BaseModel):
    """Pydantic counterpart of a ``KnowledgeFile`` row."""

    # Allows model_validate() to read values from an ORM instance.
    model_config = ConfigDict(from_attributes=True)

    id: str
    knowledge_id: str
    file_id: str
    user_id: str

    # Both timestamps are epoch values.
    created_at: int
    updated_at: int
2024-10-02 00:35:35 +00:00
####################
# Forms
####################
2024-11-18 13:51:01 +00:00
class KnowledgeUserModel(KnowledgeModel):
    """``KnowledgeModel`` extended with an optional ``user`` record."""

    user: Optional[UserResponse] = None
class KnowledgeResponse(KnowledgeModel):
    """``KnowledgeModel`` extended with an optional list of files.

    Each list item is either a ``FileMetadataResponse`` or a plain dict.
    """

    files: Optional[list[FileMetadataResponse | dict]] = None
2024-11-17 04:47:45 +00:00
2024-10-02 00:35:35 +00:00
2024-11-18 13:51:01 +00:00
class KnowledgeUserResponse(KnowledgeUserModel):
    """``KnowledgeUserModel`` extended with an optional list of files.

    Each list item is either a ``FileMetadataResponse`` or a plain dict.
    """

    files: Optional[list[FileMetadataResponse | dict]] = None
2024-10-02 00:35:35 +00:00
2024-10-02 05:45:04 +00:00
class KnowledgeForm(BaseModel):
    """Fields accepted when creating or updating a knowledge entry."""

    name: str
    description: str
    # Optional; defaults to None when the caller does not supply it.
    access_control: Optional[dict] = None
2024-10-03 03:42:10 +00:00
2024-10-02 05:45:04 +00:00
class KnowledgeTable:
    """Data-access helpers for the ``knowledge`` and ``knowledge_file`` tables.

    Each method opens its own session through ``get_db()``. Methods that
    guard their database work with ``try``/``except`` log the failure and
    signal it through the return value (``None``, ``False`` or ``[]``);
    ``get_knowledge_bases`` and the helpers built on it do not catch
    exceptions.
    """

    def insert_new_knowledge(
        self, user_id: str, form_data: KnowledgeForm
    ) -> Optional[KnowledgeModel]:
        """Create a knowledge entry owned by ``user_id``.

        Args:
            user_id: Stored as the owner of the new entry.
            form_data: Name, description and access control of the entry.

        Returns:
            The stored entry, or ``None`` if the insert failed.
        """
        # One timestamp for both columns so a fresh row always starts with
        # created_at == updated_at.
        now = int(time.time())
        knowledge = KnowledgeModel(
            **{
                **form_data.model_dump(),
                "id": str(uuid.uuid4()),
                "user_id": user_id,
                "created_at": now,
                "updated_at": now,
            }
        )

        with get_db() as db:
            try:
                result = Knowledge(**knowledge.model_dump())
                db.add(result)
                db.commit()
                db.refresh(result)
                return KnowledgeModel.model_validate(result)
            except Exception as e:
                log.exception(e)
                return None

    def get_knowledge_bases(self) -> list[KnowledgeUserModel]:
        """Return every knowledge entry, most recently updated first.

        Each entry carries the owner's user record under ``user`` (``None``
        when the owner is not among the users returned by the lookup).
        """
        with get_db() as db:
            all_knowledge = (
                db.query(Knowledge).order_by(Knowledge.updated_at.desc()).all()
            )

            # Fetch all owners in one call instead of one lookup per entry.
            user_ids = list({knowledge.user_id for knowledge in all_knowledge})
            users = Users.get_users_by_user_ids(user_ids) if user_ids else []
            users_dict = {user.id: user for user in users}

            knowledge_bases = []
            for knowledge in all_knowledge:
                user = users_dict.get(knowledge.user_id)
                knowledge_bases.append(
                    KnowledgeUserModel.model_validate(
                        {
                            **KnowledgeModel.model_validate(knowledge).model_dump(),
                            "user": user.model_dump() if user else None,
                        }
                    )
                )
            return knowledge_bases

    def check_access_by_user_id(
        self, id: str, user_id: str, permission: str = "write"
    ) -> bool:
        """Tell whether ``user_id`` holds ``permission`` on entry ``id``.

        Returns ``False`` when the entry cannot be loaded and ``True`` when
        the user owns it; otherwise the decision is delegated to
        ``has_access`` with the entry's ``access_control`` and the ids of
        the groups the user belongs to.
        """
        knowledge = self.get_knowledge_by_id(id)
        if not knowledge:
            return False
        if knowledge.user_id == user_id:
            return True

        user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id)}
        return has_access(user_id, permission, knowledge.access_control, user_group_ids)

    def get_knowledge_bases_by_user_id(
        self, user_id: str, permission: str = "write"
    ) -> list[KnowledgeUserModel]:
        """Return the entries ``user_id`` owns or holds ``permission`` on.

        All entries are loaded via ``get_knowledge_bases`` and filtered in
        Python, so the ordering of that method is preserved.
        """
        knowledge_bases = self.get_knowledge_bases()
        user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id)}
        return [
            knowledge_base
            for knowledge_base in knowledge_bases
            if knowledge_base.user_id == user_id
            or has_access(
                user_id, permission, knowledge_base.access_control, user_group_ids
            )
        ]

    def get_knowledge_by_id(self, id: str) -> Optional[KnowledgeModel]:
        """Return the entry with the given id, or ``None`` if it is missing
        or the lookup failed."""
        try:
            with get_db() as db:
                knowledge = db.query(Knowledge).filter_by(id=id).first()
                return KnowledgeModel.model_validate(knowledge) if knowledge else None
        except Exception as e:
            log.exception(e)
            return None

    def get_knowledges_by_file_id(self, file_id: str) -> list[KnowledgeModel]:
        """Return the entries linked to ``file_id`` through ``knowledge_file``.

        Returns an empty list if the query failed.
        """
        try:
            with get_db() as db:
                knowledges = (
                    db.query(Knowledge)
                    .join(KnowledgeFile, Knowledge.id == KnowledgeFile.knowledge_id)
                    .filter(KnowledgeFile.file_id == file_id)
                    .all()
                )
                return [
                    KnowledgeModel.model_validate(knowledge) for knowledge in knowledges
                ]
        except Exception as e:
            log.exception(e)
            return []

    def get_files_by_id(self, knowledge_id: str) -> list[FileModel]:
        """Return the files linked to ``knowledge_id`` through ``knowledge_file``.

        Returns an empty list if the query failed.
        """
        try:
            with get_db() as db:
                files = (
                    db.query(File)
                    .join(KnowledgeFile, File.id == KnowledgeFile.file_id)
                    .filter(KnowledgeFile.knowledge_id == knowledge_id)
                    .all()
                )
                return [FileModel.model_validate(file) for file in files]
        except Exception as e:
            log.exception(e)
            return []

    def get_file_metadatas_by_id(self, knowledge_id: str) -> list[FileMetadataResponse]:
        """Return ``FileMetadataResponse`` objects for the files of an entry.

        Returns an empty list if the conversion failed.
        """
        try:
            # get_files_by_id manages its own session, so none is opened here.
            files = self.get_files_by_id(knowledge_id)
            return [FileMetadataResponse(**file.model_dump()) for file in files]
        except Exception as e:
            log.exception(e)
            return []

    def add_file_to_knowledge_by_id(
        self, knowledge_id: str, file_id: str, user_id: str
    ) -> Optional[KnowledgeFileModel]:
        """Link ``file_id`` to ``knowledge_id`` on behalf of ``user_id``.

        Returns:
            The stored link, or ``None`` if the insert failed (the table
            declares a unique constraint on the (knowledge_id, file_id)
            pair, so a duplicate link is one way for it to fail).
        """
        # One timestamp for both columns so a fresh row always starts with
        # created_at == updated_at.
        now = int(time.time())
        knowledge_file = KnowledgeFileModel(
            **{
                "id": str(uuid.uuid4()),
                "knowledge_id": knowledge_id,
                "file_id": file_id,
                "user_id": user_id,
                "created_at": now,
                "updated_at": now,
            }
        )

        with get_db() as db:
            try:
                result = KnowledgeFile(**knowledge_file.model_dump())
                db.add(result)
                db.commit()
                db.refresh(result)
                return KnowledgeFileModel.model_validate(result)
            except Exception as e:
                log.exception(e)
                return None

    def remove_file_from_knowledge_by_id(self, knowledge_id: str, file_id: str) -> bool:
        """Delete the link between ``knowledge_id`` and ``file_id``.

        Returns ``True`` when the delete and commit ran without error (even
        if no row matched) and ``False`` if they raised.
        """
        try:
            with get_db() as db:
                db.query(KnowledgeFile).filter_by(
                    knowledge_id=knowledge_id, file_id=file_id
                ).delete()
                db.commit()
                return True
        except Exception as e:
            log.exception(e)
            return False

    def reset_knowledge_by_id(self, id: str) -> Optional[KnowledgeModel]:
        """Remove every file link of entry ``id`` and bump its ``updated_at``.

        Returns:
            The refreshed entry, or ``None`` if the reset failed or the
            entry does not exist.
        """
        try:
            with get_db() as db:
                # Delete all knowledge_file entries for this knowledge_id
                db.query(KnowledgeFile).filter_by(knowledge_id=id).delete()
                # Update the knowledge entry's updated_at timestamp
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        "updated_at": int(time.time()),
                    }
                )
                # Single commit so the link removal and the timestamp bump
                # are applied together or not at all.
                db.commit()
                return self.get_knowledge_by_id(id=id)
        except Exception as e:
            log.exception(e)
            return None

    def update_knowledge_by_id(
        self, id: str, form_data: KnowledgeForm, overwrite: bool = False
    ) -> Optional[KnowledgeModel]:
        """Overwrite the form fields of entry ``id`` and bump ``updated_at``.

        Args:
            id: Id of the entry to update.
            form_data: New name, description and access control.
            overwrite: Not read by this method; kept so existing callers
                that pass it keep working.

        Returns:
            The refreshed entry, or ``None`` if the update failed or the
            entry does not exist.
        """
        try:
            with get_db() as db:
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        **form_data.model_dump(),
                        "updated_at": int(time.time()),
                    }
                )
                db.commit()
                return self.get_knowledge_by_id(id=id)
        except Exception as e:
            log.exception(e)
            return None

    def update_knowledge_data_by_id(
        self, id: str, data: dict
    ) -> Optional[KnowledgeModel]:
        """Write ``data`` to the ``data`` field of entry ``id`` and bump
        ``updated_at``.

        Returns:
            The refreshed entry, or ``None`` if the update failed or the
            entry does not exist.
        """
        try:
            with get_db() as db:
                # NOTE(review): the Knowledge mapping visible in this file
                # declares no `data` column, so this update presumably
                # raises and the method returns None -- confirm whether the
                # method is still needed or should target another field.
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        "data": data,
                        "updated_at": int(time.time()),
                    }
                )
                db.commit()
                return self.get_knowledge_by_id(id=id)
        except Exception as e:
            log.exception(e)
            return None

    def delete_knowledge_by_id(self, id: str) -> bool:
        """Delete entry ``id``.

        Returns ``True`` when the delete and commit ran without error (even
        if no row matched) and ``False`` if they raised.
        """
        try:
            with get_db() as db:
                db.query(Knowledge).filter_by(id=id).delete()
                db.commit()
                return True
        except Exception as e:
            log.exception(e)
            return False

    def delete_all_knowledge(self) -> bool:
        """Delete every row of the ``knowledge`` table.

        Returns ``True`` on success and ``False`` if the delete or commit
        raised.
        """
        with get_db() as db:
            try:
                db.query(Knowledge).delete()
                db.commit()
                return True
            except Exception as e:
                log.exception(e)
                return False
2024-10-02 00:35:35 +00:00
2024-10-02 05:45:04 +00:00
# Module-level KnowledgeTable instance.
Knowledges = KnowledgeTable()