diff --git a/.prettierignore b/.prettierignore index 82c4912572..83bbde598b 100644 --- a/.prettierignore +++ b/.prettierignore @@ -3,8 +3,6 @@ pnpm-lock.yaml package-lock.json yarn.lock -kubernetes/ - # Copy of .gitignore .DS_Store node_modules diff --git a/INSTALLATION.md b/INSTALLATION.md deleted file mode 100644 index 4298b173e9..0000000000 --- a/INSTALLATION.md +++ /dev/null @@ -1,35 +0,0 @@ -### Installing Both Ollama and Open WebUI Using Kustomize - -For cpu-only pod - -```bash -kubectl apply -f ./kubernetes/manifest/base -``` - -For gpu-enabled pod - -```bash -kubectl apply -k ./kubernetes/manifest -``` - -### Installing Both Ollama and Open WebUI Using Helm - -Package Helm file first - -```bash -helm package ./kubernetes/helm/ -``` - -For cpu-only pod - -```bash -helm install ollama-webui ./ollama-webui-*.tgz -``` - -For gpu-enabled pod - -```bash -helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1" -``` - -Check the `kubernetes/helm/values.yaml` file to know which parameters are available for customization diff --git a/LICENSE b/LICENSE index 3991050972..faa0129c65 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2023-2025 Timothy Jaeryang Baek (Open WebUI) +Copyright (c) 2023- Open WebUI Inc. [Created by Timothy Jaeryang Baek] All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 41e88df5d2..84c99841d4 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -629,6 +629,12 @@ OAUTH_ACCESS_TOKEN_REQUEST_INCLUDE_CLIENT_ID = ( == "true" ) +OAUTH_AUDIENCE = PersistentConfig( + "OAUTH_AUDIENCE", + "oauth.audience", + os.environ.get("OAUTH_AUDIENCE", ""), +) + def load_oauth_providers(): OAUTH_PROVIDERS.clear() @@ -1300,7 +1306,7 @@ USER_PERMISSIONS_WORKSPACE_MODELS_ALLOW_PUBLIC_SHARING = ( USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_SHARING = ( os.environ.get( - "USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_PUBLIC_SHARING", "False" + "USER_PERMISSIONS_WORKSPACE_KNOWLEDGE_ALLOW_SHARING", "False" ).lower() == "true" ) @@ -1339,7 +1345,7 @@ USER_PERMISSIONS_WORKSPACE_TOOLS_ALLOW_PUBLIC_SHARING = ( USER_PERMISSIONS_NOTES_ALLOW_SHARING = ( - os.environ.get("USER_PERMISSIONS_NOTES_ALLOW_PUBLIC_SHARING", "False").lower() + os.environ.get("USER_PERMISSIONS_NOTES_ALLOW_SHARING", "False").lower() == "true" ) @@ -2994,6 +3000,12 @@ WEB_LOADER_CONCURRENT_REQUESTS = PersistentConfig( int(os.getenv("WEB_LOADER_CONCURRENT_REQUESTS", "10")), ) +WEB_LOADER_TIMEOUT = PersistentConfig( + "WEB_LOADER_TIMEOUT", + "rag.web.loader.timeout", + os.getenv("WEB_LOADER_TIMEOUT", ""), +) + ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( "ENABLE_WEB_LOADER_SSL_VERIFICATION", diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index e3c50ea8d1..d49a79b3b1 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -395,6 +395,13 @@ try: except ValueError: REDIS_SENTINEL_MAX_RETRY_COUNT = 2 + +REDIS_SOCKET_CONNECT_TIMEOUT = os.environ.get("REDIS_SOCKET_CONNECT_TIMEOUT", "") +try: + REDIS_SOCKET_CONNECT_TIMEOUT = float(REDIS_SOCKET_CONNECT_TIMEOUT) +except ValueError: + REDIS_SOCKET_CONNECT_TIMEOUT = None + #################################### # UVICORN WORKERS #################################### @@ -620,9 +627,16 @@ ENABLE_WEBSOCKET_SUPPORT = ( WEBSOCKET_MANAGER = os.environ.get("WEBSOCKET_MANAGER", "") WEBSOCKET_REDIS_OPTIONS = os.environ.get("WEBSOCKET_REDIS_OPTIONS", "") + + if 
WEBSOCKET_REDIS_OPTIONS == "": - log.debug("No WEBSOCKET_REDIS_OPTIONS provided, defaulting to None") - WEBSOCKET_REDIS_OPTIONS = None + if REDIS_SOCKET_CONNECT_TIMEOUT: + WEBSOCKET_REDIS_OPTIONS = { + "socket_connect_timeout": REDIS_SOCKET_CONNECT_TIMEOUT + } + else: + log.debug("No WEBSOCKET_REDIS_OPTIONS provided, defaulting to None") + WEBSOCKET_REDIS_OPTIONS = None else: try: WEBSOCKET_REDIS_OPTIONS = json.loads(WEBSOCKET_REDIS_OPTIONS) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 21a1aee043..5609289166 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -208,6 +208,7 @@ from open_webui.config import ( FIRECRAWL_API_KEY, WEB_LOADER_ENGINE, WEB_LOADER_CONCURRENT_REQUESTS, + WEB_LOADER_TIMEOUT, WHISPER_MODEL, WHISPER_VAD_FILTER, WHISPER_LANGUAGE, @@ -922,6 +923,7 @@ app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = WEB_LOADER_CONCURRENT_REQUESTS +app.state.config.WEB_LOADER_TIMEOUT = WEB_LOADER_TIMEOUT app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( @@ -1031,6 +1033,7 @@ app.state.EMBEDDING_FUNCTION = get_embedding_function( if app.state.config.RAG_EMBEDDING_ENGINE == "azure_openai" else None ), + enable_async=app.state.config.ENABLE_ASYNC_EMBEDDING, ) app.state.RERANKING_FUNCTION = get_reranking_function( diff --git a/backend/open_webui/migrations/versions/6283dc0e4d8d_add_channel_file_table.py b/backend/open_webui/migrations/versions/6283dc0e4d8d_add_channel_file_table.py new file mode 100644 index 0000000000..59fe57a421 --- /dev/null +++ b/backend/open_webui/migrations/versions/6283dc0e4d8d_add_channel_file_table.py @@ -0,0 +1,54 @@ +"""Add channel file table + +Revision ID: 6283dc0e4d8d +Revises: 3e0e00844bb0 +Create Date: 2025-12-10 15:11:39.424601 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import open_webui.internal.db + + +# revision identifiers, used by Alembic. 
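+# (Revision chain: 3e0e00844bb0 -> 6283dc0e4d8d -> 81cc2ce44d79;
+#  the follow-up revision 81cc2ce44d79 is added later in this diff.)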
+revision: str = "6283dc0e4d8d" +down_revision: Union[str, None] = "3e0e00844bb0" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "channel_file", + sa.Column("id", sa.Text(), primary_key=True), + sa.Column("user_id", sa.Text(), nullable=False), + sa.Column( + "channel_id", + sa.Text(), + sa.ForeignKey("channel.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "file_id", + sa.Text(), + sa.ForeignKey("file.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("created_at", sa.BigInteger(), nullable=False), + sa.Column("updated_at", sa.BigInteger(), nullable=False), + # indexes + sa.Index("ix_channel_file_channel_id", "channel_id"), + sa.Index("ix_channel_file_file_id", "file_id"), + sa.Index("ix_channel_file_user_id", "user_id"), + # unique constraints + sa.UniqueConstraint( + "channel_id", "file_id", name="uq_channel_file_channel_file" + ), # prevent duplicate entries + ) + + +def downgrade() -> None: + op.drop_table("channel_file") diff --git a/backend/open_webui/migrations/versions/81cc2ce44d79_update_channel_file_and_knowledge_table.py b/backend/open_webui/migrations/versions/81cc2ce44d79_update_channel_file_and_knowledge_table.py new file mode 100644 index 0000000000..181b280666 --- /dev/null +++ b/backend/open_webui/migrations/versions/81cc2ce44d79_update_channel_file_and_knowledge_table.py @@ -0,0 +1,49 @@ +"""Update channel file and knowledge table + +Revision ID: 81cc2ce44d79 +Revises: 6283dc0e4d8d +Create Date: 2025-12-10 16:07:58.001282 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +import open_webui.internal.db + + +# revision identifiers, used by Alembic. +revision: str = "81cc2ce44d79" +down_revision: Union[str, None] = "6283dc0e4d8d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # Add message_id column to channel_file table + with op.batch_alter_table("channel_file", schema=None) as batch_op: + batch_op.add_column( + sa.Column( + "message_id", + sa.Text(), + sa.ForeignKey( + "message.id", ondelete="CASCADE", name="fk_channel_file_message_id" + ), + nullable=True, + ) + ) + + # Add data column to knowledge table + with op.batch_alter_table("knowledge", schema=None) as batch_op: + batch_op.add_column(sa.Column("data", sa.JSON(), nullable=True)) + + +def downgrade() -> None: + # Remove message_id column from channel_file table + with op.batch_alter_table("channel_file", schema=None) as batch_op: + batch_op.drop_column("message_id") + + # Remove data column from knowledge table + with op.batch_alter_table("knowledge", schema=None) as batch_op: + batch_op.drop_column("data") diff --git a/backend/open_webui/models/channels.py b/backend/open_webui/models/channels.py index 754f6e3dfa..362222a284 100644 --- a/backend/open_webui/models/channels.py +++ b/backend/open_webui/models/channels.py @@ -10,7 +10,18 @@ from pydantic import BaseModel, ConfigDict from sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON, case, cast +from sqlalchemy import ( + BigInteger, + Boolean, + Column, + ForeignKey, + String, + Text, + JSON, + UniqueConstraint, + case, + cast, +) from sqlalchemy import or_, func, select, and_, text from sqlalchemy.sql import exists @@ -137,6 +148,41 @@ class ChannelMemberModel(BaseModel): updated_at: Optional[int] = None # timestamp in epoch 
(time_ns) +class ChannelFile(Base): + __tablename__ = "channel_file" + + id = Column(Text, unique=True, primary_key=True) + user_id = Column(Text, nullable=False) + + channel_id = Column( + Text, ForeignKey("channel.id", ondelete="CASCADE"), nullable=False + ) + message_id = Column( + Text, ForeignKey("message.id", ondelete="CASCADE"), nullable=True + ) + file_id = Column(Text, ForeignKey("file.id", ondelete="CASCADE"), nullable=False) + + created_at = Column(BigInteger, nullable=False) + updated_at = Column(BigInteger, nullable=False) + + __table_args__ = ( + UniqueConstraint("channel_id", "file_id", name="uq_channel_file_channel_file"), + ) + + +class ChannelFileModel(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: str + + channel_id: str + file_id: str + user_id: str + + created_at: int # timestamp in epoch (time_ns) + updated_at: int # timestamp in epoch (time_ns) + + class ChannelWebhook(Base): __tablename__ = "channel_webhook" @@ -642,6 +688,135 @@ class ChannelTable: channel = db.query(Channel).filter(Channel.id == id).first() return ChannelModel.model_validate(channel) if channel else None + def get_channels_by_file_id(self, file_id: str) -> list[ChannelModel]: + with get_db() as db: + channel_files = ( + db.query(ChannelFile).filter(ChannelFile.file_id == file_id).all() + ) + channel_ids = [cf.channel_id for cf in channel_files] + channels = db.query(Channel).filter(Channel.id.in_(channel_ids)).all() + return [ChannelModel.model_validate(channel) for channel in channels] + + def get_channels_by_file_id_and_user_id( + self, file_id: str, user_id: str + ) -> list[ChannelModel]: + with get_db() as db: + # 1. Determine which channels have this file + channel_file_rows = ( + db.query(ChannelFile).filter(ChannelFile.file_id == file_id).all() + ) + channel_ids = [row.channel_id for row in channel_file_rows] + + if not channel_ids: + return [] + + # 2. 
Load all channel rows that still exist + channels = ( + db.query(Channel) + .filter( + Channel.id.in_(channel_ids), + Channel.deleted_at.is_(None), + Channel.archived_at.is_(None), + ) + .all() + ) + if not channels: + return [] + + # Preload user's group membership + user_group_ids = [g.id for g in Groups.get_groups_by_member_id(user_id)] + + allowed_channels = [] + + for channel in channels: + # --- Case A: group or dm => user must be an active member --- + if channel.type in ["group", "dm"]: + membership = ( + db.query(ChannelMember) + .filter( + ChannelMember.channel_id == channel.id, + ChannelMember.user_id == user_id, + ChannelMember.is_active.is_(True), + ) + .first() + ) + if membership: + allowed_channels.append(ChannelModel.model_validate(channel)) + continue + + # --- Case B: standard channel => rely on ACL permissions --- + query = db.query(Channel).filter(Channel.id == channel.id) + + query = self._has_permission( + db, + query, + {"user_id": user_id, "group_ids": user_group_ids}, + permission="read", + ) + + allowed = query.first() + if allowed: + allowed_channels.append(ChannelModel.model_validate(allowed)) + + return allowed_channels + + def get_channel_by_id_and_user_id( + self, id: str, user_id: str + ) -> Optional[ChannelModel]: + with get_db() as db: + # Fetch the channel + channel: Channel = ( + db.query(Channel) + .filter( + Channel.id == id, + Channel.deleted_at.is_(None), + Channel.archived_at.is_(None), + ) + .first() + ) + + if not channel: + return None + + # If the channel is a group or dm, read access requires membership (active) + if channel.type in ["group", "dm"]: + membership = ( + db.query(ChannelMember) + .filter( + ChannelMember.channel_id == id, + ChannelMember.user_id == user_id, + ChannelMember.is_active.is_(True), + ) + .first() + ) + if membership: + return ChannelModel.model_validate(channel) + else: + return None + + # For channels that are NOT group/dm, fall back to ACL-based read access + query = db.query(Channel).filter(Channel.id == id) + + # Determine user groups + user_group_ids = [ + group.id for group in Groups.get_groups_by_member_id(user_id) + ] + + # Apply ACL rules + query = self._has_permission( + db, + query, + {"user_id": user_id, "group_ids": user_group_ids}, + permission="read", + ) + + channel_allowed = query.first() + return ( + ChannelModel.model_validate(channel_allowed) + if channel_allowed + else None + ) + def update_channel_by_id( self, id: str, form_data: ChannelForm ) -> Optional[ChannelModel]: @@ -663,6 +838,65 @@ class ChannelTable: db.commit() return ChannelModel.model_validate(channel) if channel else None + def add_file_to_channel_by_id( + self, channel_id: str, file_id: str, user_id: str + ) -> Optional[ChannelFileModel]: + with get_db() as db: + channel_file = ChannelFileModel( + **{ + "id": str(uuid.uuid4()), + "channel_id": channel_id, + "file_id": file_id, + "user_id": user_id, + "created_at": int(time.time()), + "updated_at": int(time.time()), + } + ) + + try: + result = ChannelFile(**channel_file.model_dump()) + db.add(result) + db.commit() + db.refresh(result) + if result: + return ChannelFileModel.model_validate(result) + else: + return None + except Exception: + return None + + def set_file_message_id_in_channel_by_id( + self, channel_id: str, file_id: str, message_id: str + ) -> bool: + try: + with get_db() as db: + channel_file = ( + db.query(ChannelFile) + .filter_by(channel_id=channel_id, file_id=file_id) + .first() + ) + if not channel_file: + return False + + channel_file.message_id = message_id + 
channel_file.updated_at = int(time.time()) + + db.commit() + return True + except Exception: + return False + + def remove_file_from_channel_by_id(self, channel_id: str, file_id: str) -> bool: + try: + with get_db() as db: + db.query(ChannelFile).filter_by( + channel_id=channel_id, file_id=file_id + ).delete() + db.commit() + return True + except Exception: + return False + def delete_channel_by_id(self, id: str): with get_db() as db: db.query(Channel).filter(Channel.id == id).delete() diff --git a/backend/open_webui/models/chats.py b/backend/open_webui/models/chats.py index 187a4522c9..381b625200 100644 --- a/backend/open_webui/models/chats.py +++ b/backend/open_webui/models/chats.py @@ -126,6 +126,49 @@ class ChatTitleIdResponse(BaseModel): created_at: int +class ChatListResponse(BaseModel): + items: list[ChatModel] + total: int + + +class ChatUsageStatsResponse(BaseModel): + id: str # chat id + + models: dict = {} # models used in the chat with their usage counts + message_count: int # number of messages in the chat + + history_models: dict = {} # models used in the chat history with their usage counts + history_message_count: int # number of messages in the chat history + history_user_message_count: int # number of user messages in the chat history + history_assistant_message_count: ( + int # number of assistant messages in the chat history + ) + + average_response_time: ( + float # average response time of assistant messages in seconds + ) + average_user_message_content_length: ( + float # average length of user message contents + ) + average_assistant_message_content_length: ( + float # average length of assistant message contents + ) + + tags: list[str] = [] # tags associated with the chat + + last_message_at: int # timestamp of the last message + updated_at: int + created_at: int + + model_config = ConfigDict(extra="allow") + + +class ChatUsageStatsListResponse(BaseModel): + items: list[ChatUsageStatsResponse] + total: int + model_config = ConfigDict(extra="allow") + + class ChatTable: def _clean_null_bytes(self, obj): """ @@ -675,14 +718,31 @@ class ChatTable: ) return [ChatModel.model_validate(chat) for chat in all_chats] - def get_chats_by_user_id(self, user_id: str) -> list[ChatModel]: + def get_chats_by_user_id( + self, user_id: str, skip: Optional[int] = None, limit: Optional[int] = None + ) -> ChatListResponse: with get_db() as db: - all_chats = ( + query = ( db.query(Chat) .filter_by(user_id=user_id) .order_by(Chat.updated_at.desc()) ) - return [ChatModel.model_validate(chat) for chat in all_chats] + + total = query.count() + + if skip is not None: + query = query.offset(skip) + if limit is not None: + query = query.limit(limit) + + all_chats = query.all() + + return ChatListResponse( + **{ + "items": [ChatModel.model_validate(chat) for chat in all_chats], + "total": total, + } + ) def get_pinned_chats_by_user_id(self, user_id: str) -> list[ChatModel]: with get_db() as db: diff --git a/backend/open_webui/models/files.py b/backend/open_webui/models/files.py index 1ed743df87..0eb106501a 100644 --- a/backend/open_webui/models/files.py +++ b/backend/open_webui/models/files.py @@ -104,6 +104,11 @@ class FileUpdateForm(BaseModel): meta: Optional[dict] = None +class FileListResponse(BaseModel): + items: list[FileModel] + total: int + + class FilesTable: def insert_new_file(self, user_id: str, form_data: FileForm) -> Optional[FileModel]: with get_db() as db: @@ -238,6 +243,7 @@ class FilesTable: try: file = db.query(File).filter_by(id=id).first() file.hash = hash + file.updated_at 
= int(time.time()) db.commit() return FileModel.model_validate(file) @@ -249,6 +255,7 @@ class FilesTable: try: file = db.query(File).filter_by(id=id).first() file.data = {**(file.data if file.data else {}), **data} + file.updated_at = int(time.time()) db.commit() return FileModel.model_validate(file) except Exception as e: @@ -260,6 +267,7 @@ class FilesTable: try: file = db.query(File).filter_by(id=id).first() file.meta = {**(file.meta if file.meta else {}), **meta} + file.updated_at = int(time.time()) db.commit() return FileModel.model_validate(file) except Exception: diff --git a/backend/open_webui/models/knowledge.py b/backend/open_webui/models/knowledge.py index 2c72401181..36dd0b4649 100644 --- a/backend/open_webui/models/knowledge.py +++ b/backend/open_webui/models/knowledge.py @@ -5,11 +5,17 @@ from typing import Optional import uuid from open_webui.internal.db import Base, get_db + from open_webui.env import SRC_LOG_LEVELS -from open_webui.models.files import File, FileModel, FileMetadataResponse +from open_webui.models.files import ( + File, + FileModel, + FileMetadataResponse, + FileModelResponse, +) from open_webui.models.groups import Groups -from open_webui.models.users import Users, UserResponse +from open_webui.models.users import User, UserModel, Users, UserResponse from pydantic import BaseModel, ConfigDict @@ -21,9 +27,12 @@ from sqlalchemy import ( Text, JSON, UniqueConstraint, + or_, ) from open_webui.utils.access_control import has_access +from open_webui.utils.db.access_control import has_permission + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MODELS"]) @@ -126,7 +135,7 @@ class KnowledgeResponse(KnowledgeModel): class KnowledgeUserResponse(KnowledgeUserModel): - files: Optional[list[FileMetadataResponse | dict]] = None + pass class KnowledgeForm(BaseModel): @@ -135,6 +144,20 @@ class KnowledgeForm(BaseModel): access_control: Optional[dict] = None +class FileUserResponse(FileModelResponse): + user: Optional[UserResponse] = None + + +class KnowledgeListResponse(BaseModel): + items: list[KnowledgeUserModel] + total: int + + +class KnowledgeFileListResponse(BaseModel): + items: list[FileUserResponse] + total: int + + class KnowledgeTable: def insert_new_knowledge( self, user_id: str, form_data: KnowledgeForm @@ -162,12 +185,13 @@ class KnowledgeTable: except Exception: return None - def get_knowledge_bases(self) -> list[KnowledgeUserModel]: + def get_knowledge_bases( + self, skip: int = 0, limit: int = 30 + ) -> list[KnowledgeUserModel]: with get_db() as db: all_knowledge = ( db.query(Knowledge).order_by(Knowledge.updated_at.desc()).all() ) - user_ids = list(set(knowledge.user_id for knowledge in all_knowledge)) users = Users.get_users_by_user_ids(user_ids) if user_ids else [] @@ -186,6 +210,126 @@ class KnowledgeTable: ) return knowledge_bases + def search_knowledge_bases( + self, user_id: str, filter: dict, skip: int = 0, limit: int = 30 + ) -> KnowledgeListResponse: + try: + with get_db() as db: + query = db.query(Knowledge, User).outerjoin( + User, User.id == Knowledge.user_id + ) + + if filter: + query_key = filter.get("query") + if query_key: + query = query.filter( + or_( + Knowledge.name.ilike(f"%{query_key}%"), + Knowledge.description.ilike(f"%{query_key}%"), + ) + ) + + view_option = filter.get("view_option") + if view_option == "created": + query = query.filter(Knowledge.user_id == user_id) + elif view_option == "shared": + query = query.filter(Knowledge.user_id != user_id) + + query = has_permission(db, Knowledge, query, filter) + + 
query = query.order_by(Knowledge.updated_at.desc()) + + total = query.count() + if skip: + query = query.offset(skip) + if limit: + query = query.limit(limit) + + items = query.all() + + knowledge_bases = [] + for knowledge_base, user in items: + knowledge_bases.append( + KnowledgeUserModel.model_validate( + { + **KnowledgeModel.model_validate( + knowledge_base + ).model_dump(), + "user": ( + UserModel.model_validate(user).model_dump() + if user + else None + ), + } + ) + ) + + return KnowledgeListResponse(items=knowledge_bases, total=total) + except Exception as e: + print(e) + return KnowledgeListResponse(items=[], total=0) + + def search_knowledge_files( + self, filter: dict, skip: int = 0, limit: int = 30 + ) -> KnowledgeFileListResponse: + """ + Scalable version: search files across all knowledge bases the user has + READ access to, without loading all KBs or using large IN() lists. + """ + try: + with get_db() as db: + # Base query: join Knowledge → KnowledgeFile → File + query = ( + db.query(File, User) + .join(KnowledgeFile, File.id == KnowledgeFile.file_id) + .join(Knowledge, KnowledgeFile.knowledge_id == Knowledge.id) + .outerjoin(User, User.id == KnowledgeFile.user_id) + ) + + # Apply access-control directly to the joined query + # This makes the database handle filtering, even with 10k+ KBs + query = has_permission(db, Knowledge, query, filter) + + # Apply filename search + if filter: + q = filter.get("query") + if q: + query = query.filter(File.filename.ilike(f"%{q}%")) + + # Order by file changes + query = query.order_by(File.updated_at.desc()) + + # Count before pagination + total = query.count() + + if skip: + query = query.offset(skip) + if limit: + query = query.limit(limit) + + rows = query.all() + + items = [] + for file, user in rows: + items.append( + FileUserResponse( + **FileModel.model_validate(file).model_dump(), + user=( + UserResponse( + **UserModel.model_validate(user).model_dump() + ) + if user + else None + ), + ) + ) + + return KnowledgeFileListResponse(items=items, total=total) + + except Exception as e: + print("search_knowledge_files error:", e) + return KnowledgeFileListResponse(items=[], total=0) + def check_access_by_user_id(self, id, user_id, permission="write") -> bool: knowledge = self.get_knowledge_by_id(id) if not knowledge: @@ -217,6 +361,21 @@ class KnowledgeTable: except Exception: return None + def get_knowledge_by_id_and_user_id( + self, id: str, user_id: str + ) -> Optional[KnowledgeModel]: + knowledge = self.get_knowledge_by_id(id) + if not knowledge: + return None + + if knowledge.user_id == user_id: + return knowledge + + user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id)} + if has_access(user_id, "write", knowledge.access_control, user_group_ids): + return knowledge + return None + def get_knowledges_by_file_id(self, file_id: str) -> list[KnowledgeModel]: try: with get_db() as db: @@ -232,6 +391,88 @@ class KnowledgeTable: except Exception: return [] + def search_files_by_id( + self, + knowledge_id: str, + user_id: str, + filter: dict, + skip: int = 0, + limit: int = 30, + ) -> KnowledgeFileListResponse: + try: + with get_db() as db: + query = ( + db.query(File, User) + .join(KnowledgeFile, File.id == KnowledgeFile.file_id) + .outerjoin(User, User.id == KnowledgeFile.user_id) + .filter(KnowledgeFile.knowledge_id == knowledge_id) + ) + + if filter: + query_key = filter.get("query") + if query_key: + query = query.filter(or_(File.filename.ilike(f"%{query_key}%"))) + + view_option = filter.get("view_option") + 
if view_option == "created": + query = query.filter(KnowledgeFile.user_id == user_id) + elif view_option == "shared": + query = query.filter(KnowledgeFile.user_id != user_id) + + order_by = filter.get("order_by") + direction = filter.get("direction") + + if order_by == "name": + if direction == "asc": + query = query.order_by(File.filename.asc()) + else: + query = query.order_by(File.filename.desc()) + elif order_by == "created_at": + if direction == "asc": + query = query.order_by(File.created_at.asc()) + else: + query = query.order_by(File.created_at.desc()) + elif order_by == "updated_at": + if direction == "asc": + query = query.order_by(File.updated_at.asc()) + else: + query = query.order_by(File.updated_at.desc()) + else: + query = query.order_by(File.updated_at.desc()) + + else: + query = query.order_by(File.updated_at.desc()) + + # Count BEFORE pagination + total = query.count() + + if skip: + query = query.offset(skip) + if limit: + query = query.limit(limit) + + items = query.all() + + files = [] + for file, user in items: + files.append( + FileUserResponse( + **FileModel.model_validate(file).model_dump(), + user=( + UserResponse( + **UserModel.model_validate(user).model_dump() + ) + if user + else None + ), + ) + ) + + return KnowledgeFileListResponse(items=files, total=total) + except Exception as e: + print(e) + return KnowledgeFileListResponse(items=[], total=0) + def get_files_by_id(self, knowledge_id: str) -> list[FileModel]: try: with get_db() as db: diff --git a/backend/open_webui/models/messages.py b/backend/open_webui/models/messages.py index 98be21463d..5b068b6449 100644 --- a/backend/open_webui/models/messages.py +++ b/backend/open_webui/models/messages.py @@ -9,7 +9,7 @@ from open_webui.models.users import Users, User, UserNameResponse from open_webui.models.channels import Channels, ChannelMember -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, field_validator from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON from sqlalchemy import or_, func, select, and_, text from sqlalchemy.sql import exists @@ -108,11 +108,24 @@ class MessageUserResponse(MessageModel): user: Optional[UserNameResponse] = None +class MessageUserSlimResponse(MessageUserResponse): + data: bool | None = None + + @field_validator("data", mode="before") + def convert_data_to_bool(cls, v): + # No data or not a dict → False + if not isinstance(v, dict): + return False + + # True if ANY value in the dict is non-empty + return any(bool(val) for val in v.values()) + + class MessageReplyToResponse(MessageUserResponse): - reply_to_message: Optional[MessageUserResponse] = None + reply_to_message: Optional[MessageUserSlimResponse] = None -class MessageWithReactionsResponse(MessageUserResponse): +class MessageWithReactionsResponse(MessageUserSlimResponse): reactions: list[Reactions] diff --git a/backend/open_webui/models/notes.py b/backend/open_webui/models/notes.py index af75fab598..cfeddf4a8c 100644 --- a/backend/open_webui/models/notes.py +++ b/backend/open_webui/models/notes.py @@ -7,12 +7,15 @@ from functools import lru_cache from open_webui.internal.db import Base, get_db from open_webui.models.groups import Groups from open_webui.utils.access_control import has_access -from open_webui.models.users import Users, UserResponse +from open_webui.models.users import User, UserModel, Users, UserResponse from pydantic import BaseModel, ConfigDict from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON -from sqlalchemy import or_, func, 
select, and_, text +from sqlalchemy.dialects.postgresql import JSONB + + +from sqlalchemy import or_, func, select, and_, text, cast, or_, and_, func from sqlalchemy.sql import exists #################### @@ -75,7 +78,138 @@ class NoteUserResponse(NoteModel): user: Optional[UserResponse] = None +class NoteItemResponse(BaseModel): + id: str + title: str + data: Optional[dict] + updated_at: int + created_at: int + user: Optional[UserResponse] = None + + +class NoteListResponse(BaseModel): + items: list[NoteUserResponse] + total: int + + class NoteTable: + def _has_permission(self, db, query, filter: dict, permission: str = "read"): + group_ids = filter.get("group_ids", []) + user_id = filter.get("user_id") + dialect_name = db.bind.dialect.name + + conditions = [] + + # Handle read_only permission separately + if permission == "read_only": + # For read_only, we want items where: + # 1. User has explicit read permission (via groups or user-level) + # 2. BUT does NOT have write permission + # 3. Public items are NOT considered read_only + + read_conditions = [] + + # Group-level read permission + if group_ids: + group_read_conditions = [] + for gid in group_ids: + if dialect_name == "sqlite": + group_read_conditions.append( + Note.access_control["read"]["group_ids"].contains([gid]) + ) + elif dialect_name == "postgresql": + group_read_conditions.append( + cast( + Note.access_control["read"]["group_ids"], + JSONB, + ).contains([gid]) + ) + + if group_read_conditions: + read_conditions.append(or_(*group_read_conditions)) + + # Combine read conditions + if read_conditions: + has_read = or_(*read_conditions) + else: + # If no read conditions, return empty result + return query.filter(False) + + # Now exclude items where user has write permission + write_exclusions = [] + + # Exclude items owned by user (they have implicit write) + if user_id: + write_exclusions.append(Note.user_id != user_id) + + # Exclude items where user has explicit write permission via groups + if group_ids: + group_write_conditions = [] + for gid in group_ids: + if dialect_name == "sqlite": + group_write_conditions.append( + Note.access_control["write"]["group_ids"].contains([gid]) + ) + elif dialect_name == "postgresql": + group_write_conditions.append( + cast( + Note.access_control["write"]["group_ids"], + JSONB, + ).contains([gid]) + ) + + if group_write_conditions: + # User should NOT have write permission + write_exclusions.append(~or_(*group_write_conditions)) + + # Exclude public items (items without access_control) + write_exclusions.append(Note.access_control.isnot(None)) + write_exclusions.append(cast(Note.access_control, String) != "null") + + # Combine: has read AND does not have write AND not public + if write_exclusions: + query = query.filter(and_(has_read, *write_exclusions)) + else: + query = query.filter(has_read) + + return query + + # Original logic for other permissions (read, write, etc.) 
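+        # A note is visible under `permission` when ANY of these holds:
+        #   1. it is public (access_control is NULL / the JSON literal "null"),
+        #   2. the requesting user owns it, or
+        #   3. one of the user's groups appears in
+        #      access_control[permission]["group_ids"].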
+ # Public access conditions + if group_ids or user_id: + conditions.extend( + [ + Note.access_control.is_(None), + cast(Note.access_control, String) == "null", + ] + ) + + # User-level permission (owner has all permissions) + if user_id: + conditions.append(Note.user_id == user_id) + + # Group-level permission + if group_ids: + group_conditions = [] + for gid in group_ids: + if dialect_name == "sqlite": + group_conditions.append( + Note.access_control[permission]["group_ids"].contains([gid]) + ) + elif dialect_name == "postgresql": + group_conditions.append( + cast( + Note.access_control[permission]["group_ids"], + JSONB, + ).contains([gid]) + ) + conditions.append(or_(*group_conditions)) + + if conditions: + query = query.filter(or_(*conditions)) + + return query + def insert_new_note( self, form_data: NoteForm, @@ -110,15 +244,107 @@ class NoteTable: notes = query.all() return [NoteModel.model_validate(note) for note in notes] + def search_notes( + self, user_id: str, filter: dict = {}, skip: int = 0, limit: int = 30 + ) -> NoteListResponse: + with get_db() as db: + query = db.query(Note, User).outerjoin(User, User.id == Note.user_id) + if filter: + query_key = filter.get("query") + if query_key: + query = query.filter( + or_( + Note.title.ilike(f"%{query_key}%"), + cast(Note.data["content"]["md"], Text).ilike( + f"%{query_key}%" + ), + ) + ) + + view_option = filter.get("view_option") + if view_option == "created": + query = query.filter(Note.user_id == user_id) + elif view_option == "shared": + query = query.filter(Note.user_id != user_id) + + # Apply access control filtering + if "permission" in filter: + permission = filter["permission"] + else: + permission = "write" + + query = self._has_permission( + db, + query, + filter, + permission=permission, + ) + + order_by = filter.get("order_by") + direction = filter.get("direction") + + if order_by == "name": + if direction == "asc": + query = query.order_by(Note.title.asc()) + else: + query = query.order_by(Note.title.desc()) + elif order_by == "created_at": + if direction == "asc": + query = query.order_by(Note.created_at.asc()) + else: + query = query.order_by(Note.created_at.desc()) + elif order_by == "updated_at": + if direction == "asc": + query = query.order_by(Note.updated_at.asc()) + else: + query = query.order_by(Note.updated_at.desc()) + else: + query = query.order_by(Note.updated_at.desc()) + + else: + query = query.order_by(Note.updated_at.desc()) + + # Count BEFORE pagination + total = query.count() + + if skip: + query = query.offset(skip) + if limit: + query = query.limit(limit) + + items = query.all() + + notes = [] + for note, user in items: + notes.append( + NoteUserResponse( + **NoteModel.model_validate(note).model_dump(), + user=( + UserResponse(**UserModel.model_validate(user).model_dump()) + if user + else None + ), + ) + ) + + return NoteListResponse(items=notes, total=total) + def get_notes_by_user_id( self, user_id: str, + permission: str = "read", skip: Optional[int] = None, limit: Optional[int] = None, ) -> list[NoteModel]: with get_db() as db: - query = db.query(Note).filter(Note.user_id == user_id) - query = query.order_by(Note.updated_at.desc()) + user_group_ids = [ + group.id for group in Groups.get_groups_by_member_id(user_id) + ] + + query = db.query(Note).order_by(Note.updated_at.desc()) + query = self._has_permission( + db, query, {"user_id": user_id, "group_ids": user_group_ids}, permission + ) if skip is not None: query = query.offset(skip) @@ -128,56 +354,6 @@ class NoteTable: notes = query.all() 
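+            # skip/limit were applied in SQL above, after the permission
+            # filter, so pagination counts only rows this user can read.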
             return [NoteModel.model_validate(note) for note in notes]

-    def get_notes_by_permission(
-        self,
-        user_id: str,
-        permission: str = "write",
-        skip: Optional[int] = None,
-        limit: Optional[int] = None,
-    ) -> list[NoteModel]:
-        with get_db() as db:
-            user_groups = Groups.get_groups_by_member_id(user_id)
-            user_group_ids = {group.id for group in user_groups}
-
-            # Order newest-first. We stream to keep memory usage low.
-            query = (
-                db.query(Note)
-                .order_by(Note.updated_at.desc())
-                .execution_options(stream_results=True)
-                .yield_per(256)
-            )
-
-            results: list[NoteModel] = []
-            n_skipped = 0
-
-            for note in query:
-                # Fast-pass #1: owner
-                if note.user_id == user_id:
-                    permitted = True
-                # Fast-pass #2: public/open
-                elif note.access_control is None:
-                    # Technically this should mean public access for both read and write, but we'll only do read for now
-                    # We might want to change this behavior later
-                    permitted = permission == "read"
-                else:
-                    permitted = has_access(
-                        user_id, permission, note.access_control, user_group_ids
-                    )
-
-                if not permitted:
-                    continue
-
-                # Apply skip AFTER permission filtering so it counts only accessible notes
-                if skip and n_skipped < skip:
-                    n_skipped += 1
-                    continue
-
-                results.append(NoteModel.model_validate(note))
-                if limit is not None and len(results) >= limit:
-                    break
-
-            return results
-
     def get_note_by_id(self, id: str) -> Optional[NoteModel]:
         with get_db() as db:
             note = db.query(Note).filter(Note.id == id).first()
diff --git a/backend/open_webui/models/users.py b/backend/open_webui/models/users.py
index 86f9d011e8..5807603a89 100644
--- a/backend/open_webui/models/users.py
+++ b/backend/open_webui/models/users.py
@@ -5,11 +5,11 @@ from open_webui.internal.db import Base, JSONField, get_db
+
 from open_webui.env import DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL
+
 from open_webui.models.chats import Chats
 from open_webui.models.groups import Groups, GroupMember
 from open_webui.models.channels import ChannelMember
-
 from open_webui.utils.misc import throttle
diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py
index 1346cd065c..969ced62e0 100644
--- a/backend/open_webui/retrieval/loaders/main.py
+++ b/backend/open_webui/retrieval/loaders/main.py
@@ -144,19 +144,17 @@ class DoclingLoader:
         with open(self.file_path, "rb") as f:
             headers = {}
             if self.api_key:
-                headers["Authorization"] = f"Bearer {self.api_key}"
-
-            files = {
-                "files": (
-                    self.file_path,
-                    f,
-                    self.mime_type or "application/octet-stream",
-                )
-            }
+                headers["X-Api-Key"] = self.api_key

             r = requests.post(
                 f"{self.url}/v1/convert/file",
-                files=files,
+                files={
+                    "files": (
+                        self.file_path,
+                        f,
+                        self.mime_type or "application/octet-stream",
+                    )
+                },
                 data={
                     "image_export_mode": "placeholder",
                     **self.params,
diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py
index bdbde0b3a9..ec0a8d8ed7 100644
--- a/backend/open_webui/retrieval/web/utils.py
+++ b/backend/open_webui/retrieval/web/utils.py
@@ -33,6 +33,7 @@ from open_webui.config import (
     PLAYWRIGHT_WS_URL,
     PLAYWRIGHT_TIMEOUT,
     WEB_LOADER_ENGINE,
+    WEB_LOADER_TIMEOUT,
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
     TAVILY_API_KEY,
@@ -674,6 +675,20 @@ def get_web_loader(
     if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web":
         WebLoaderClass = SafeWebBaseLoader
+
+        request_kwargs = {}
+        if WEB_LOADER_TIMEOUT.value:
+            try:
+                timeout_value = float(WEB_LOADER_TIMEOUT.value)
+            except ValueError:
+                timeout_value = None
+
+            if
timeout_value: + request_kwargs["timeout"] = timeout_value + + if request_kwargs: + web_loader_args["requests_kwargs"] = request_kwargs + if WEB_LOADER_ENGINE.value == "playwright": WebLoaderClass = SafePlaywrightURLLoader web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index 9c84f9c704..7caf57b0aa 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -33,6 +33,7 @@ from fastapi.responses import FileResponse from pydantic import BaseModel +from open_webui.utils.misc import strict_match_mime_type from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.headers import include_user_info_headers from open_webui.config import ( @@ -1155,17 +1156,9 @@ def transcription( stt_supported_content_types = getattr( request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] - ) + ) or ["audio/*", "video/webm"] - if not any( - fnmatch(file.content_type, content_type) - for content_type in ( - stt_supported_content_types - if stt_supported_content_types - and any(t.strip() for t in stt_supported_content_types) - else ["audio/*", "video/webm"] - ) - ): + if not strict_match_mime_type(stt_supported_content_types, file.content_type): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, diff --git a/backend/open_webui/routers/channels.py b/backend/open_webui/routers/channels.py index 0dff67da3e..2dbb047231 100644 --- a/backend/open_webui/routers/channels.py +++ b/backend/open_webui/routers/channels.py @@ -5,7 +5,7 @@ from typing import Optional from fastapi import APIRouter, Depends, HTTPException, Request, status, BackgroundTasks from pydantic import BaseModel - +from pydantic import field_validator from open_webui.socket.main import ( emit_to_users, @@ -39,6 +39,8 @@ from open_webui.models.messages import ( ) +from open_webui.utils.files import get_image_base64_from_file_id + from open_webui.config import ENABLE_ADMIN_CHAT_ACCESS, ENABLE_ADMIN_EXPORT from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS @@ -666,7 +668,16 @@ async def delete_channel_by_id( class MessageUserResponse(MessageResponse): - pass + data: bool | None = None + + @field_validator("data", mode="before") + def convert_data_to_bool(cls, v): + # No data or not a dict → False + if not isinstance(v, dict): + return False + + # True if ANY value in the dict is non-empty + return any(bool(val) for val in v.values()) @router.get("/{id}/messages", response_model=list[MessageUserResponse]) @@ -906,6 +917,10 @@ async def model_response_handler(request, channel, message, user): for file in thread_message_files: if file.get("type", "") == "image": images.append(file.get("url", "")) + elif file.get("content_type", "").startswith("image/"): + image = get_image_base64_from_file_id(file.get("id", "")) + if image: + images.append(image) thread_history_string = "\n\n".join(thread_history) system_message = { @@ -1078,6 +1093,15 @@ async def post_new_message( try: message, channel = await new_message_handler(request, id, form_data, user) + try: + if files := message.data.get("files", []): + for file in files: + Channels.set_file_message_id_in_channel_by_id( + channel.id, file.get("id", ""), message.id + ) + except Exception as e: + log.debug(e) + active_user_ids = get_user_ids_from_room(f"channel:{channel.id}") async def background_handler(): @@ -1108,7 +1132,7 @@ async def post_new_message( 
############################ -@router.get("/{id}/messages/{message_id}", response_model=Optional[MessageUserResponse]) +@router.get("/{id}/messages/{message_id}", response_model=Optional[MessageResponse]) async def get_channel_message( id: str, message_id: str, user=Depends(get_verified_user) ): @@ -1142,7 +1166,7 @@ async def get_channel_message( status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.DEFAULT() ) - return MessageUserResponse( + return MessageResponse( **{ **message.model_dump(), "user": UserNameResponse( @@ -1152,6 +1176,48 @@ async def get_channel_message( ) +############################ +# GetChannelMessageData +############################ + + +@router.get("/{id}/messages/{message_id}/data", response_model=Optional[dict]) +async def get_channel_message_data( + id: str, message_id: str, user=Depends(get_verified_user) +): + channel = Channels.get_channel_by_id(id) + if not channel: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND + ) + + if channel.type in ["group", "dm"]: + if not Channels.is_user_channel_member(channel.id, user.id): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT() + ) + else: + if user.role != "admin" and not has_access( + user.id, type="read", access_control=channel.access_control + ): + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT() + ) + + message = Messages.get_message_by_id(message_id) + if not message: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND + ) + + if message.channel_id != id: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.DEFAULT() + ) + + return message.data + + ############################ # PinChannelMessage ############################ diff --git a/backend/open_webui/routers/chats.py b/backend/open_webui/routers/chats.py index 78cd8bdb1a..1b0433587e 100644 --- a/backend/open_webui/routers/chats.py +++ b/backend/open_webui/routers/chats.py @@ -3,10 +3,12 @@ import logging from typing import Optional +from open_webui.utils.misc import get_message_list from open_webui.socket.main import get_event_emitter from open_webui.models.chats import ( ChatForm, ChatImportForm, + ChatUsageStatsListResponse, ChatsImportForm, ChatResponse, Chats, @@ -66,6 +68,132 @@ def get_session_user_chat_list( ) +############################ +# GetChatUsageStats +# EXPERIMENTAL: may be removed in future releases +############################ + + +@router.get("/stats/usage", response_model=ChatUsageStatsListResponse) +def get_session_user_chat_usage_stats( + items_per_page: Optional[int] = 50, + page: Optional[int] = 1, + user=Depends(get_verified_user), +): + try: + limit = items_per_page + skip = (page - 1) * limit + + result = Chats.get_chats_by_user_id(user.id, skip=skip, limit=limit) + + chats = result.items + total = result.total + + chat_stats = [] + for chat in chats: + messages_map = chat.chat.get("history", {}).get("messages", {}) + message_id = chat.chat.get("history", {}).get("currentId") + + if messages_map and message_id: + try: + history_models = {} + history_message_count = len(messages_map) + history_user_messages = [] + history_assistant_messages = [] + + for message in messages_map.values(): + if message.get("role", "") == "user": + history_user_messages.append(message) + elif message.get("role", "") == "assistant": + history_assistant_messages.append(message) + model = message.get("model", None) + if model: + 
if model not in history_models: + history_models[model] = 0 + history_models[model] += 1 + + average_user_message_content_length = ( + sum( + len(message.get("content", "")) + for message in history_user_messages + ) + / len(history_user_messages) + if len(history_user_messages) > 0 + else 0 + ) + average_assistant_message_content_length = ( + sum( + len(message.get("content", "")) + for message in history_assistant_messages + ) + / len(history_assistant_messages) + if len(history_assistant_messages) > 0 + else 0 + ) + + response_times = [] + for message in history_assistant_messages: + user_message_id = message.get("parentId", None) + if user_message_id and user_message_id in messages_map: + user_message = messages_map[user_message_id] + response_time = message.get( + "timestamp", 0 + ) - user_message.get("timestamp", 0) + + response_times.append(response_time) + + average_response_time = ( + sum(response_times) / len(response_times) + if len(response_times) > 0 + else 0 + ) + + message_list = get_message_list(messages_map, message_id) + message_count = len(message_list) + + models = {} + for message in reversed(message_list): + if message.get("role") == "assistant": + model = message.get("model", None) + if model: + if model not in models: + models[model] = 0 + models[model] += 1 + + annotation = message.get("annotation", {}) + + chat_stats.append( + { + "id": chat.id, + "models": models, + "message_count": message_count, + "history_models": history_models, + "history_message_count": history_message_count, + "history_user_message_count": len(history_user_messages), + "history_assistant_message_count": len( + history_assistant_messages + ), + "average_response_time": average_response_time, + "average_user_message_content_length": average_user_message_content_length, + "average_assistant_message_content_length": average_assistant_message_content_length, + "tags": chat.meta.get("tags", []), + "last_message_at": message_list[-1].get("timestamp", None), + "updated_at": chat.updated_at, + "created_at": chat.created_at, + } + ) + except Exception as e: + pass + + return ChatUsageStatsListResponse(items=chat_stats, total=total) + + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=ERROR_MESSAGES.DEFAULT() + ) + + ############################ # DeleteAllChats ############################ diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 8af921bc7a..6eb7a19cbc 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -27,6 +27,7 @@ from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT +from open_webui.models.channels import Channels from open_webui.models.users import Users from open_webui.models.files import ( FileForm, @@ -38,7 +39,6 @@ from open_webui.models.knowledge import Knowledges from open_webui.models.groups import Groups -from open_webui.routers.knowledge import get_knowledge, get_knowledge_list from open_webui.routers.retrieval import ProcessFileForm, process_file from open_webui.routers.audio import transcribe @@ -47,7 +47,7 @@ from open_webui.storage.provider import Storage from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.access_control import has_access - +from open_webui.utils.misc import strict_match_mime_type from pydantic import BaseModel log = logging.getLogger(__name__) @@ -91,6 +91,10 @@ def 
has_access_to_file( if knowledge_base.id == knowledge_base_id: return True + channels = Channels.get_channels_by_file_id_and_user_id(file_id, user.id) + if access_type == "read" and channels: + return True + return False @@ -104,17 +108,9 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us if file.content_type: stt_supported_content_types = getattr( request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] - ) + ) or ["audio/*", "video/webm"] - if any( - fnmatch(file.content_type, content_type) - for content_type in ( - stt_supported_content_types - if stt_supported_content_types - and any(t.strip() for t in stt_supported_content_types) - else ["audio/*", "video/webm"] - ) - ): + if strict_match_mime_type(stt_supported_content_types, file.content_type): file_path = Storage.get_file(file_path) result = transcribe(request, file_path, file_metadata, user) @@ -138,6 +134,7 @@ def process_uploaded_file(request, file, file_path, file_item, file_metadata, us f"File type {file.content_type} is not provided, but trying to process anyway" ) process_file(request, ProcessFileForm(file_id=file_item.id), user=user) + except Exception as e: log.error(f"Error processing file: {file_item.id}") Files.update_file_data_by_id( @@ -179,7 +176,7 @@ def upload_file_handler( user=Depends(get_verified_user), background_tasks: Optional[BackgroundTasks] = None, ): - log.info(f"file.content_type: {file.content_type}") + log.info(f"file.content_type: {file.content_type} {process}") if isinstance(metadata, str): try: @@ -247,6 +244,13 @@ def upload_file_handler( ), ) + if "channel_id" in file_metadata: + channel = Channels.get_channel_by_id_and_user_id( + file_metadata["channel_id"], user.id + ) + if channel: + Channels.add_file_to_channel_by_id(channel.id, file_item.id, user.id) + if process: if background_tasks and process_in_background: background_tasks.add_task( diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 3bfc961ac3..0801deb470 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -4,7 +4,9 @@ from fastapi import APIRouter, Depends, HTTPException, status, Request, Query from fastapi.concurrency import run_in_threadpool import logging +from open_webui.models.groups import Groups from open_webui.models.knowledge import ( + KnowledgeFileListResponse, Knowledges, KnowledgeForm, KnowledgeResponse, @@ -39,41 +41,115 @@ router = APIRouter() # getKnowledgeBases ############################ - -@router.get("/", response_model=list[KnowledgeUserResponse]) -async def get_knowledge(user=Depends(get_verified_user)): - # Return knowledge bases with read access - knowledge_bases = [] - if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: - knowledge_bases = Knowledges.get_knowledge_bases() - else: - knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "read") - - return [ - KnowledgeUserResponse( - **knowledge_base.model_dump(), - files=Knowledges.get_file_metadatas_by_id(knowledge_base.id), - ) - for knowledge_base in knowledge_bases - ] +PAGE_ITEM_COUNT = 30 -@router.get("/list", response_model=list[KnowledgeUserResponse]) -async def get_knowledge_list(user=Depends(get_verified_user)): - # Return knowledge bases with write access - knowledge_bases = [] - if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: - knowledge_bases = Knowledges.get_knowledge_bases() - else: - knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "write") +class 
KnowledgeAccessResponse(KnowledgeUserResponse): + write_access: Optional[bool] = False - return [ - KnowledgeUserResponse( - **knowledge_base.model_dump(), - files=Knowledges.get_file_metadatas_by_id(knowledge_base.id), - ) - for knowledge_base in knowledge_bases - ] + +class KnowledgeAccessListResponse(BaseModel): + items: list[KnowledgeAccessResponse] + total: int + + +@router.get("/", response_model=KnowledgeAccessListResponse) +async def get_knowledge_bases(page: Optional[int] = 1, user=Depends(get_verified_user)): + page = max(page, 1) + limit = PAGE_ITEM_COUNT + skip = (page - 1) * limit + + filter = {} + if not user.role == "admin" or not BYPASS_ADMIN_ACCESS_CONTROL: + groups = Groups.get_groups_by_member_id(user.id) + if groups: + filter["group_ids"] = [group.id for group in groups] + + filter["user_id"] = user.id + + result = Knowledges.search_knowledge_bases( + user.id, filter=filter, skip=skip, limit=limit + ) + + return KnowledgeAccessListResponse( + items=[ + KnowledgeAccessResponse( + **knowledge_base.model_dump(), + write_access=( + user.id == knowledge_base.user_id + or has_access(user.id, "write", knowledge_base.access_control) + ), + ) + for knowledge_base in result.items + ], + total=result.total, + ) + + +@router.get("/search", response_model=KnowledgeAccessListResponse) +async def search_knowledge_bases( + query: Optional[str] = None, + view_option: Optional[str] = None, + page: Optional[int] = 1, + user=Depends(get_verified_user), +): + page = max(page, 1) + limit = PAGE_ITEM_COUNT + skip = (page - 1) * limit + + filter = {} + if query: + filter["query"] = query + if view_option: + filter["view_option"] = view_option + + if not user.role == "admin" or not BYPASS_ADMIN_ACCESS_CONTROL: + groups = Groups.get_groups_by_member_id(user.id) + if groups: + filter["group_ids"] = [group.id for group in groups] + + filter["user_id"] = user.id + + result = Knowledges.search_knowledge_bases( + user.id, filter=filter, skip=skip, limit=limit + ) + + return KnowledgeAccessListResponse( + items=[ + KnowledgeAccessResponse( + **knowledge_base.model_dump(), + write_access=( + user.id == knowledge_base.user_id + or has_access(user.id, "write", knowledge_base.access_control) + ), + ) + for knowledge_base in result.items + ], + total=result.total, + ) + + +@router.get("/search/files", response_model=KnowledgeFileListResponse) +async def search_knowledge_files( + query: Optional[str] = None, + page: Optional[int] = 1, + user=Depends(get_verified_user), +): + page = max(page, 1) + limit = PAGE_ITEM_COUNT + skip = (page - 1) * limit + + filter = {} + if query: + filter["query"] = query + + groups = Groups.get_groups_by_member_id(user.id) + if groups: + filter["group_ids"] = [group.id for group in groups] + + filter["user_id"] = user.id + + return Knowledges.search_knowledge_files(filter=filter, skip=skip, limit=limit) ############################ @@ -185,7 +261,8 @@ async def reindex_knowledge_files(request: Request, user=Depends(get_verified_us class KnowledgeFilesResponse(KnowledgeResponse): - files: list[FileMetadataResponse] + files: Optional[list[FileMetadataResponse]] = None + write_access: Optional[bool] = False @router.get("/{id}", response_model=Optional[KnowledgeFilesResponse]) @@ -201,7 +278,10 @@ async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)): return KnowledgeFilesResponse( **knowledge.model_dump(), - files=Knowledges.get_file_metadatas_by_id(knowledge.id), + write_access=( + user.id == knowledge.user_id + or has_access(user.id, "write", 
knowledge.access_control) + ), ) else: raise HTTPException( @@ -264,6 +344,59 @@ async def update_knowledge_by_id( ) +############################ +# GetKnowledgeFilesById +############################ + + +@router.get("/{id}/files", response_model=KnowledgeFileListResponse) +async def get_knowledge_files_by_id( + id: str, + query: Optional[str] = None, + view_option: Optional[str] = None, + order_by: Optional[str] = None, + direction: Optional[str] = None, + page: Optional[int] = 1, + user=Depends(get_verified_user), +): + + knowledge = Knowledges.get_knowledge_by_id(id=id) + if not knowledge: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + if not ( + user.role == "admin" + or knowledge.user_id == user.id + or has_access(user.id, "read", knowledge.access_control) + ): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + + page = max(page, 1) + + limit = 30 + skip = (page - 1) * limit + + filter = {} + if query: + filter["query"] = query + if view_option: + filter["view_option"] = view_option + if order_by: + filter["order_by"] = order_by + if direction: + filter["direction"] = direction + + return Knowledges.search_files_by_id( + id, user.id, filter=filter, skip=skip, limit=limit + ) + + ############################ # AddFileToKnowledge ############################ @@ -309,11 +442,6 @@ def add_file_to_knowledge_by_id( detail=ERROR_MESSAGES.FILE_NOT_PROCESSED, ) - # Add file to knowledge base - Knowledges.add_file_to_knowledge_by_id( - knowledge_id=id, file_id=form_data.file_id, user_id=user.id - ) - # Add content to the vector database try: process_file( @@ -321,6 +449,11 @@ def add_file_to_knowledge_by_id( ProcessFileForm(file_id=form_data.file_id, collection_name=id), user=user, ) + + # Add file to knowledge base + Knowledges.add_file_to_knowledge_by_id( + knowledge_id=id, file_id=form_data.file_id, user_id=user.id + ) except Exception as e: log.debug(e) raise HTTPException( diff --git a/backend/open_webui/routers/notes.py b/backend/open_webui/routers/notes.py index 3858c4670f..74914ae5c6 100644 --- a/backend/open_webui/routers/notes.py +++ b/backend/open_webui/routers/notes.py @@ -8,11 +8,21 @@ from pydantic import BaseModel from open_webui.socket.main import sio - +from open_webui.models.groups import Groups from open_webui.models.users import Users, UserResponse -from open_webui.models.notes import Notes, NoteModel, NoteForm, NoteUserResponse +from open_webui.models.notes import ( + NoteListResponse, + Notes, + NoteModel, + NoteForm, + NoteUserResponse, +) -from open_webui.config import ENABLE_ADMIN_CHAT_ACCESS, ENABLE_ADMIN_EXPORT +from open_webui.config import ( + BYPASS_ADMIN_ACCESS_CONTROL, + ENABLE_ADMIN_CHAT_ACCESS, + ENABLE_ADMIN_EXPORT, +) from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS @@ -30,39 +40,17 @@ router = APIRouter() ############################ -@router.get("/", response_model=list[NoteUserResponse]) -async def get_notes(request: Request, user=Depends(get_verified_user)): - - if user.role != "admin" and not has_permission( - user.id, "features.notes", request.app.state.config.USER_PERMISSIONS - ): - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.UNAUTHORIZED, - ) - - notes = [ - NoteUserResponse( - **{ - **note.model_dump(), - "user": UserResponse(**Users.get_user_by_id(note.user_id).model_dump()), - } - ) - for note in Notes.get_notes_by_permission(user.id, 
"write") - ] - - return notes - - -class NoteTitleIdResponse(BaseModel): +class NoteItemResponse(BaseModel): id: str title: str + data: Optional[dict] updated_at: int created_at: int + user: Optional[UserResponse] = None -@router.get("/list", response_model=list[NoteTitleIdResponse]) -async def get_note_list( +@router.get("/", response_model=list[NoteItemResponse]) +async def get_notes( request: Request, page: Optional[int] = None, user=Depends(get_verified_user) ): if user.role != "admin" and not has_permission( @@ -80,15 +68,64 @@ async def get_note_list( skip = (page - 1) * limit notes = [ - NoteTitleIdResponse(**note.model_dump()) - for note in Notes.get_notes_by_permission( - user.id, "write", skip=skip, limit=limit + NoteUserResponse( + **{ + **note.model_dump(), + "user": UserResponse(**Users.get_user_by_id(note.user_id).model_dump()), + } ) + for note in Notes.get_notes_by_user_id(user.id, "read", skip=skip, limit=limit) ] - return notes +@router.get("/search", response_model=NoteListResponse) +async def search_notes( + request: Request, + query: Optional[str] = None, + view_option: Optional[str] = None, + permission: Optional[str] = None, + order_by: Optional[str] = None, + direction: Optional[str] = None, + page: Optional[int] = 1, + user=Depends(get_verified_user), +): + if user.role != "admin" and not has_permission( + user.id, "features.notes", request.app.state.config.USER_PERMISSIONS + ): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.UNAUTHORIZED, + ) + + limit = None + skip = None + if page is not None: + limit = 60 + skip = (page - 1) * limit + + filter = {} + if query: + filter["query"] = query + if view_option: + filter["view_option"] = view_option + if permission: + filter["permission"] = permission + if order_by: + filter["order_by"] = order_by + if direction: + filter["direction"] = direction + + if not user.role == "admin" or not BYPASS_ADMIN_ACCESS_CONTROL: + groups = Groups.get_groups_by_member_id(user.id) + if groups: + filter["group_ids"] = [group.id for group in groups] + + filter["user_id"] = user.id + + return Notes.search_notes(user.id, filter, skip=skip, limit=limit) + + ############################ # CreateNewNote ############################ @@ -98,7 +135,6 @@ async def get_note_list( async def create_new_note( request: Request, form_data: NoteForm, user=Depends(get_verified_user) ): - if user.role != "admin" and not has_permission( user.id, "features.notes", request.app.state.config.USER_PERMISSIONS ): @@ -122,7 +158,11 @@ async def create_new_note( ############################ -@router.get("/{id}", response_model=Optional[NoteModel]) +class NoteResponse(NoteModel): + write_access: bool = False + + +@router.get("/{id}", response_model=Optional[NoteResponse]) async def get_note_by_id(request: Request, id: str, user=Depends(get_verified_user)): if user.role != "admin" and not has_permission( user.id, "features.notes", request.app.state.config.USER_PERMISSIONS @@ -146,7 +186,15 @@ async def get_note_by_id(request: Request, id: str, user=Depends(get_verified_us status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT() ) - return note + write_access = ( + user.role == "admin" + or (user.id == note.user_id) + or has_access( + user.id, type="write", access_control=note.access_control, strict=False + ) + ) + + return NoteResponse(**note.model_dump(), write_access=write_access) ############################ diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 
b7ed993895..08ffde1733 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -536,6 +536,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "WEB_LOADER_TIMEOUT": request.app.state.config.WEB_LOADER_TIMEOUT, "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, @@ -594,6 +595,7 @@ class WebConfig(BaseModel): SOUGOU_API_SID: Optional[str] = None SOUGOU_API_SK: Optional[str] = None WEB_LOADER_ENGINE: Optional[str] = None + WEB_LOADER_TIMEOUT: Optional[str] = None ENABLE_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None PLAYWRIGHT_WS_URL: Optional[str] = None PLAYWRIGHT_TIMEOUT: Optional[int] = None @@ -1071,6 +1073,8 @@ async def update_rag_config( # Web loader settings request.app.state.config.WEB_LOADER_ENGINE = form_data.web.WEB_LOADER_ENGINE + request.app.state.config.WEB_LOADER_TIMEOUT = form_data.web.WEB_LOADER_TIMEOUT + request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ( form_data.web.ENABLE_WEB_LOADER_SSL_VERIFICATION ) @@ -1206,6 +1210,7 @@ async def update_rag_config( "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "WEB_LOADER_TIMEOUT": request.app.state.config.WEB_LOADER_TIMEOUT, "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, @@ -1401,6 +1406,7 @@ def save_docs_to_vector_db( if request.app.state.config.RAG_EMBEDDING_ENGINE == "azure_openai" else None ), + enable_async=request.app.state.config.ENABLE_ASYNC_EMBEDDING, ) # Run async embedding in sync context diff --git a/backend/open_webui/routers/users.py b/backend/open_webui/routers/users.py index 3c1bbb72a8..dd2077206a 100644 --- a/backend/open_webui/routers/users.py +++ b/backend/open_webui/routers/users.py @@ -391,6 +391,7 @@ async def update_user_info_by_session_user( class UserActiveResponse(UserStatus): name: str profile_image_url: Optional[str] = None + groups: Optional[list] = [] is_active: bool model_config = ConfigDict(extra="allow") @@ -412,11 +413,12 @@ async def get_user_by_id(user_id: str, user=Depends(get_verified_user)): ) user = Users.get_user_by_id(user_id) - if user: + groups = Groups.get_groups_by_member_id(user_id) return UserActiveResponse( **{ **user.model_dump(), + "groups": [{"id": group.id, "name": group.name} for group in groups], "is_active": Users.is_user_active(user_id), } ) diff --git a/backend/open_webui/utils/db/access_control.py b/backend/open_webui/utils/db/access_control.py new file mode 100644 index 0000000000..d2e6151e5b --- /dev/null +++ b/backend/open_webui/utils/db/access_control.py @@ -0,0 +1,130 @@ +from pydantic import BaseModel, ConfigDict +from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON +from sqlalchemy.dialects.postgresql import JSONB + + +from sqlalchemy import or_, func, select, and_, text, cast, or_, and_, func + + +def has_permission(db, DocumentModel, query, filter: dict, permission: str = "read"): + 
group_ids = filter.get("group_ids", [])
+    user_id = filter.get("user_id")
+    dialect_name = db.bind.dialect.name
+
+    conditions = []
+
+    # Handle read_only permission separately
+    if permission == "read_only":
+        # For read_only, we want items where:
+        # 1. The user has explicit read permission via one of their groups
+        # 2. BUT does NOT have write permission (ownership or group write)
+        # 3. Public items are NOT considered read_only
+
+        read_conditions = []
+
+        # Group-level read permission
+        if group_ids:
+            group_read_conditions = []
+            for gid in group_ids:
+                if dialect_name == "sqlite":
+                    group_read_conditions.append(
+                        DocumentModel.access_control["read"]["group_ids"].contains(
+                            [gid]
+                        )
+                    )
+                elif dialect_name == "postgresql":
+                    group_read_conditions.append(
+                        cast(
+                            DocumentModel.access_control["read"]["group_ids"],
+                            JSONB,
+                        ).contains([gid])
+                    )
+
+            if group_read_conditions:
+                read_conditions.append(or_(*group_read_conditions))
+
+        # Combine read conditions
+        if read_conditions:
+            has_read = or_(*read_conditions)
+        else:
+            # If no read conditions, return empty result
+            return query.filter(False)
+
+        # Now exclude items where user has write permission
+        write_exclusions = []
+
+        # Exclude items owned by user (they have implicit write)
+        if user_id:
+            write_exclusions.append(DocumentModel.user_id != user_id)
+
+        # Exclude items where user has explicit write permission via groups
+        if group_ids:
+            group_write_conditions = []
+            for gid in group_ids:
+                if dialect_name == "sqlite":
+                    group_write_conditions.append(
+                        DocumentModel.access_control["write"]["group_ids"].contains(
+                            [gid]
+                        )
+                    )
+                elif dialect_name == "postgresql":
+                    group_write_conditions.append(
+                        cast(
+                            DocumentModel.access_control["write"]["group_ids"],
+                            JSONB,
+                        ).contains([gid])
+                    )
+
+            if group_write_conditions:
+                # User should NOT have write permission
+                write_exclusions.append(~or_(*group_write_conditions))
+
+        # Exclude public items (items without access_control)
+        write_exclusions.append(DocumentModel.access_control.isnot(None))
+        write_exclusions.append(cast(DocumentModel.access_control, String) != "null")
+
+        # Combine: has read AND does not have write AND not public
+        if write_exclusions:
+            query = query.filter(and_(has_read, *write_exclusions))
+        else:
+            query = query.filter(has_read)
+
+        return query
+
+    # Original logic for other permissions (read, write, etc.)
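+    # For reference, a minimal sketch of the access_control shape these JSON
+    # lookups assume (inferred from the queries in this helper, not a schema):
+    #     {"read": {"group_ids": [...]}, "write": {"group_ids": [...]}}
+    # A NULL / "null" access_control marks an item as public.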
+    # Public access conditions
+    if group_ids or user_id:
+        conditions.extend(
+            [
+                DocumentModel.access_control.is_(None),
+                cast(DocumentModel.access_control, String) == "null",
+            ]
+        )
+
+    # User-level permission (owner has all permissions)
+    if user_id:
+        conditions.append(DocumentModel.user_id == user_id)
+
+    # Group-level permission
+    if group_ids:
+        group_conditions = []
+        for gid in group_ids:
+            if dialect_name == "sqlite":
+                group_conditions.append(
+                    DocumentModel.access_control[permission]["group_ids"].contains(
+                        [gid]
+                    )
+                )
+            elif dialect_name == "postgresql":
+                group_conditions.append(
+                    cast(
+                        DocumentModel.access_control[permission]["group_ids"],
+                        JSONB,
+                    ).contains([gid])
+                )
+        conditions.append(or_(*group_conditions))
+
+    if conditions:
+        query = query.filter(or_(*conditions))
+
+    return query
diff --git a/backend/open_webui/utils/files.py b/backend/open_webui/utils/files.py
index 4f9564b7d4..cd94a41144 100644
--- a/backend/open_webui/utils/files.py
+++ b/backend/open_webui/utils/files.py
@@ -10,7 +10,11 @@ from fastapi import (
     Request,
     UploadFile,
 )
+from typing import Optional
+from pathlib import Path

+from open_webui.storage.provider import Storage
+from open_webui.models.files import Files
 from open_webui.routers.files import upload_file_handler

 import mimetypes
@@ -113,3 +117,26 @@ def get_file_url_from_base64(request, base64_file_string, metadata, user):
     elif "data:audio/wav;base64" in base64_file_string:
         return get_audio_url_from_base64(request, base64_file_string, metadata, user)
     return None
+
+
+def get_image_base64_from_file_id(id: str) -> Optional[str]:
+    file = Files.get_file_by_id(id)
+    if not file:
+        return None
+
+    try:
+        file_path = Storage.get_file(file.path)
+        file_path = Path(file_path)
+
+        # Check if the file already exists in the cache
+        if file_path.is_file():
+            import base64
+
+            with open(file_path, "rb") as image_file:
+                encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
+            content_type, _ = mimetypes.guess_type(file_path.name)
+            return f"data:{content_type or 'application/octet-stream'};base64,{encoded_string}"
+        else:
+            return None
+    except Exception as e:
+        return None
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 140d2bc85d..d397471dd9 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -716,17 +716,18 @@ async def chat_web_search_handler(
     return form_data

-def get_last_images(message_list):
+def get_images_from_messages(message_list):
     images = []
+
     for message in reversed(message_list):
-        images_flag = False
+
+        message_images = []
         for file in message.get("files", []):
             if file.get("type") == "image":
-                images.append(file.get("url"))
-                images_flag = True
+                message_images.append(file.get("url"))

-        if images_flag:
-            break
+        if message_images:
+            images.append(message_images)

     return images

@@ -780,7 +781,16 @@ async def chat_image_generation_handler(
     user_message = get_last_user_message(message_list)
     prompt = user_message

-    input_images = get_last_images(message_list)
+    message_images = get_images_from_messages(message_list)
+
+    # Limit to first 2 sets of images
+    # We may want to change this in the future to allow more images
+    input_images = []
+    for idx, images in enumerate(message_images):
+        if idx >= 2:
+            break
+        for image in images:
+            input_images.append(image)

     system_message_content = ""
diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py
index 5e3f3c4834..e0e21249f6 100644
--- a/backend/open_webui/utils/misc.py
+++ 
b/backend/open_webui/utils/misc.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Callable, Optional, Sequence, Union import json import aiohttp +import mimeparse import collections.abc @@ -577,6 +578,37 @@ def throttle(interval: float = 10.0): return decorator +def strict_match_mime_type(supported: list[str] | str, header: str) -> Optional[str]: + """ + Strictly match the mime type with the supported mime types. + + :param supported: The supported mime types. + :param header: The header to match. + :return: The matched mime type or None if no match is found. + """ + + try: + if isinstance(supported, str): + supported = supported.split(",") + + supported = [s for s in supported if s.strip() and "/" in s] + + match = mimeparse.best_match(supported, header) + if not match: + return None + + _, _, match_params = mimeparse.parse_mime_type(match) + _, _, header_params = mimeparse.parse_mime_type(header) + for k, v in match_params.items(): + if header_params.get(k) != v: + return None + + return match + except Exception as e: + log.exception(f"Failed to match mime type {header}: {e}") + return None + + def extract_urls(text: str) -> list[str]: # Regex pattern to match URLs url_pattern = re.compile( @@ -624,14 +656,17 @@ def stream_chunks_handler(stream: aiohttp.StreamReader): yield line else: yield b"data: {}" + yield b"\n" else: # Normal mode: check if line exceeds limit if len(line) > max_buffer_size: skip_mode = True yield b"data: {}" + yield b"\n" log.info(f"Skip mode triggered, line size: {len(line)}") else: yield line + yield b"\n" # Save the last incomplete fragment buffer = lines[-1] @@ -646,5 +681,6 @@ def stream_chunks_handler(stream: aiohttp.StreamReader): # Process remaining buffer data if buffer and not skip_mode: yield buffer + yield b"\n" return yield_safe_stream_chunks() diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 61c98ca744..1ef5268bae 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -55,6 +55,7 @@ from open_webui.config import ( OAUTH_ALLOWED_DOMAINS, OAUTH_UPDATE_PICTURE_ON_LOGIN, OAUTH_ACCESS_TOKEN_REQUEST_INCLUDE_CLIENT_ID, + OAUTH_AUDIENCE, WEBHOOK_URL, JWT_EXPIRES_IN, AppConfig, @@ -126,6 +127,7 @@ auth_manager_config.OAUTH_ALLOWED_DOMAINS = OAUTH_ALLOWED_DOMAINS auth_manager_config.WEBHOOK_URL = WEBHOOK_URL auth_manager_config.JWT_EXPIRES_IN = JWT_EXPIRES_IN auth_manager_config.OAUTH_UPDATE_PICTURE_ON_LOGIN = OAUTH_UPDATE_PICTURE_ON_LOGIN +auth_manager_config.OAUTH_AUDIENCE = OAUTH_AUDIENCE FERNET = None @@ -1270,7 +1272,12 @@ class OAuthManager: client = self.get_client(provider) if client is None: raise HTTPException(404) - return await client.authorize_redirect(request, redirect_uri) + + kwargs = {} + if (auth_manager_config.OAUTH_AUDIENCE): + kwargs["audience"] = auth_manager_config.OAUTH_AUDIENCE + + return await client.authorize_redirect(request, redirect_uri, **kwargs) async def handle_callback(self, request, provider, response): if provider not in OAUTH_PROVIDERS: diff --git a/backend/open_webui/utils/redis.py b/backend/open_webui/utils/redis.py index cc29ce6683..da6df2a7f9 100644 --- a/backend/open_webui/utils/redis.py +++ b/backend/open_webui/utils/redis.py @@ -7,6 +7,7 @@ import redis from open_webui.env import ( REDIS_CLUSTER, + REDIS_SOCKET_CONNECT_TIMEOUT, REDIS_SENTINEL_HOSTS, REDIS_SENTINEL_MAX_RETRY_COUNT, REDIS_SENTINEL_PORT, @@ -162,6 +163,7 @@ def get_redis_connection( username=redis_config["username"], password=redis_config["password"], 
decode_responses=decode_responses, + socket_connect_timeout=REDIS_SOCKET_CONNECT_TIMEOUT, ) connection = SentinelRedisProxy( sentinel, @@ -188,6 +190,7 @@ def get_redis_connection( username=redis_config["username"], password=redis_config["password"], decode_responses=decode_responses, + socket_connect_timeout=REDIS_SOCKET_CONNECT_TIMEOUT, ) connection = SentinelRedisProxy( sentinel, diff --git a/backend/requirements-min.txt b/backend/requirements-min.txt index f22ad7f0cf..bcf154279e 100644 --- a/backend/requirements-min.txt +++ b/backend/requirements-min.txt @@ -1,7 +1,7 @@ # Minimal requirements for backend to run # WIP: use this as a reference to build a minimal docker image -fastapi==0.123.0 +fastapi==0.124.0 uvicorn[standard]==0.37.0 pydantic==2.12.5 python-multipart==0.0.20 @@ -16,7 +16,7 @@ PyJWT[crypto]==2.10.1 authlib==1.6.5 requests==2.32.5 -aiohttp==3.12.15 +aiohttp==3.13.2 async-timeout aiocache aiofiles @@ -24,28 +24,28 @@ starlette-compress==1.6.1 httpx[socks,http2,zstd,cli,brotli]==0.28.1 starsessions[redis]==2.2.1 -sqlalchemy==2.0.38 +sqlalchemy==2.0.44 alembic==1.17.2 peewee==3.18.3 peewee-migrate==1.14.3 -pycrdt==0.12.25 +pycrdt==0.12.44 redis -APScheduler==3.10.4 -RestrictedPython==8.0 +APScheduler==3.11.1 +RestrictedPython==8.1 loguru==0.7.3 asgiref==3.11.0 -mcp==1.22.0 +mcp==1.23.1 openai langchain==0.3.27 langchain-community==0.3.29 fake-useragent==2.2.0 -chromadb==1.1.0 -black==25.11.0 +chromadb==1.3.5 +black==25.12.0 pydub chardet==5.2.0 diff --git a/backend/requirements.txt b/backend/requirements.txt index a1a8034959..b337a5f8b5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,4 +1,4 @@ -fastapi==0.123.0 +fastapi==0.124.0 uvicorn[standard]==0.37.0 pydantic==2.12.5 python-multipart==0.0.20 @@ -13,44 +13,45 @@ PyJWT[crypto]==2.10.1 authlib==1.6.5 requests==2.32.5 -aiohttp==3.12.15 +aiohttp==3.13.2 async-timeout aiocache aiofiles starlette-compress==1.6.1 httpx[socks,http2,zstd,cli,brotli]==0.28.1 starsessions[redis]==2.2.1 +python-mimeparse==2.0.0 -sqlalchemy==2.0.38 +sqlalchemy==2.0.44 alembic==1.17.2 peewee==3.18.3 peewee-migrate==1.14.3 -pycrdt==0.12.25 +pycrdt==0.12.44 redis -APScheduler==3.10.4 -RestrictedPython==8.0 +APScheduler==3.11.1 +RestrictedPython==8.1 loguru==0.7.3 asgiref==3.11.0 # AI libraries tiktoken -mcp==1.22.0 +mcp==1.23.3 openai anthropic -google-genai==1.52.0 +google-genai==1.54.0 google-generativeai==0.8.5 langchain==0.3.27 langchain-community==0.3.29 fake-useragent==2.2.0 -chromadb==1.1.0 -weaviate-client==4.17.0 -opensearch-py==2.8.0 +chromadb==1.3.5 +weaviate-client==4.18.3 +opensearch-py==3.1.0 transformers==4.57.3 sentence-transformers==5.1.2 @@ -60,43 +61,43 @@ einops==0.8.1 ftfy==6.3.1 chardet==5.2.0 -pypdf==6.4.0 -fpdf2==2.8.2 -pymdown-extensions==10.17.2 -docx2txt==0.8 +pypdf==6.4.1 +fpdf2==2.8.5 +pymdown-extensions==10.18 +docx2txt==0.9 python-pptx==1.0.2 unstructured==0.18.21 msoffcrypto-tool==5.4.2 -nltk==3.9.1 +nltk==3.9.2 Markdown==3.10 pypandoc==1.16.2 -pandas==2.2.3 +pandas==2.3.3 openpyxl==3.1.5 pyxlsb==1.0.10 -xlrd==2.0.1 +xlrd==2.0.2 validators==0.35.0 psutil sentencepiece soundfile==0.13.1 -pillow==11.3.0 -opencv-python-headless==4.11.0.86 +pillow==12.0.0 +opencv-python-headless==4.12.0.88 rapidocr-onnxruntime==1.4.4 rank-bm25==0.2.2 -onnxruntime==1.20.1 -faster-whisper==1.1.1 +onnxruntime==1.23.2 +faster-whisper==1.2.1 -black==25.11.0 -youtube-transcript-api==1.2.2 +black==25.12.0 +youtube-transcript-api==1.2.3 pytube==15.0.0 pydub -ddgs==9.9.2 +ddgs==9.9.3 azure-ai-documentintelligence==1.0.2 
-azure-identity==1.25.0 -azure-storage-blob==12.24.1 +azure-identity==1.25.1 +azure-storage-blob==12.27.1 azure-search-documents==11.6.0 ## Google Drive @@ -105,26 +106,26 @@ google-auth-httplib2 google-auth-oauthlib googleapis-common-protos==1.72.0 -google-cloud-storage==2.19.0 +google-cloud-storage==3.7.0 ## Databases pymongo -psycopg2-binary==2.9.10 -pgvector==0.4.1 +psycopg2-binary==2.9.11 +pgvector==0.4.2 -PyMySQL==1.1.1 -boto3==1.41.5 +PyMySQL==1.1.2 +boto3==1.42.5 -pymilvus==2.6.4 -qdrant-client==1.14.3 -playwright==1.56.0 # Caution: version must match docker-compose.playwright.yaml -elasticsearch==9.1.0 +pymilvus==2.6.5 +qdrant-client==1.16.1 +playwright==1.57.0 # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary +elasticsearch==9.2.0 pinecone==6.0.2 -oracledb==3.2.0 +oracledb==3.4.1 av==14.0.1 # Caution: Set due to FATAL FIPS SELFTEST FAILURE, see discussion https://github.com/open-webui/open-webui/discussions/15720 -colbert-ai==0.2.21 +colbert-ai==0.2.22 ## Tests @@ -136,17 +137,17 @@ pytest-docker~=3.2.5 ldap3==2.9.1 ## Firecrawl -firecrawl-py==4.10.0 +firecrawl-py==4.10.4 ## Trace -opentelemetry-api==1.38.0 -opentelemetry-sdk==1.38.0 -opentelemetry-exporter-otlp==1.38.0 -opentelemetry-instrumentation==0.59b0 -opentelemetry-instrumentation-fastapi==0.59b0 -opentelemetry-instrumentation-sqlalchemy==0.59b0 -opentelemetry-instrumentation-redis==0.59b0 -opentelemetry-instrumentation-requests==0.59b0 -opentelemetry-instrumentation-logging==0.59b0 -opentelemetry-instrumentation-httpx==0.59b0 -opentelemetry-instrumentation-aiohttp-client==0.59b0 +opentelemetry-api==1.39.0 +opentelemetry-sdk==1.39.0 +opentelemetry-exporter-otlp==1.39.0 +opentelemetry-instrumentation==0.60b0 +opentelemetry-instrumentation-fastapi==0.60b0 +opentelemetry-instrumentation-sqlalchemy==0.60b0 +opentelemetry-instrumentation-redis==0.60b0 +opentelemetry-instrumentation-requests==0.60b0 +opentelemetry-instrumentation-logging==0.60b0 +opentelemetry-instrumentation-httpx==0.60b0 +opentelemetry-instrumentation-aiohttp-client==0.60b0 diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml index fa2b49ff9a..e00a28df58 100644 --- a/docker-compose.playwright.yaml +++ b/docker-compose.playwright.yaml @@ -1,8 +1,8 @@ services: playwright: - image: mcr.microsoft.com/playwright:v1.49.1-noble # Version must match requirements.txt + image: mcr.microsoft.com/playwright:v1.57.0-noble # Version must match requirements.txt container_name: playwright - command: npx -y playwright@1.49.1 run-server --port 3000 --host 0.0.0.0 + command: npx -y playwright@1.57.0 run-server --port 3000 --host 0.0.0.0 open-webui: environment: diff --git a/kubernetes/helm/README.md b/kubernetes/helm/README.md deleted file mode 100644 index 5737007d96..0000000000 --- a/kubernetes/helm/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Helm Charts -Open WebUI Helm Charts are now hosted in a separate repo, which can be found here: https://github.com/open-webui/helm-charts - -The charts are released at https://helm.openwebui.com. 
\ No newline at end of file diff --git a/kubernetes/manifest/base/kustomization.yaml b/kubernetes/manifest/base/kustomization.yaml deleted file mode 100644 index 61500f87c5..0000000000 --- a/kubernetes/manifest/base/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -resources: - - open-webui.yaml - - ollama-service.yaml - - ollama-statefulset.yaml - - webui-deployment.yaml - - webui-service.yaml - - webui-ingress.yaml - - webui-pvc.yaml diff --git a/kubernetes/manifest/base/ollama-service.yaml b/kubernetes/manifest/base/ollama-service.yaml deleted file mode 100644 index 8bab65b59e..0000000000 --- a/kubernetes/manifest/base/ollama-service.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: ollama-service - namespace: open-webui -spec: - selector: - app: ollama - ports: - - protocol: TCP - port: 11434 - targetPort: 11434 \ No newline at end of file diff --git a/kubernetes/manifest/base/ollama-statefulset.yaml b/kubernetes/manifest/base/ollama-statefulset.yaml deleted file mode 100644 index cd1144caf9..0000000000 --- a/kubernetes/manifest/base/ollama-statefulset.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: ollama - namespace: open-webui -spec: - serviceName: "ollama" - replicas: 1 - selector: - matchLabels: - app: ollama - template: - metadata: - labels: - app: ollama - spec: - containers: - - name: ollama - image: ollama/ollama:latest - ports: - - containerPort: 11434 - resources: - requests: - cpu: "2000m" - memory: "2Gi" - limits: - cpu: "4000m" - memory: "4Gi" - nvidia.com/gpu: "0" - volumeMounts: - - name: ollama-volume - mountPath: /root/.ollama - tty: true - volumeClaimTemplates: - - metadata: - name: ollama-volume - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 30Gi \ No newline at end of file diff --git a/kubernetes/manifest/base/open-webui.yaml b/kubernetes/manifest/base/open-webui.yaml deleted file mode 100644 index 9c1a599f32..0000000000 --- a/kubernetes/manifest/base/open-webui.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: open-webui \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml deleted file mode 100644 index 79a0a9a23c..0000000000 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: open-webui-deployment - namespace: open-webui -spec: - replicas: 1 - selector: - matchLabels: - app: open-webui - template: - metadata: - labels: - app: open-webui - spec: - containers: - - name: open-webui - image: ghcr.io/open-webui/open-webui:main - ports: - - containerPort: 8080 - resources: - requests: - cpu: "500m" - memory: "500Mi" - limits: - cpu: "1000m" - memory: "1Gi" - env: - - name: OLLAMA_BASE_URL - value: "http://ollama-service.open-webui.svc.cluster.local:11434" - tty: true - volumeMounts: - - name: webui-volume - mountPath: /app/backend/data - volumes: - - name: webui-volume - persistentVolumeClaim: - claimName: open-webui-pvc \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-ingress.yaml b/kubernetes/manifest/base/webui-ingress.yaml deleted file mode 100644 index dc0b53ccd4..0000000000 --- a/kubernetes/manifest/base/webui-ingress.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: open-webui-ingress - namespace: open-webui - #annotations: - # Use appropriate annotations for your 
Ingress controller, e.g., for NGINX: - # nginx.ingress.kubernetes.io/rewrite-target: / -spec: - rules: - - host: open-webui.minikube.local - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: open-webui-service - port: - number: 8080 diff --git a/kubernetes/manifest/base/webui-pvc.yaml b/kubernetes/manifest/base/webui-pvc.yaml deleted file mode 100644 index 97fb761d42..0000000000 --- a/kubernetes/manifest/base/webui-pvc.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - app: open-webui - name: open-webui-pvc - namespace: open-webui -spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 2Gi \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-service.yaml b/kubernetes/manifest/base/webui-service.yaml deleted file mode 100644 index d73845f00a..0000000000 --- a/kubernetes/manifest/base/webui-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: open-webui-service - namespace: open-webui -spec: - type: NodePort # Use LoadBalancer if you're on a cloud that supports it - selector: - app: open-webui - ports: - - protocol: TCP - port: 8080 - targetPort: 8080 - # If using NodePort, you can optionally specify the nodePort: - # nodePort: 30000 \ No newline at end of file diff --git a/kubernetes/manifest/gpu/kustomization.yaml b/kubernetes/manifest/gpu/kustomization.yaml deleted file mode 100644 index c0d39fbfaa..0000000000 --- a/kubernetes/manifest/gpu/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: - - ../base - -patches: -- path: ollama-statefulset-gpu.yaml diff --git a/kubernetes/manifest/gpu/ollama-statefulset-gpu.yaml b/kubernetes/manifest/gpu/ollama-statefulset-gpu.yaml deleted file mode 100644 index 3e42443656..0000000000 --- a/kubernetes/manifest/gpu/ollama-statefulset-gpu.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: ollama - namespace: open-webui -spec: - selector: - matchLabels: - app: ollama - serviceName: "ollama" - template: - spec: - containers: - - name: ollama - resources: - limits: - nvidia.com/gpu: "1" diff --git a/pyproject.toml b/pyproject.toml index 10dd3259e4..c78ed1711f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ ] license = { file = "LICENSE" } dependencies = [ - "fastapi==0.123.0", + "fastapi==0.124.0", "uvicorn[standard]==0.37.0", "pydantic==2.12.5", "python-multipart==0.0.20", @@ -21,97 +21,98 @@ dependencies = [ "authlib==1.6.5", "requests==2.32.5", - "aiohttp==3.12.15", + "aiohttp==3.13.2", "async-timeout", "aiocache", "aiofiles", "starlette-compress==1.6.1", "httpx[socks,http2,zstd,cli,brotli]==0.28.1", "starsessions[redis]==2.2.1", + "python-mimeparse==2.0.0", - "sqlalchemy==2.0.38", + "sqlalchemy==2.0.44", "alembic==1.17.2", "peewee==3.18.3", "peewee-migrate==1.14.3", - "pycrdt==0.12.25", + "pycrdt==0.12.44", "redis", - "APScheduler==3.10.4", - "RestrictedPython==8.0", + "APScheduler==3.11.1", + "RestrictedPython==8.1", "loguru==0.7.3", "asgiref==3.11.0", "tiktoken", - "mcp==1.22.0", + "mcp==1.23.3", "openai", "anthropic", - "google-genai==1.52.0", + "google-genai==1.54.0", "google-generativeai==0.8.5", "langchain==0.3.27", "langchain-community==0.3.29", "fake-useragent==2.2.0", - "chromadb==1.0.20", - "opensearch-py==2.8.0", - "PyMySQL==1.1.1", - "boto3==1.41.5", + "chromadb==1.3.5", + "opensearch-py==3.1.0", + "PyMySQL==1.1.2", + "boto3==1.42.5", 
"transformers==4.57.3", "sentence-transformers==5.1.2", "accelerate", - "pyarrow==20.0.0", + "pyarrow==20.0.0", # fix: pin pyarrow version to 20 for rpi compatibility #15897 "einops==0.8.1", "ftfy==6.3.1", "chardet==5.2.0", - "pypdf==6.4.0", - "fpdf2==2.8.2", - "pymdown-extensions==10.17.2", - "docx2txt==0.8", + "pypdf==6.4.1", + "fpdf2==2.8.5", + "pymdown-extensions==10.18", + "docx2txt==0.9", "python-pptx==1.0.2", "unstructured==0.18.21", "msoffcrypto-tool==5.4.2", - "nltk==3.9.1", + "nltk==3.9.2", "Markdown==3.10", "pypandoc==1.16.2", - "pandas==2.2.3", + "pandas==2.3.3", "openpyxl==3.1.5", "pyxlsb==1.0.10", - "xlrd==2.0.1", + "xlrd==2.0.2", "validators==0.35.0", "psutil", "sentencepiece", "soundfile==0.13.1", "azure-ai-documentintelligence==1.0.2", - "pillow==11.3.0", - "opencv-python-headless==4.11.0.86", + "pillow==12.0.0", + "opencv-python-headless==4.12.0.88", "rapidocr-onnxruntime==1.4.4", "rank-bm25==0.2.2", - "onnxruntime==1.20.1", - "faster-whisper==1.1.1", + "onnxruntime==1.23.2", + "faster-whisper==1.2.1", - "black==25.11.0", - "youtube-transcript-api==1.2.2", + "black==25.12.0", + "youtube-transcript-api==1.2.3", "pytube==15.0.0", "pydub", - "ddgs==9.9.2", + "ddgs==9.9.3", "google-api-python-client", "google-auth-httplib2", "google-auth-oauthlib", "googleapis-common-protos==1.72.0", - "google-cloud-storage==2.19.0", + "google-cloud-storage==3.7.0", - "azure-identity==1.25.0", - "azure-storage-blob==12.24.1", + "azure-identity==1.25.1", + "azure-storage-blob==12.27.1", "ldap3==2.9.1", ] @@ -130,8 +131,8 @@ classifiers = [ [project.optional-dependencies] postgres = [ - "psycopg2-binary==2.9.10", - "pgvector==0.4.1", + "psycopg2-binary==2.9.11", + "pgvector==0.4.2", ] all = [ @@ -143,17 +144,18 @@ all = [ "docker~=7.1.0", "pytest~=8.3.2", "pytest-docker~=3.2.5", - "playwright==1.56.0", - "elasticsearch==9.1.0", + "playwright==1.57.0", # Caution: version must match docker-compose.playwright.yaml - Update the docker-compose.yaml if necessary + "elasticsearch==9.2.0", - "qdrant-client==1.14.3", - "weaviate-client==4.17.0", + "qdrant-client==1.16.1", "pymilvus==2.6.4", + "weaviate-client==4.18.3", + "pymilvus==2.6.5", "pinecone==6.0.2", - "oracledb==3.2.0", - "colbert-ai==0.2.21", + "oracledb==3.4.1", + "colbert-ai==0.2.22", - "firecrawl-py==4.10.0", + "firecrawl-py==4.10.4", "azure-search-documents==11.6.0", ] diff --git a/src/app.css b/src/app.css index fc093e5a6a..897dbdc3b7 100644 --- a/src/app.css +++ b/src/app.css @@ -803,3 +803,7 @@ body { position: relative; z-index: 0; } + +#note-content-container .ProseMirror { + padding-bottom: 2rem; /* space for the bottom toolbar */ +} diff --git a/src/lib/apis/channels/index.ts b/src/lib/apis/channels/index.ts index 0731b2ea9f..44817e97ef 100644 --- a/src/lib/apis/channels/index.ts +++ b/src/lib/apis/channels/index.ts @@ -491,6 +491,44 @@ export const getChannelThreadMessages = async ( return res; }; +export const getMessageData = async ( + token: string = '', + channel_id: string, + message_id: string +) => { + let error = null; + + const res = await fetch( + `${WEBUI_API_BASE_URL}/channels/${channel_id}/messages/${message_id}/data`, + { + method: 'GET', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + } + ) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.error(err); + return null; + }); + + if (error) { + throw error; + } + + 
+
 type MessageForm = {
 	temp_id?: string;
 	reply_to_id?: string;
diff --git a/src/lib/apis/files/index.ts b/src/lib/apis/files/index.ts
index 8351393e3c..07042c4ade 100644
--- a/src/lib/apis/files/index.ts
+++ b/src/lib/apis/files/index.ts
@@ -1,16 +1,26 @@
 import { WEBUI_API_BASE_URL } from '$lib/constants';
 import { splitStream } from '$lib/utils';

-export const uploadFile = async (token: string, file: File, metadata?: object | null) => {
+export const uploadFile = async (
+	token: string,
+	file: File,
+	metadata?: object | null,
+	process?: boolean | null
+) => {
 	const data = new FormData();
 	data.append('file', file);
 	if (metadata) {
 		data.append('metadata', JSON.stringify(metadata));
 	}

+	const searchParams = new URLSearchParams();
+	if (process !== undefined && process !== null) {
+		searchParams.append('process', String(process));
+	}
+
 	let error = null;

-	const res = await fetch(`${WEBUI_API_BASE_URL}/files/`, {
+	const res = await fetch(`${WEBUI_API_BASE_URL}/files/?${searchParams.toString()}`, {
 		method: 'POST',
 		headers: {
 			Accept: 'application/json',
diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts
index c01c986a2a..9656a232ea 100644
--- a/src/lib/apis/knowledge/index.ts
+++ b/src/lib/apis/knowledge/index.ts
@@ -38,10 +38,13 @@ export const createNewKnowledge = async (
 	return res;
 };

-export const getKnowledgeBases = async (token: string = '') => {
+export const getKnowledgeBases = async (token: string = '', page: number | null = null) => {
 	let error = null;

-	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/`, {
+	const searchParams = new URLSearchParams();
+	if (page) searchParams.append('page', page.toString());
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/?${searchParams.toString()}`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
@@ -69,10 +72,20 @@ export const getKnowledgeBases = async (token: string = '') => {
 	return res;
 };

-export const getKnowledgeBaseList = async (token: string = '') => {
+export const searchKnowledgeBases = async (
+	token: string = '',
+	query: string | null = null,
+	viewOption: string | null = null,
+	page: number | null = null
+) => {
 	let error = null;

-	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/list`, {
+	const searchParams = new URLSearchParams();
+	if (query) searchParams.append('query', query);
+	if (viewOption) searchParams.append('view_option', viewOption);
+	if (page) searchParams.append('page', page.toString());
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/search?${searchParams.toString()}`, {
 		method: 'GET',
 		headers: {
 			Accept: 'application/json',
@@ -100,6 +113,55 @@
 	return res;
 };

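+// Response shape below is assumed from the backend's KnowledgeFileListResponse
+// (a sketch, not verified against generated API types):
+//   { items: FileMetadataResponse[], total: number }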
+export const searchKnowledgeFiles = async (
+	token: string,
+	query: string | null = null,
+	viewOption: string | null = null,
+	orderBy: string | null = null,
+	direction: string | null = null,
+	page: number = 1
+) => {
+	let error = null;
+
+	const searchParams = new URLSearchParams();
+	if (query) searchParams.append('query', query);
+	if (viewOption) searchParams.append('view_option', viewOption);
+	if (orderBy) searchParams.append('order_by', orderBy);
+	if (direction) searchParams.append('direction', direction);
+	searchParams.append('page', page.toString());
+
+	const res = await fetch(
+		`${WEBUI_API_BASE_URL}/knowledge/search/files?${searchParams.toString()}`,
+		{
+			method: 'GET',
+			headers: {
+				Accept: 'application/json',
+				'Content-Type': 'application/json',
+				authorization: `Bearer ${token}`
+			}
+		}
+	)
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const getKnowledgeById = async (token: string, id: string) => {
 	let error = null;

@@ -132,6 +194,56 @@ export const getKnowledgeById = async (token: string, id: string) => {
 	return res;
 };

+export const searchKnowledgeFilesById = async (
+	token: string,
+	id: string,
+	query: string | null = null,
+	viewOption: string | null = null,
+	orderBy: string | null = null,
+	direction: string | null = null,
+	page: number = 1
+) => {
+	let error = null;
+
+	const searchParams = new URLSearchParams();
+	if (query) searchParams.append('query', query);
+	if (viewOption) searchParams.append('view_option', viewOption);
+	if (orderBy) searchParams.append('order_by', orderBy);
+	if (direction) searchParams.append('direction', direction);
+	searchParams.append('page', page.toString());
+
+	const res = await fetch(
+		`${WEBUI_API_BASE_URL}/knowledge/${id}/files?${searchParams.toString()}`,
+		{
+			method: 'GET',
+			headers: {
+				Accept: 'application/json',
+				'Content-Type': 'application/json',
+				authorization: `Bearer ${token}`
+			}
+		}
+	)
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 type KnowledgeUpdateForm = {
 	name?: string;
 	description?: string;
diff --git a/src/lib/apis/notes/index.ts b/src/lib/apis/notes/index.ts
index 61794f6766..55f9427e0d 100644
--- a/src/lib/apis/notes/index.ts
+++ b/src/lib/apis/notes/index.ts
@@ -91,6 +91,65 @@ export const getNotes = async (token: string = '', raw: boolean = false) => {
 	return grouped;
 };

+export const searchNotes = async (
+	token: string = '',
+	query: string | null = null,
+	viewOption: string | null = null,
+	permission: string | null = null,
+	sortKey: string | null = null,
+	page: number | null = null
+) => {
+	let error = null;
+	const searchParams = new URLSearchParams();
+
+	if (query !== null) {
+		searchParams.append('query', query);
+	}
+
+	if (viewOption !== null) {
+		searchParams.append('view_option', viewOption);
+	}
+
+	if (permission !== null) {
+		searchParams.append('permission', permission);
+	}
+
+	if (sortKey !== null) {
+		searchParams.append('order_by', sortKey);
+	}
+
+	if (page !== null) {
+		searchParams.append('page', `${page}`);
+	}
+
+	const res = await fetch(`${WEBUI_API_BASE_URL}/notes/search?${searchParams.toString()}`, {
+		method: 'GET',
+		headers: {
+			Accept: 'application/json',
+			'Content-Type': 'application/json',
+			authorization: `Bearer ${token}`
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.then((json) => {
+			return json;
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const getNoteList = async (token: string = '', page: number | null = null) => {
 	let error = null;
 	const searchParams = new URLSearchParams();
@@ -99,7 +158,7 @@ export const getNoteList = async (token: string = '', page: number | null = null
 		searchParams.append('page', `${page}`);
 	}

-	const res = await 
fetch(`${WEBUI_API_BASE_URL}/notes/list?${searchParams.toString()}`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/notes/?${searchParams.toString()}`, { method: 'GET', headers: { Accept: 'application/json', diff --git a/src/lib/components/admin/Settings/Pipelines.svelte b/src/lib/components/admin/Settings/Pipelines.svelte index 18446da7dd..81ecfe2218 100644 --- a/src/lib/components/admin/Settings/Pipelines.svelte +++ b/src/lib/components/admin/Settings/Pipelines.svelte @@ -47,7 +47,7 @@ if (pipeline && (pipeline?.valves ?? false)) { for (const property in valves_spec.properties) { if (valves_spec.properties[property]?.type === 'array') { - valves[property] = valves[property].split(',').map((v) => v.trim()); + valves[property] = (valves[property] ?? '').split(',').map((v) => v.trim()); } } diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 17191ac216..f26a9f85b8 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -767,6 +767,19 @@ {#if webConfig.WEB_LOADER_ENGINE === '' || webConfig.WEB_LOADER_ENGINE === 'safe_web'} +