From 4af7cc818e45c1a1f19947b5018e8a1f0d973d29 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 22 Nov 2025 20:34:49 -0500 Subject: [PATCH] refac/fix: chat search null byte filter --- backend/open_webui/models/auths.py | 2 +- backend/open_webui/models/channels.py | 2 +- backend/open_webui/models/chats.py | 33 +++++++++++++++----------- backend/open_webui/models/feedbacks.py | 2 +- backend/open_webui/models/files.py | 2 +- backend/open_webui/models/folders.py | 2 +- backend/open_webui/models/functions.py | 2 +- backend/open_webui/models/memories.py | 2 +- backend/open_webui/models/messages.py | 2 +- 9 files changed, 27 insertions(+), 22 deletions(-) diff --git a/backend/open_webui/models/auths.py b/backend/open_webui/models/auths.py index 48bdc1ed97..39ff1cc7fb 100644 --- a/backend/open_webui/models/auths.py +++ b/backend/open_webui/models/auths.py @@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class Auth(Base): __tablename__ = "auth" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True, unique=True) email = Column(String) password = Column(Text) active = Column(Boolean) diff --git a/backend/open_webui/models/channels.py b/backend/open_webui/models/channels.py index e75266be78..2a14e7a2d5 100644 --- a/backend/open_webui/models/channels.py +++ b/backend/open_webui/models/channels.py @@ -19,7 +19,7 @@ from sqlalchemy.sql import exists class Channel(Base): __tablename__ = "channel" - id = Column(Text, primary_key=True) + id = Column(Text, primary_key=True, unique=True) user_id = Column(Text) type = Column(Text, nullable=True) diff --git a/backend/open_webui/models/chats.py b/backend/open_webui/models/chats.py index 3411f50398..9c7b5595ea 100644 --- a/backend/open_webui/models/chats.py +++ b/backend/open_webui/models/chats.py @@ -26,7 +26,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class Chat(Base): __tablename__ = "chat" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True, unique=True) user_id = Column(String) title = Column(Text) chat = Column(JSON) @@ -794,25 +794,30 @@ class ChatTable: elif dialect_name == "postgresql": # PostgreSQL doesn't allow null bytes in text. We filter those out by checking # the JSON representation for \u0000 before attempting text extraction + + # Safety filter: JSON field must not contain \u0000 + query = query.filter(text("Chat.chat::text NOT LIKE '%\\\\u0000%'")) + + # Safety filter: title must not contain actual null bytes + query = query.filter(text("Chat.title::text NOT LIKE '%\\x00%'")) + postgres_content_sql = """ - EXISTS ( - SELECT 1 - FROM json_array_elements(Chat.chat->'messages') AS message - WHERE message->>'content' IS NOT NULL - AND LOWER(replace(message->>'content', E'\\x00', '')) LIKE '%' || :content_key || '%' - ) - """ - postgres_content_clause = text(postgres_content_sql) - # Also filter out chats with null bytes in title - query = query.filter( - text("replace(Chat.title, E'\\x00', '') ILIKE :title_key") + EXISTS ( + SELECT 1 + FROM json_array_elements(Chat.chat->'messages') AS message + WHERE json_typeof(message->'content') = 'string' + AND LOWER(message->>'content') LIKE '%' || :content_key || '%' ) + """ + + postgres_content_clause = text(postgres_content_sql) + query = query.filter( or_( Chat.title.ilike(bindparam("title_key")), postgres_content_clause, - ).params(title_key=f"%{search_text}%", content_key=search_text) - ) + ) + ).params(title_key=f"%{search_text}%", content_key=search_text.lower()) # Check if there are any tags to filter, it should have all the tags if "none" in tag_ids: diff --git a/backend/open_webui/models/feedbacks.py b/backend/open_webui/models/feedbacks.py index 33f7f6179a..5a91804b56 100644 --- a/backend/open_webui/models/feedbacks.py +++ b/backend/open_webui/models/feedbacks.py @@ -21,7 +21,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class Feedback(Base): __tablename__ = "feedback" - id = Column(Text, primary_key=True) + id = Column(Text, primary_key=True, unique=True) user_id = Column(Text) version = Column(BigInteger, default=0) type = Column(Text) diff --git a/backend/open_webui/models/files.py b/backend/open_webui/models/files.py index e86000cfc8..1ed743df87 100644 --- a/backend/open_webui/models/files.py +++ b/backend/open_webui/models/files.py @@ -17,7 +17,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class File(Base): __tablename__ = "file" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True, unique=True) user_id = Column(String) hash = Column(Text, nullable=True) diff --git a/backend/open_webui/models/folders.py b/backend/open_webui/models/folders.py index 45f8247080..6e1735ecea 100644 --- a/backend/open_webui/models/folders.py +++ b/backend/open_webui/models/folders.py @@ -23,7 +23,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class Folder(Base): __tablename__ = "folder" - id = Column(Text, primary_key=True) + id = Column(Text, primary_key=True, unique=True) parent_id = Column(Text, nullable=True) user_id = Column(Text) name = Column(Text) diff --git a/backend/open_webui/models/functions.py b/backend/open_webui/models/functions.py index 2020a29633..91736f949a 100644 --- a/backend/open_webui/models/functions.py +++ b/backend/open_webui/models/functions.py @@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"]) class Function(Base): __tablename__ = "function" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True, unique=True) user_id = Column(String) name = Column(Text) type = Column(Text) diff --git a/backend/open_webui/models/memories.py b/backend/open_webui/models/memories.py index 253371c680..f5f2492b99 100644 --- a/backend/open_webui/models/memories.py +++ b/backend/open_webui/models/memories.py @@ -14,7 +14,7 @@ from sqlalchemy import BigInteger, Column, String, Text class Memory(Base): __tablename__ = "memory" - id = Column(String, primary_key=True) + id = Column(String, primary_key=True, unique=True) user_id = Column(String) content = Column(Text) updated_at = Column(BigInteger) diff --git a/backend/open_webui/models/messages.py b/backend/open_webui/models/messages.py index 8b0027b8e7..6aaf09ca46 100644 --- a/backend/open_webui/models/messages.py +++ b/backend/open_webui/models/messages.py @@ -20,7 +20,7 @@ from sqlalchemy.sql import exists class MessageReaction(Base): __tablename__ = "message_reaction" - id = Column(Text, primary_key=True) + id = Column(Text, primary_key=True, unique=True) user_id = Column(Text) message_id = Column(Text) name = Column(Text)