refac/fix: chat search null byte filter

This commit is contained in:
Timothy Jaeryang Baek 2025-11-22 20:34:49 -05:00
parent 19ffa9fc19
commit 4af7cc818e
9 changed files with 27 additions and 22 deletions

View file

@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Auth(Base): class Auth(Base):
__tablename__ = "auth" __tablename__ = "auth"
id = Column(String, primary_key=True) id = Column(String, primary_key=True, unique=True)
email = Column(String) email = Column(String)
password = Column(Text) password = Column(Text)
active = Column(Boolean) active = Column(Boolean)

View file

@ -19,7 +19,7 @@ from sqlalchemy.sql import exists
class Channel(Base): class Channel(Base):
__tablename__ = "channel" __tablename__ = "channel"
id = Column(Text, primary_key=True) id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text) user_id = Column(Text)
type = Column(Text, nullable=True) type = Column(Text, nullable=True)

View file

@ -26,7 +26,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Chat(Base): class Chat(Base):
__tablename__ = "chat" __tablename__ = "chat"
id = Column(String, primary_key=True) id = Column(String, primary_key=True, unique=True)
user_id = Column(String) user_id = Column(String)
title = Column(Text) title = Column(Text)
chat = Column(JSON) chat = Column(JSON)
@ -794,25 +794,30 @@ class ChatTable:
elif dialect_name == "postgresql": elif dialect_name == "postgresql":
# PostgreSQL doesn't allow null bytes in text. We filter those out by checking # PostgreSQL doesn't allow null bytes in text. We filter those out by checking
# the JSON representation for \u0000 before attempting text extraction # the JSON representation for \u0000 before attempting text extraction
# Safety filter: JSON field must not contain \u0000
query = query.filter(text("Chat.chat::text NOT LIKE '%\\\\u0000%'"))
# Safety filter: title must not contain actual null bytes
query = query.filter(text("Chat.title::text NOT LIKE '%\\x00%'"))
postgres_content_sql = """ postgres_content_sql = """
EXISTS ( EXISTS (
SELECT 1 SELECT 1
FROM json_array_elements(Chat.chat->'messages') AS message FROM json_array_elements(Chat.chat->'messages') AS message
WHERE message->>'content' IS NOT NULL WHERE json_typeof(message->'content') = 'string'
AND LOWER(replace(message->>'content', E'\\x00', '')) LIKE '%' || :content_key || '%' AND LOWER(message->>'content') LIKE '%' || :content_key || '%'
)
"""
postgres_content_clause = text(postgres_content_sql)
# Also filter out chats with null bytes in title
query = query.filter(
text("replace(Chat.title, E'\\x00', '') ILIKE :title_key")
) )
"""
postgres_content_clause = text(postgres_content_sql)
query = query.filter( query = query.filter(
or_( or_(
Chat.title.ilike(bindparam("title_key")), Chat.title.ilike(bindparam("title_key")),
postgres_content_clause, postgres_content_clause,
).params(title_key=f"%{search_text}%", content_key=search_text) )
) ).params(title_key=f"%{search_text}%", content_key=search_text.lower())
# Check if there are any tags to filter, it should have all the tags # Check if there are any tags to filter, it should have all the tags
if "none" in tag_ids: if "none" in tag_ids:

View file

@ -21,7 +21,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Feedback(Base): class Feedback(Base):
__tablename__ = "feedback" __tablename__ = "feedback"
id = Column(Text, primary_key=True) id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text) user_id = Column(Text)
version = Column(BigInteger, default=0) version = Column(BigInteger, default=0)
type = Column(Text) type = Column(Text)

View file

@ -17,7 +17,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class File(Base): class File(Base):
__tablename__ = "file" __tablename__ = "file"
id = Column(String, primary_key=True) id = Column(String, primary_key=True, unique=True)
user_id = Column(String) user_id = Column(String)
hash = Column(Text, nullable=True) hash = Column(Text, nullable=True)

View file

@ -23,7 +23,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Folder(Base): class Folder(Base):
__tablename__ = "folder" __tablename__ = "folder"
id = Column(Text, primary_key=True) id = Column(Text, primary_key=True, unique=True)
parent_id = Column(Text, nullable=True) parent_id = Column(Text, nullable=True)
user_id = Column(Text) user_id = Column(Text)
name = Column(Text) name = Column(Text)

View file

@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Function(Base): class Function(Base):
__tablename__ = "function" __tablename__ = "function"
id = Column(String, primary_key=True) id = Column(String, primary_key=True, unique=True)
user_id = Column(String) user_id = Column(String)
name = Column(Text) name = Column(Text)
type = Column(Text) type = Column(Text)

View file

@ -14,7 +14,7 @@ from sqlalchemy import BigInteger, Column, String, Text
class Memory(Base): class Memory(Base):
__tablename__ = "memory" __tablename__ = "memory"
id = Column(String, primary_key=True) id = Column(String, primary_key=True, unique=True)
user_id = Column(String) user_id = Column(String)
content = Column(Text) content = Column(Text)
updated_at = Column(BigInteger) updated_at = Column(BigInteger)

View file

@ -20,7 +20,7 @@ from sqlalchemy.sql import exists
class MessageReaction(Base): class MessageReaction(Base):
__tablename__ = "message_reaction" __tablename__ = "message_reaction"
id = Column(Text, primary_key=True) id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text) user_id = Column(Text)
message_id = Column(Text) message_id = Column(Text)
name = Column(Text) name = Column(Text)