refac/fix: chat search null byte filter

This commit is contained in:
Timothy Jaeryang Baek 2025-11-22 20:34:49 -05:00
parent 19ffa9fc19
commit 4af7cc818e
9 changed files with 27 additions and 22 deletions

View file

@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Auth(Base):
__tablename__ = "auth"
id = Column(String, primary_key=True)
id = Column(String, primary_key=True, unique=True)
email = Column(String)
password = Column(Text)
active = Column(Boolean)

View file

@ -19,7 +19,7 @@ from sqlalchemy.sql import exists
class Channel(Base):
__tablename__ = "channel"
id = Column(Text, primary_key=True)
id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text)
type = Column(Text, nullable=True)

View file

@ -26,7 +26,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Chat(Base):
__tablename__ = "chat"
id = Column(String, primary_key=True)
id = Column(String, primary_key=True, unique=True)
user_id = Column(String)
title = Column(Text)
chat = Column(JSON)
@ -794,25 +794,30 @@ class ChatTable:
elif dialect_name == "postgresql":
# PostgreSQL doesn't allow null bytes in text. We filter those out by checking
# the JSON representation for \u0000 before attempting text extraction
# Safety filter: JSON field must not contain \u0000
query = query.filter(text("Chat.chat::text NOT LIKE '%\\\\u0000%'"))
# Safety filter: title must not contain actual null bytes
query = query.filter(text("Chat.title::text NOT LIKE '%\\x00%'"))
postgres_content_sql = """
EXISTS (
SELECT 1
FROM json_array_elements(Chat.chat->'messages') AS message
WHERE message->>'content' IS NOT NULL
AND LOWER(replace(message->>'content', E'\\x00', '')) LIKE '%' || :content_key || '%'
)
"""
postgres_content_clause = text(postgres_content_sql)
# Also filter out chats with null bytes in title
query = query.filter(
text("replace(Chat.title, E'\\x00', '') ILIKE :title_key")
EXISTS (
SELECT 1
FROM json_array_elements(Chat.chat->'messages') AS message
WHERE json_typeof(message->'content') = 'string'
AND LOWER(message->>'content') LIKE '%' || :content_key || '%'
)
"""
postgres_content_clause = text(postgres_content_sql)
query = query.filter(
or_(
Chat.title.ilike(bindparam("title_key")),
postgres_content_clause,
).params(title_key=f"%{search_text}%", content_key=search_text)
)
)
).params(title_key=f"%{search_text}%", content_key=search_text.lower())
# Check if there are any tags to filter, it should have all the tags
if "none" in tag_ids:

View file

@ -21,7 +21,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Feedback(Base):
__tablename__ = "feedback"
id = Column(Text, primary_key=True)
id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text)
version = Column(BigInteger, default=0)
type = Column(Text)

View file

@ -17,7 +17,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class File(Base):
__tablename__ = "file"
id = Column(String, primary_key=True)
id = Column(String, primary_key=True, unique=True)
user_id = Column(String)
hash = Column(Text, nullable=True)

View file

@ -23,7 +23,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Folder(Base):
__tablename__ = "folder"
id = Column(Text, primary_key=True)
id = Column(Text, primary_key=True, unique=True)
parent_id = Column(Text, nullable=True)
user_id = Column(Text)
name = Column(Text)

View file

@ -19,7 +19,7 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
class Function(Base):
__tablename__ = "function"
id = Column(String, primary_key=True)
id = Column(String, primary_key=True, unique=True)
user_id = Column(String)
name = Column(Text)
type = Column(Text)

View file

@ -14,7 +14,7 @@ from sqlalchemy import BigInteger, Column, String, Text
class Memory(Base):
__tablename__ = "memory"
id = Column(String, primary_key=True)
id = Column(String, primary_key=True, unique=True)
user_id = Column(String)
content = Column(Text)
updated_at = Column(BigInteger)

View file

@ -20,7 +20,7 @@ from sqlalchemy.sql import exists
class MessageReaction(Base):
__tablename__ = "message_reaction"
id = Column(Text, primary_key=True)
id = Column(Text, primary_key=True, unique=True)
user_id = Column(Text)
message_id = Column(Text)
name = Column(Text)