# Provenance: post-patch state of ``sanitize_text_for_db`` in
# backend/open_webui/utils/misc.py, from commit
# ae203d8952b26c9d99bba7439dcde81076e3d889 ("refac", Timothy Jaeryang Baek,
# Sun, 21 Dec 2025 16:15:28 +0400).
def sanitize_text_for_db(text: str) -> str:
    """Remove null bytes and invalid UTF-8 surrogates from text for PostgreSQL storage.

    Args:
        text: The value to sanitize. Anything that is not a ``str`` is
            returned unchanged.

    Returns:
        ``text`` with every NUL character (U+0000) and every surrogate code
        point (U+D800-U+DFFF) removed; all other characters are preserved.
        Non-``str`` inputs are returned as-is.
    """
    if not isinstance(text, str):
        return text

    # PostgreSQL cannot store NUL in text fields. "\x00" and "\u0000" are two
    # spellings of the same one-character string, so one replace covers both.
    text = text.replace("\x00", "")

    # Surrogates cannot be encoded as strict UTF-8. "surrogatepass" lets them
    # through as raw byte sequences on encode; those sequences are invalid
    # UTF-8, so decoding with errors="ignore" silently drops them.
    try:
        text = text.encode("utf-8", errors="surrogatepass").decode(
            "utf-8", errors="ignore"
        )
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Purely defensive: with these error handlers neither step is
        # expected to raise. Keep the NUL-stripped text rather than fail.
        pass
    return text