From e76f77bcb79dbd471c2c0761e6c16202c3f329ee Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Sun, 9 Nov 2025 21:16:34 -0500
Subject: [PATCH] refac: stream chunk max buffer size

---
 backend/open_webui/env.py            | 10 ++++++----
 backend/open_webui/routers/openai.py |  4 ++--
 backend/open_webui/utils/misc.py     | 12 ++++++------
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py
index 4839396b21..b85316d955 100644
--- a/backend/open_webui/env.py
+++ b/backend/open_webui/env.py
@@ -570,16 +570,18 @@ else:
 
 
 CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = os.environ.get(
-    "CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE", "10485760"  # 10MB
+    "CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE", ""
 )
 
 if CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE == "":
-    CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+    CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = None
 else:
     try:
-        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = int(CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE)
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = int(
+            CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
+        )
     except Exception:
-        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = None
 
 
 ####################################
diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py
index d3ec158cd9..b0f283c82d 100644
--- a/backend/open_webui/routers/openai.py
+++ b/backend/open_webui/routers/openai.py
@@ -45,7 +45,7 @@ from open_webui.utils.payload import (
 )
 from open_webui.utils.misc import (
     convert_logit_bias_input_to_json,
-    handle_large_stream_chunks,
+    stream_chunks_handler,
 )
 
 from open_webui.utils.auth import get_admin_user, get_verified_user
@@ -953,7 +953,7 @@ async def generate_chat_completion(
         if "text/event-stream" in r.headers.get("Content-Type", ""):
             streaming = True
             return StreamingResponse(
-                handle_large_stream_chunks(r.content),
+                stream_chunks_handler(r.content),
                 status_code=r.status,
                 headers=dict(r.headers),
                 background=BackgroundTask(
diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py
index 49465fb3ea..ce16691365 100644
--- a/backend/open_webui/utils/misc.py
+++ b/backend/open_webui/utils/misc.py
@@ -542,21 +542,21 @@ def extract_urls(text: str) -> list[str]:
     return url_pattern.findall(text)
 
 
-def handle_large_stream_chunks(stream: aiohttp.StreamReader, max_buffer_size: int = CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE):
+def stream_chunks_handler(stream: aiohttp.StreamReader):
     """
     Handle stream response chunks, supporting large data chunks that exceed the
     original 16kb limit. When a single line exceeds max_buffer_size, returns an
     empty JSON string {} and skips subsequent data until encountering normally sized data.
 
     :param stream: The stream reader to handle.
-    :param max_buffer_size: The maximum buffer size in bytes, -1 means not handle large chunks, default is 10MB.
     :return: An async generator that yields the stream data.
     """
 
-    if max_buffer_size <= 0:
+    max_buffer_size = CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
+    if max_buffer_size is None or max_buffer_size <= 0:
        return stream
 
-    async def handle_stream_chunks():
+    async def yield_safe_stream_chunks():
         buffer = b""
         skip_mode = False
 
@@ -603,5 +603,5 @@ def handle_large_stream_chunks(stream: aiohttp.StreamReader, max_buffer_size: in
             # Process remaining buffer data
             if buffer and not skip_mode:
                 yield buffer
-
-    return handle_stream_chunks()
+
+    return yield_safe_stream_chunks()