Merge pull request #18884 from ShirasawaSama/feature/handle-large-stream-chunks

feat: handle large stream chunks in responses to support Nano Banana [Test Needed]
Tim Baek 2025-11-09 21:08:49 -05:00 committed by GitHub
commit 27df461abd
3 changed files with 82 additions and 2 deletions

File 1 of 3 (environment configuration, open_webui.env)

@@ -569,6 +569,19 @@ else:
 
 CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = 30
 
+CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = os.environ.get(
+    "CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE", "10485760"  # 10MB
+)
+if CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE == "":
+    CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+else:
+    try:
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = int(
+            CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
+        )
+    except Exception:
+        CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE = 1024 * 1024 * 10
+
 ####################################
 # WEBSOCKET SUPPORT
 ####################################
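For reference, a minimal sketch of the fallback behaviour this hunk introduces, written as a standalone helper (the name _parse_buffer_size is hypothetical, for illustration only):

def _parse_buffer_size(raw: str, default: int = 1024 * 1024 * 10) -> int:
    # Empty or non-integer values fall back to the 10 MB default;
    # "-1" parses cleanly and later disables large-chunk handling entirely.
    if raw == "":
        return default
    try:
        return int(raw)
    except Exception:
        return default

assert _parse_buffer_size("") == 10485760
assert _parse_buffer_size("not-a-number") == 10485760
assert _parse_buffer_size("-1") == -1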

File 2 of 3 (chat completion router)

@@ -45,6 +45,7 @@ from open_webui.utils.payload import (
 )
 from open_webui.utils.misc import (
     convert_logit_bias_input_to_json,
+    handle_large_stream_chunks,
 )
 
 from open_webui.utils.auth import get_admin_user, get_verified_user
@@ -952,7 +953,7 @@ async def generate_chat_completion(
         if "text/event-stream" in r.headers.get("Content-Type", ""):
             streaming = True
             return StreamingResponse(
-                r.content,
+                handle_large_stream_chunks(r.content),
                 status_code=r.status,
                 headers=dict(r.headers),
                 background=BackgroundTask(
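This is the pass-through point where the upstream SSE body reaches the client. A simplified sketch of the surrounding proxy pattern, assuming FastAPI and aiohttp as in this router; proxy_sse and its parameters are illustrative, and the real handler's error handling and model resolution are omitted:

import aiohttp
from fastapi.responses import StreamingResponse
from starlette.background import BackgroundTask

async def proxy_sse(url: str, payload: dict) -> StreamingResponse:
    session = aiohttp.ClientSession()
    r = await session.post(url, json=payload)
    return StreamingResponse(
        handle_large_stream_chunks(r.content),  # previously just r.content
        status_code=r.status,
        headers=dict(r.headers),
        background=BackgroundTask(session.close),  # close the upstream session once the stream ends
    )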

File 3 of 3 (open_webui.utils.misc)

@@ -8,10 +8,11 @@ from datetime import timedelta
 from pathlib import Path
 from typing import Callable, Optional
 
 import json
+import aiohttp
 import collections.abc
 
-from open_webui.env import SRC_LOG_LEVELS
+from open_webui.env import SRC_LOG_LEVELS, CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MAIN"])
@@ -539,3 +540,68 @@ def extract_urls(text: str) -> list[str]:
         r"(https?://[^\s]+)", re.IGNORECASE
     )  # Matches http and https URLs
     return url_pattern.findall(text)
+
+
+def handle_large_stream_chunks(
+    stream: aiohttp.StreamReader,
+    max_buffer_size: int = CHAT_STREAM_RESPONSE_CHUNK_MAX_BUFFER_SIZE,
+):
+    """
+    Handle stream response chunks, supporting data chunks that exceed aiohttp's
+    default 16 KB line limit. When a single line exceeds max_buffer_size, an
+    empty SSE payload (data: {}) is emitted in its place and subsequent data is
+    skipped until a normally sized line is encountered.
+
+    :param stream: The stream reader to handle.
+    :param max_buffer_size: The maximum buffer size in bytes; values <= 0
+        disable large-chunk handling. Defaults to 10 MB.
+    :return: The original stream, or an async generator yielding the stream data.
+    """
+    if max_buffer_size <= 0:
+        return stream
+
+    async def handle_stream_chunks():
+        buffer = b""
+        skip_mode = False
+
+        async for data, _ in stream.iter_chunks():
+            if not data:
+                continue
+
+            # In skip mode, a buffer that already exceeds the limit is part of
+            # an oversized line: discard it before appending the new chunk.
+            if skip_mode and len(buffer) > max_buffer_size:
+                buffer = b""
+
+            lines = (buffer + data).split(b"\n")
+
+            # Process complete lines (all but the last, possibly partial,
+            # fragment), re-appending the newline that split() strips so the
+            # SSE framing survives.
+            for line in lines[:-1]:
+                if skip_mode:
+                    # Exit skip mode once a normally sized line arrives.
+                    if len(line) <= max_buffer_size:
+                        skip_mode = False
+                        yield line + b"\n"
+                    else:
+                        yield b"data: {}\n"
+                else:
+                    # Enter skip mode when a line exceeds the limit.
+                    if len(line) > max_buffer_size:
+                        skip_mode = True
+                        yield b"data: {}\n"
+                        log.info(f"Skip mode triggered, line size: {len(line)}")
+                    else:
+                        yield line + b"\n"
+
+            # Keep the last, possibly incomplete, fragment for the next chunk.
+            buffer = lines[-1]
+
+            # If the partial line already exceeds the limit, enter skip mode
+            # and clear the buffer to prevent unbounded growth.
+            if not skip_mode and len(buffer) > max_buffer_size:
+                skip_mode = True
+                log.info(f"Skip mode triggered, buffer size: {len(buffer)}")
+                buffer = b""
+
+        # Flush whatever remains after the stream ends.
+        if buffer and not skip_mode:
+            yield buffer
+
+    return handle_stream_chunks()
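Since handle_large_stream_chunks only relies on the reader's iter_chunks() method, its skip-mode behaviour can be exercised with a small stand-in for aiohttp.StreamReader. A hypothetical self-test (FakeReader and the 32-byte cap are made up for the demonstration):

import asyncio

class FakeReader:
    def __init__(self, chunks):
        self._chunks = chunks

    async def iter_chunks(self):
        for chunk in self._chunks:
            yield chunk, True  # (data, end_of_http_chunk), matching aiohttp's shape

async def main():
    big = b"data: " + b"x" * 64 + b"\n"  # oversized under the tiny 32-byte cap
    small = b'data: {"done": true}\n'
    out = [
        c
        async for c in handle_large_stream_chunks(
            FakeReader([big, small]), max_buffer_size=32
        )
    ]
    assert out[0] == b"data: {}\n"  # oversized line replaced by an empty payload
    assert out[1] == b'data: {"done": true}\n'  # normal line ends skip mode

asyncio.run(main())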