diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 4a05df8256..92e611b4f6 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -1401,6 +1401,16 @@ async def chat_completion(
         request.state.direct = True
         request.state.model = model
 
+    # Chat Params
+    stream_delta_chunk_size = form_data.get("params", {}).get(
+        "stream_delta_chunk_size"
+    )
+    # Model Params
+    if model_info and model_info.params:
+        stream_delta_chunk_size = model_info.params.model_dump().get(
+            "stream_delta_chunk_size"
+        )
+
     metadata = {
         "user_id": user.id,
         "chat_id": form_data.pop("chat_id", None),
@@ -1414,16 +1424,21 @@ async def chat_completion(
         "variables": form_data.get("variables", {}),
         "model": model,
         "direct": model_item.get("direct", False),
-        **(
-            {"function_calling": "native"}
-            if form_data.get("params", {}).get("function_calling") == "native"
-            or (
-                model_info
-                and model_info.params.model_dump().get("function_calling")
-                == "native"
-            )
-            else {}
-        ),
+        "params": {
+            "stream_delta_chunk_size": stream_delta_chunk_size,
+            "function_calling": (
+                "native"
+                if (
+                    form_data.get("params", {}).get("function_calling") == "native"
+                    or (
+                        model_info
+                        and model_info.params.model_dump().get("function_calling")
+                        == "native"
+                    )
+                )
+                else "default"
+            ),
+        },
     }
 
     if metadata.get("chat_id") and (user and user.role != "admin"):
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 3ccef067c0..7ef911ea42 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -684,6 +684,7 @@ def apply_params_to_form_data(form_data, model):
 
     open_webui_params = {
         "stream_response": bool,
+        "stream_delta_chunk_size": int,
         "function_calling": str,
         "system": str,
     }
@@ -930,7 +931,7 @@ async def process_chat_payload(request, form_data, user, metadata, model):
         }
 
     if tools_dict:
-        if metadata.get("function_calling") == "native":
+        if metadata.get("params", {}).get("function_calling") == "native":
             # If the function calling is native, then call the tools function calling handler
             metadata["tools"] = tools_dict
             form_data["tools"] = [
@@ -1816,6 +1817,15 @@ async def process_chat_response(
 
                 response_tool_calls = []
 
+                delta_count = 0
+                delta_chunk_size = max(
+                    1,
+                    int(
+                        metadata.get("params", {}).get("stream_delta_chunk_size")
+                        or 1
+                    ),
+                )
+
                 async for line in response.body_iterator:
                     line = line.decode("utf-8") if isinstance(line, bytes) else line
                     data = line
@@ -2063,12 +2073,23 @@ async def process_chat_response(
                                ),
                            }
 
-                            await event_emitter(
-                                {
-                                    "type": "chat:completion",
-                                    "data": data,
-                                }
-                            )
+                            if delta:
+                                delta_count += 1
+                                if delta_count >= delta_chunk_size:
+                                    await event_emitter(
+                                        {
+                                            "type": "chat:completion",
+                                            "data": data,
+                                        }
+                                    )
+                                    delta_count = 0
+                            else:
+                                await event_emitter(
+                                    {
+                                        "type": "chat:completion",
+                                        "data": data,
+                                    }
+                                )
                         except Exception as e:
                             done = "data: [DONE]" in line
                             if done:
diff --git a/backend/open_webui/utils/payload.py b/backend/open_webui/utils/payload.py
index 9b7f748359..316e61c34c 100644
--- a/backend/open_webui/utils/payload.py
+++ b/backend/open_webui/utils/payload.py
@@ -69,6 +69,7 @@ def remove_open_webui_params(params: dict) -> dict:
     """
     open_webui_params = {
         "stream_response": bool,
+        "stream_delta_chunk_size": int,
         "function_calling": str,
         "system": str,
     }
diff --git a/src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte b/src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte
index 5eff2c8332..aa0a8aaa29 100644
--- a/src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte
+++ b/src/lib/components/chat/Settings/Advanced/AdvancedParams.svelte
@@ -15,6 +15,7 @@
 	const defaultParams = {
 		// Advanced
 		stream_response: null, // Set stream responses for this model individually
+		stream_delta_chunk_size: null, // Set the chunk size for streaming responses
 		function_calling: null,
 		seed: null,
 		stop: null,
@@ -88,6 +89,63 @@
+	{#if admin}
+
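Note (not part of the patch): below is a minimal, self-contained sketch of the delta-batching behaviour that the process_chat_response hunk above introduces, assuming a simplified interface — `emit_batched_deltas`, `emit`, `stream`, and the demo payload shapes are hypothetical stand-ins for `event_emitter` and `response.body_iterator`. Non-delta payloads are forwarded immediately; content deltas are coalesced so only every `stream_delta_chunk_size`-th one triggers a `chat:completion` event. The real handler's `[DONE]`/finalization path is not modelled here.

import asyncio


async def emit_batched_deltas(stream, emit, chunk_size=1):
    # Same guard as the patch: treat None/0 as "emit every delta".
    chunk_size = max(1, int(chunk_size or 1))
    delta_count = 0
    async for data in stream:
        delta = data.get("choices", [{}])[0].get("delta")
        if delta:
            delta_count += 1
            if delta_count >= chunk_size:
                await emit({"type": "chat:completion", "data": data})
                delta_count = 0
        else:
            # Usage blocks, finish markers, etc. are passed through untouched.
            await emit({"type": "chat:completion", "data": data})


async def _demo():
    async def fake_stream():
        for i in range(5):
            yield {"choices": [{"delta": {"content": f"tok{i} "}}]}
        yield {"choices": [{"finish_reason": "stop"}]}  # non-delta payload

    async def printer(event):
        print(event["type"], event["data"])

    # With chunk_size=2 only every second delta is emitted; any trailing
    # odd delta is left to the stream-finalization logic in the real code.
    await emit_batched_deltas(fake_stream(), printer, chunk_size=2)


if __name__ == "__main__":
    asyncio.run(_demo())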