feat: stream delta chunk

Co-Authored-By: Jan Kessler <Ithanil@users.noreply.github.com>
This commit is contained in:
Timothy Jaeryang Baek 2025-08-09 23:43:27 +04:00
parent e2b2e8b245
commit 1a93891d97
5 changed files with 115 additions and 17 deletions

View file

@ -1401,6 +1401,16 @@ async def chat_completion(
request.state.direct = True
request.state.model = model
# Chat Params
stream_delta_chunk_size = form_data.get("params", {}).get(
"stream_delta_chunk_size"
)
# Model Params
if model_info and model_info.params:
stream_delta_chunk_size = model_info.params.model_dump().get(
"stream_delta_chunk_size"
)
metadata = {
"user_id": user.id,
"chat_id": form_data.pop("chat_id", None),
@ -1414,16 +1424,21 @@ async def chat_completion(
"variables": form_data.get("variables", {}),
"model": model,
"direct": model_item.get("direct", False),
**(
{"function_calling": "native"}
if form_data.get("params", {}).get("function_calling") == "native"
or (
model_info
and model_info.params.model_dump().get("function_calling")
== "native"
)
else {}
),
"params": {
"stream_delta_chunk_size": stream_delta_chunk_size,
"function_calling": (
"native"
if (
form_data.get("params", {}).get("function_calling") == "native"
or (
model_info
and model_info.params.model_dump().get("function_calling")
== "native"
)
)
else "default"
),
},
}
if metadata.get("chat_id") and (user and user.role != "admin"):

View file

@ -684,6 +684,7 @@ def apply_params_to_form_data(form_data, model):
open_webui_params = {
"stream_response": bool,
"stream_delta_chunk_size": int,
"function_calling": str,
"system": str,
}
@ -930,7 +931,7 @@ async def process_chat_payload(request, form_data, user, metadata, model):
}
if tools_dict:
if metadata.get("function_calling") == "native":
if metadata.get("params", {}).get("function_calling") == "native":
# If the function calling is native, then call the tools function calling handler
metadata["tools"] = tools_dict
form_data["tools"] = [
@ -1816,6 +1817,15 @@ async def process_chat_response(
response_tool_calls = []
delta_count = 0
delta_chunk_size = max(
1,
int(
metadata.get("params", {}).get("stream_delta_chunk_size")
or 1
),
)
async for line in response.body_iterator:
line = line.decode("utf-8") if isinstance(line, bytes) else line
data = line
@ -2063,12 +2073,23 @@ async def process_chat_response(
),
}
await event_emitter(
{
"type": "chat:completion",
"data": data,
}
)
if delta:
delta_count += 1
if delta_count >= delta_chunk_size:
await event_emitter(
{
"type": "chat:completion",
"data": data,
}
)
delta_count = 0
else:
await event_emitter(
{
"type": "chat:completion",
"data": data,
}
)
except Exception as e:
done = "data: [DONE]" in line
if done:

View file

@ -69,6 +69,7 @@ def remove_open_webui_params(params: dict) -> dict:
"""
open_webui_params = {
"stream_response": bool,
"stream_delta_chunk_size": int,
"function_calling": str,
"system": str,
}

View file

@ -15,6 +15,7 @@
const defaultParams = {
// Advanced
stream_response: null, // Set stream responses for this model individually
stream_delta_chunk_size: null, // Set the chunk size for streaming responses
function_calling: null,
seed: null,
stop: null,
@ -88,6 +89,63 @@
</Tooltip>
</div>
{#if admin}
<div>
<Tooltip
content={$i18n.t(
'The stream delta chunk size for the model. Increasing the chunk size will make the model respond with larger pieces of text at once.'
)}
placement="top-start"
className="inline-tooltip"
>
<div class="flex w-full justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Stream Delta Chunk Size')}
</div>
<button
class="p-1 px-3 text-xs flex rounded-sm transition shrink-0 outline-hidden"
type="button"
on:click={() => {
params.stream_delta_chunk_size =
(params?.stream_delta_chunk_size ?? null) === null ? 1 : null;
}}
>
{#if (params?.stream_delta_chunk_size ?? null) === null}
<span class="ml-2 self-center"> {$i18n.t('Default')} </span>
{:else}
<span class="ml-2 self-center"> {$i18n.t('Custom')} </span>
{/if}
</button>
</div>
</Tooltip>
{#if (params?.stream_delta_chunk_size ?? null) !== null}
<div class="flex mt-0.5 space-x-2">
<div class=" flex-1">
<input
id="steps-range"
type="range"
min="1"
max="128"
step="1"
bind:value={params.stream_delta_chunk_size}
class="w-full h-2 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
/>
</div>
<div>
<input
bind:value={params.stream_delta_chunk_size}
type="number"
class=" bg-transparent text-center w-14"
min="1"
step="any"
/>
</div>
</div>
{/if}
</div>
{/if}
<div>
<Tooltip
content={$i18n.t(

View file

@ -42,6 +42,7 @@
let params = {
// Advanced
stream_response: null,
stream_delta_chunk_size: null,
function_calling: null,
seed: null,
temperature: null,
@ -71,6 +72,8 @@
system: system !== '' ? system : undefined,
params: {
stream_response: params.stream_response !== null ? params.stream_response : undefined,
stream_delta_chunk_size:
params.stream_delta_chunk_size !== null ? params.stream_delta_chunk_size : undefined,
function_calling: params.function_calling !== null ? params.function_calling : undefined,
seed: (params.seed !== null ? params.seed : undefined) ?? undefined,
stop: params.stop ? params.stop.split(',').filter((e) => e) : undefined,