open-webui/backend/open_webui/utils/chat.py
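
"""
Chat completion dispatch utilities for Open WebUI: route completion requests to
direct client-side connections, function ("pipe") models, arena models, Ollama,
or OpenAI-compatible backends, and run outlet filters and action functions on
completed messages.
"""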

import time
import logging
import sys
from aiocache import cached
from typing import Any, Optional
import random
import json
import inspect
import uuid
import asyncio

from fastapi import Request, status
from starlette.responses import Response, StreamingResponse, JSONResponse

from open_webui.models.users import UserModel
from open_webui.socket.main import (
    sio,
    get_event_call,
    get_event_emitter,
)
from open_webui.functions import generate_function_chat_completion
from open_webui.routers.openai import (
    generate_chat_completion as generate_openai_chat_completion,
)
from open_webui.routers.ollama import (
    generate_chat_completion as generate_ollama_chat_completion,
)
from open_webui.routers.pipelines import (
    process_pipeline_inlet_filter,
    process_pipeline_outlet_filter,
)
from open_webui.models.functions import Functions
from open_webui.models.models import Models
from open_webui.utils.plugin import (
    load_function_module_by_id,
    get_function_module_from_cache,
)
from open_webui.utils.models import get_all_models, check_model_access
from open_webui.utils.payload import convert_payload_openai_to_ollama
from open_webui.utils.response import (
    convert_response_ollama_to_openai,
    convert_streaming_response_ollama_to_openai,
)
from open_webui.utils.filter import (
    get_sorted_filter_ids,
    process_filter_functions,
)

from open_webui.env import SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL, BYPASS_MODEL_ACCESS_CONTROL

logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL)
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MAIN"])


async def generate_direct_chat_completion(
    request: Request,
    form_data: dict,
    user: Any,
    models: dict,
):
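    """
    Handle a chat completion for a "direct" (client-side) model connection:
    the request is dispatched to the client session over Socket.IO and, for
    streaming requests, the chunks sent back on the per-request channel are
    relayed as server-sent events.
    """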
    log.info("generate_direct_chat_completion")

    metadata = form_data.pop("metadata", {})

    user_id = metadata.get("user_id")
    session_id = metadata.get("session_id")
    request_id = str(uuid.uuid4())  # Generate a unique request ID

    event_caller = get_event_call(metadata)

    channel = f"{user_id}:{session_id}:{request_id}"

    log.info(f"WebSocket channel: {channel}")

    if form_data.get("stream"):
        q = asyncio.Queue()

        async def message_listener(sid, data):
            """
            Handle received socket messages and push them into the queue.
            """
            await q.put(data)

        # Register the listener
        sio.on(channel, message_listener)

        # Start processing chat completion in background
        res = await event_caller(
            {
                "type": "request:chat:completion",
                "data": {
                    "form_data": form_data,
                    "model": models[form_data["model"]],
                    "channel": channel,
                    "session_id": session_id,
                },
            }
        )

        log.info(f"res: {res}")

        if res.get("status", False):
            # Define a generator to stream responses
            async def event_generator():
                nonlocal q
                try:
                    while True:
                        data = await q.get()  # Wait for new messages
                        if isinstance(data, dict):
                            if "done" in data and data["done"]:
                                break  # Stop streaming when 'done' is received
                            yield f"data: {json.dumps(data)}\n\n"
                        elif isinstance(data, str):
                            if "data:" in data:
                                yield f"{data}\n\n"
                            else:
                                yield f"data: {data}\n\n"
                except Exception as e:
                    log.debug(f"Error in event generator: {e}")
                    pass

            # Define a background task to run the event generator
            async def background():
                try:
                    del sio.handlers["/"][channel]
                except Exception as e:
                    pass

            # Return the streaming response
            return StreamingResponse(
                event_generator(), media_type="text/event-stream", background=background
            )
        else:
            raise Exception(str(res))
    else:
        res = await event_caller(
            {
                "type": "request:chat:completion",
                "data": {
                    "form_data": form_data,
                    "model": models[form_data["model"]],
                    "channel": channel,
                    "session_id": session_id,
                },
            }
        )

        if "error" in res and res["error"]:
            raise Exception(res["error"])

        return res


async def generate_chat_completion(
    request: Request,
    form_data: dict,
    user: Any,
    bypass_filter: bool = False,
):
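    """
    Dispatch a chat completion request to the appropriate backend: a direct
    client-side connection, a function ("pipe") model, an arena model, Ollama,
    or an OpenAI-compatible endpoint. Model access is checked for regular users
    unless `bypass_filter` (or BYPASS_MODEL_ACCESS_CONTROL) is set.
    """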
    log.debug(f"generate_chat_completion: {form_data}")

    if BYPASS_MODEL_ACCESS_CONTROL:
        bypass_filter = True

    if hasattr(request.state, "metadata"):
        if "metadata" not in form_data:
            form_data["metadata"] = request.state.metadata
        else:
            form_data["metadata"] = {
                **form_data["metadata"],
                **request.state.metadata,
            }

    if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
        models = {
            request.state.model["id"]: request.state.model,
        }
        log.debug(f"direct connection to model: {models}")
    else:
        models = request.app.state.MODELS

    model_id = form_data["model"]
    if model_id not in models:
        raise Exception("Model not found")

    model = models[model_id]

    if getattr(request.state, "direct", False):
        return await generate_direct_chat_completion(
            request, form_data, user=user, models=models
        )
    else:
        # Check if user has access to the model
        if not bypass_filter and user.role == "user":
            try:
                check_model_access(user, model)
            except Exception as e:
                raise e
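
        # Arena models do not answer directly: one candidate model is chosen at
        # random (respecting the include/exclude filter_mode) and the request is
        # re-dispatched to it, with `selected_model_id` attached to the response.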
        if model.get("owned_by") == "arena":
            model_ids = model.get("info", {}).get("meta", {}).get("model_ids")
            filter_mode = model.get("info", {}).get("meta", {}).get("filter_mode")

            if model_ids and filter_mode == "exclude":
                model_ids = [
                    model["id"]
                    for model in list(request.app.state.MODELS.values())
                    if model.get("owned_by") != "arena" and model["id"] not in model_ids
                ]

            selected_model_id = None
            if isinstance(model_ids, list) and model_ids:
                selected_model_id = random.choice(model_ids)
            else:
                model_ids = [
                    model["id"]
                    for model in list(request.app.state.MODELS.values())
                    if model.get("owned_by") != "arena"
                ]
                selected_model_id = random.choice(model_ids)

            form_data["model"] = selected_model_id

            if form_data.get("stream") == True:

                async def stream_wrapper(stream):
                    yield f"data: {json.dumps({'selected_model_id': selected_model_id})}\n\n"
                    async for chunk in stream:
                        yield chunk

                response = await generate_chat_completion(
                    request, form_data, user, bypass_filter=True
                )
                return StreamingResponse(
                    stream_wrapper(response.body_iterator),
                    media_type="text/event-stream",
                    background=response.background,
                )
            else:
                return {
                    **(
                        await generate_chat_completion(
                            request, form_data, user, bypass_filter=True
                        )
                    ),
                    "selected_model_id": selected_model_id,
                }

        if model.get("pipe"):
            # Below does not require bypass_filter because this is the only route
            # that uses this function and it is already bypassing the filter
            return await generate_function_chat_completion(
                request, form_data, user=user, models=models
            )
        if model.get("owned_by") == "ollama":
            # Using /ollama/api/chat endpoint
            form_data = convert_payload_openai_to_ollama(form_data)

            response = await generate_ollama_chat_completion(
                request=request,
                form_data=form_data,
                user=user,
                bypass_filter=bypass_filter,
            )

            if form_data.get("stream"):
                response.headers["content-type"] = "text/event-stream"
                return StreamingResponse(
                    convert_streaming_response_ollama_to_openai(response),
                    headers=dict(response.headers),
                    background=response.background,
                )
            else:
                return convert_response_ollama_to_openai(response)
        else:
            return await generate_openai_chat_completion(
                request=request,
                form_data=form_data,
                user=user,
                bypass_filter=bypass_filter,
            )
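

# Shorter alias for generate_chat_completion.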
chat_completion = generate_chat_completion


async def chat_completed(request: Request, form_data: dict, user: Any):
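    """
    Run the outlet pipeline and any "outlet" filter functions on a completed
    chat message, returning the (possibly modified) payload.
    """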
    if not request.app.state.MODELS:
        await get_all_models(request, user=user)

    if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
        models = {
            request.state.model["id"]: request.state.model,
        }
    else:
        models = request.app.state.MODELS

    data = form_data
    model_id = data["model"]
    if model_id not in models:
        raise Exception("Model not found")

    model = models[model_id]

    try:
        data = await process_pipeline_outlet_filter(request, data, user, models)
    except Exception as e:
        return Exception(f"Error: {e}")

    metadata = {
        "chat_id": data["chat_id"],
        "message_id": data["id"],
        "filter_ids": data.get("filter_ids", []),
        "session_id": data["session_id"],
        "user_id": user.id,
    }

    extra_params = {
        "__event_emitter__": get_event_emitter(metadata),
        "__event_call__": get_event_call(metadata),
        "__user__": user.model_dump() if isinstance(user, UserModel) else {},
        "__metadata__": metadata,
        "__request__": request,
        "__model__": model,
    }

    try:
        filter_functions = [
            Functions.get_function_by_id(filter_id)
            for filter_id in get_sorted_filter_ids(
                request, model, metadata.get("filter_ids", [])
            )
        ]

        result, _ = await process_filter_functions(
            request=request,
            filter_functions=filter_functions,
            filter_type="outlet",
            form_data=data,
            extra_params=extra_params,
        )
        return result
    except Exception as e:
        return Exception(f"Error: {e}")


async def chat_action(request: Request, action_id: str, form_data: dict, user: Any):
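    """
    Execute an action function (optionally a named sub-action, addressed as
    "action_id.sub_action_id") against a chat message, wiring up event
    emitter/call helpers, valves, and per-user valves according to the
    action's signature.
    """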
if "." in action_id:
action_id, sub_action_id = action_id.split(".")
else:
sub_action_id = None
action = Functions.get_function_by_id(action_id)
if not action:
raise Exception(f"Action not found: {action_id}")
2024-12-21 16:59:12 +00:00
if not request.app.state.MODELS:
await get_all_models(request, user=user)
2025-02-13 06:56:33 +00:00
2025-02-13 07:26:47 +00:00
if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
2025-02-13 06:56:33 +00:00
models = {
request.state.model["id"]: request.state.model,
}
else:
models = request.app.state.MODELS
2024-12-13 04:22:17 +00:00
data = form_data
model_id = data["model"]
if model_id not in models:
raise Exception("Model not found")
model = models[model_id]

    __event_emitter__ = get_event_emitter(
        {
            "chat_id": data["chat_id"],
            "message_id": data["id"],
            "session_id": data["session_id"],
            "user_id": user.id,
        }
    )
    __event_call__ = get_event_call(
        {
            "chat_id": data["chat_id"],
            "message_id": data["id"],
            "session_id": data["session_id"],
            "user_id": user.id,
        }
    )

    function_module, _, _ = get_function_module_from_cache(request, action_id)

    if hasattr(function_module, "valves") and hasattr(function_module, "Valves"):
        valves = Functions.get_function_valves_by_id(action_id)
        function_module.valves = function_module.Valves(**(valves if valves else {}))

    if hasattr(function_module, "action"):
        try:
            action = function_module.action

            # Get the signature of the function
            sig = inspect.signature(action)
            params = {"body": data}

            # Extra parameters to be passed to the function
            extra_params = {
                "__model__": model,
                "__id__": sub_action_id if sub_action_id is not None else action_id,
                "__event_emitter__": __event_emitter__,
                "__event_call__": __event_call__,
                "__request__": request,
            }

            # Add extra params that are declared in the function signature
            for key, value in extra_params.items():
                if key in sig.parameters:
                    params[key] = value
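
            # If the action declares a __user__ parameter, pass the calling user,
            # including their per-user valves when the module defines UserValves.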
if "__user__" in sig.parameters:
2025-06-18 06:50:49 +00:00
__user__ = user.model_dump() if isinstance(user, UserModel) else {}
2024-12-13 04:22:17 +00:00
try:
if hasattr(function_module, "UserValves"):
__user__["valves"] = function_module.UserValves(
**Functions.get_user_valves_by_id_and_user_id(
action_id, user.id
)
)
except Exception as e:
log.exception(f"Failed to get user values: {e}")
2024-12-13 04:22:17 +00:00
params = {**params, "__user__": __user__}

            if inspect.iscoroutinefunction(action):
                data = await action(**params)
            else:
                data = action(**params)

        except Exception as e:
            return Exception(f"Error: {e}")

    return data