diff --git a/.env.example b/.env.example index c38bf88bfb..35ea12a885 100644 --- a/.env.example +++ b/.env.example @@ -7,6 +7,15 @@ OPENAI_API_KEY='' # AUTOMATIC1111_BASE_URL="http://localhost:7860" +# For production, you should only need one host as +# fastapi serves the svelte-kit built frontend and backend from the same host and port. +# To test with CORS locally, you can set something like +# CORS_ALLOW_ORIGIN='http://localhost:5173;http://localhost:8080' +CORS_ALLOW_ORIGIN='*' + +# For production you should set this to match the proxy configuration (127.0.0.1) +FORWARDED_ALLOW_IPS='*' + # DO NOT TRACK SCARF_NO_ANALYTICS=true DO_NOT_TRACK=true diff --git a/.gitattributes b/.gitattributes index 526c8a38d4..bf368a4c6c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,49 @@ -*.sh text eol=lf \ No newline at end of file +# TypeScript +*.ts text eol=lf +*.tsx text eol=lf + +# JavaScript +*.js text eol=lf +*.jsx text eol=lf +*.mjs text eol=lf +*.cjs text eol=lf + +# Svelte +*.svelte text eol=lf + +# HTML/CSS +*.html text eol=lf +*.css text eol=lf +*.scss text eol=lf +*.less text eol=lf + +# Config files and JSON +*.json text eol=lf +*.jsonc text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.toml text eol=lf + +# Shell scripts +*.sh text eol=lf + +# Markdown & docs +*.md text eol=lf +*.mdx text eol=lf +*.txt text eol=lf + +# Git-related +.gitattributes text eol=lf +.gitignore text eol=lf + +# Prettier and other dotfiles +.prettierrc text eol=lf +.prettierignore text eol=lf +.eslintrc text eol=lf +.eslintignore text eol=lf +.stylelintrc text eol=lf +.editorconfig text eol=lf + +# Misc +*.env text eol=lf +*.lock text eol=lf \ No newline at end of file diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 02b4461b5a..821ffb7206 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -14,16 +14,18 @@ env: jobs: build-main-image: - runs-on: ${{ matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} + runs-on: ${{ matrix.runner }} permissions: contents: read packages: write strategy: fail-fast: false matrix: - platform: - - linux/amd64 - - linux/arm64 + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: # GitHub Packages requires the entire repository name to be in lowercase @@ -111,16 +113,18 @@ jobs: retention-days: 1 build-cuda-image: - runs-on: ${{ matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} + runs-on: ${{ matrix.runner }} permissions: contents: read packages: write strategy: fail-fast: false matrix: - platform: - - linux/amd64 - - linux/arm64 + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: # GitHub Packages requires the entire repository name to be in lowercase @@ -211,16 +215,18 @@ jobs: retention-days: 1 build-cuda126-image: - runs-on: ${{ matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} + runs-on: ${{ matrix.runner }} permissions: contents: read packages: write strategy: fail-fast: false matrix: - platform: - - linux/amd64 - - linux/arm64 + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: # GitHub Packages requires the entire repository name to be in lowercase @@ -312,16 +318,18 @@ jobs: retention-days: 1 build-ollama-image: - runs-on: ${{ matrix.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} 
+    runs-on: ${{ matrix.runner }}
     permissions:
       contents: read
       packages: write
     strategy:
       fail-fast: false
       matrix:
-        platform:
-          - linux/amd64
-          - linux/arm64
+        include:
+          - platform: linux/amd64
+            runner: ubuntu-latest
+          - platform: linux/arm64
+            runner: ubuntu-24.04-arm
 
     steps:
       # GitHub Packages requires the entire repository name to be in lowercase
diff --git a/.prettierrc b/.prettierrc
index a77fddea90..22558729f4 100644
--- a/.prettierrc
+++ b/.prettierrc
@@ -5,5 +5,6 @@
 	"printWidth": 100,
 	"plugins": ["prettier-plugin-svelte"],
 	"pluginSearchDirs": ["."],
-	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
+	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }],
+	"endOfLine": "lf"
 }
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a6e9f0098..4d09d3bf8d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,45 @@
 All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.14] - 2025-06-10
+
+### Added
+
+- 🤖 **Automatic "Follow Up" Suggestions**: Open WebUI now intelligently generates actionable "Follow Up" suggestions automatically with each message you send, helping you stay productive and inspired without interrupting your flow; you can always disable this in Settings if you prefer a distraction-free experience.
+- 🧩 **OpenAI-Compatible Embeddings Endpoint**: Introducing a fully OpenAI-style '/api/embeddings' endpoint—now you can plug in OpenAI-style embeddings workflows with zero hassle, making integrations with external tools and platforms seamless and familiar.
+- ↗️ **Model Pinning for Quick Access**: Pin your favorite or most-used models to the sidebar for instant selection—no more scrolling through long model lists; your go-to models are always visible and ready for fast access.
+- 📌 **Selector Model Item Menu**: Each model in the selector now features a menu where you can easily pin/unpin to the sidebar and copy a direct link—simplifying collaboration and staying organized in even the busiest environments.
+- 🛑 **Reliable Stop for Ongoing Chats in Multi-Replica Setups**: Stopping or cancelling an in-progress chat now works reliably even in clustered deployments—ensuring every user can interrupt AI output at any time, no matter your scale.
+- 🧠 **'Think' Parameter for Ollama Models**: Leverage new 'think' parameter support for Ollama—giving you advanced control over the AI's reasoning process and further tuning model behavior for your unique use cases.
+- 💬 **Picture Description Modes for Docling**: Customize how images are described/extracted by Docling Loader for smarter, more detailed, and workflow-tailored image understanding in your document pipelines.
+- 🛠 **Settings Modal Deep Linking**: Every tab in Settings now has its own route—making direct navigation and sharing of precise settings faster and more intuitive.
+- 🎤 **Audio HTML Component Token**: Easily embed and play audio directly in your chats, improving voice-based workflows and making audio content instantly accessible and manageable from any conversation.
+- 🔑 **Support for Secret Key File**: Now you can specify 'WEBUI_SECRET_KEY_FILE' for more secure and flexible key management—ideal for advanced deployments and tighter security standards (see the sketch after this item).
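As a usage illustration for the 'WEBUI_SECRET_KEY_FILE' entry above — a hedged sketch only, since the exact resolution logic lives in the backend's env handling and may differ — a `*_FILE` variable is conventionally resolved Docker-secrets style, preferring the file over the plain variable:

```python
import os


def read_secret(name: str, default: str = "") -> str:
    # Docker-secrets-style resolution (an assumed sketch, not the verbatim
    # Open WebUI implementation): prefer NAME_FILE, then fall back to NAME.
    file_path = os.environ.get(f"{name}_FILE")
    if file_path:
        with open(file_path) as f:
            return f.read().strip()
    return os.environ.get(name, default)


# e.g. docker run -e WEBUI_SECRET_KEY_FILE=/run/secrets/webui_key ...
WEBUI_SECRET_KEY = read_secret("WEBUI_SECRET_KEY")
```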
+- 💡 **Clarity When Cloning Prompts**: Cloned workspace prompts are clearly labelled with "(Clone)" and their IDs get a "-clone" suffix, keeping your prompt library organized and preventing accidental overwrites.
+- 📝 **Dedicated User Role Edit Modal**: Updating user roles now reliably opens a dedicated edit user modal instead of cycling through roles—making it safer and clearer to manage team permissions.
+- 🏞️ **Better Handling & Storage of Interpreter-Generated Images**: Code interpreter-generated images are now centrally stored and reliably loaded from the database or cloud storage, ensuring your artifacts are always available.
+- 🚀 **Pinecone & Vector Search Optimizations**: Applied latest best practices from Pinecone for smarter timeouts, intelligent retry control, improved connection pooling, faster DNS, and concurrent batch handling—giving you more reliable, faster document search and RAG performance without manual tweaks.
+- ⚙️ **Ollama Advanced Parameters Unified**: 'keep_alive' and 'format' options are now integrated into the advanced params section—edit everything from the model editor for flexible model control.
+- 🛠️ **CUDA 12.6 Docker Image Support**: Deploy to NVIDIA GPUs with compute capability 7.0 and below (e.g., V100, GTX 1080) via the new cuda126 image—broadening your hardware options for scalable AI workloads.
+- 🔒 **Experimental Table-Level PGVector Data Encryption**: Activate pgcrypto encryption support for pgvector to secure your vector search table contents, giving organizations enhanced compliance and data protection—perfect for enterprise or regulated environments.
+- 👁 **Accessibility Upgrades Across Interface**: Chat buttons and close controls are now labelled and structured for optimal accessibility support, ensuring smoother operation with assistive technologies.
+- 🎨 **High-Contrast Mode Expansions**: High-contrast accessibility mode now also applies to menu items, tabs, and search input fields, offering a more readable experience for all users.
+- 🛠️ **Tooltip & Translation Clarity**: Improved translation and tooltip clarity, especially over radio buttons, making the UI more understandable for all users.
+- 🔠 **Global Localization & Translation Improvements**: Hefty upgrades to Traditional Chinese, Simplified Chinese, Hebrew, Russian, Irish, German, and Danish translation packs—making the platform feel native and intuitive for even more users worldwide.
+- ⚡ **General Backend Stability & Security Enhancements**: Refined numerous backend routines to minimize memory use, improve performance, and streamline integration with external APIs—making the entire platform more robust and secure for daily work.
+
+### Fixed
+
+- 🏷 **Feedback Score Display Improved**: Addressed overflow and visibility issues with feedback scores for more readable and accessible evaluations.
+- 🗂 **Admin Settings Model Edits Apply Immediately**: Changes made in the Model Editor within Admin Settings now take effect instantly, eliminating confusion during model management.
+- 🔄 **Assigned Tools Update Instantly on New Chats**: Models assigned with specific tools now consistently update and are available in every new chat—making tool workflows more predictable and robust.
+- 🛠 **Document Settings Saved Only on User Action**: Document settings now save only when you press the Save button, reducing accidental changes and ensuring greater control.
+- 🔊 **Voice Recording on Older iOS Devices Restored**: Voice input is now fully functional on older iOS devices, keeping voice workflows accessible to all users.
+- 🔒 **Trusted Email Header Session Security**: User sessions now strictly verify the trusted email header matches the logged-in user's email, ensuring secure authentication and preventing accidental session switching. +- 🔒 **Consistent User Signout on Email Mismatch**: When the trusted email in the header changes, you will now be properly signed out and redirected, safeguarding your session's integrity. +- 🛠 **General Error & Content Validation Improvements**: Smarter error handling means clearer messages and fewer unnecessary retries—making batch uploads, document handling, and knowledge indexing more resilient. +- 🕵️ **Better Feedback on Chat Title Edits**: Error messages now show clearly if problems occur while editing chat titles. + ## [0.6.13] - 2025-05-30 ### Added diff --git a/README.md b/README.md index ea1f2acbbe..9d6a66e410 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Want to learn more about Open WebUI's features? Check out our [Open WebUI docume - N8N • Does your interface have a backend yet?
Try n8n + n8n • Does your interface have a backend yet?
Try n8n @@ -86,6 +86,16 @@ Want to learn more about Open WebUI's features? Check out our [Open WebUI docume Warp • The intelligent terminal for developers + + + + Tailscale + + + + Tailscale • Connect self-hosted AI to any device with Tailscale + + --- @@ -181,6 +191,8 @@ After installation, you can access Open WebUI at [http://localhost:3000](http:// We offer various installation alternatives, including non-Docker native installation methods, Docker Compose, Kustomize, and Helm. Visit our [Open WebUI Documentation](https://docs.openwebui.com/getting-started/) or join our [Discord community](https://discord.gg/5rJgQTnV4s) for comprehensive guidance. +Look at the [Local Development Guide](https://docs.openwebui.com/getting-started/advanced-topics/development) for instructions on setting up a local development environment. + ### Troubleshooting Encountering connection issues? Our [Open WebUI Documentation](https://docs.openwebui.com/troubleshooting/) has got you covered. For further assistance and to join our vibrant community, visit the [Open WebUI Discord](https://discord.gg/5rJgQTnV4s). diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 1a98bd8e27..6a9030432b 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -347,6 +347,24 @@ MICROSOFT_CLIENT_TENANT_ID = PersistentConfig( os.environ.get("MICROSOFT_CLIENT_TENANT_ID", ""), ) +MICROSOFT_CLIENT_LOGIN_BASE_URL = PersistentConfig( + "MICROSOFT_CLIENT_LOGIN_BASE_URL", + "oauth.microsoft.login_base_url", + os.environ.get( + "MICROSOFT_CLIENT_LOGIN_BASE_URL", "https://login.microsoftonline.com" + ), +) + +MICROSOFT_CLIENT_PICTURE_URL = PersistentConfig( + "MICROSOFT_CLIENT_PICTURE_URL", + "oauth.microsoft.picture_url", + os.environ.get( + "MICROSOFT_CLIENT_PICTURE_URL", + "https://graph.microsoft.com/v1.0/me/photo/$value", + ), +) + + MICROSOFT_OAUTH_SCOPE = PersistentConfig( "MICROSOFT_OAUTH_SCOPE", "oauth.microsoft.scope", @@ -542,7 +560,7 @@ def load_oauth_providers(): name="microsoft", client_id=MICROSOFT_CLIENT_ID.value, client_secret=MICROSOFT_CLIENT_SECRET.value, - server_metadata_url=f"https://login.microsoftonline.com/{MICROSOFT_CLIENT_TENANT_ID.value}/v2.0/.well-known/openid-configuration?appid={MICROSOFT_CLIENT_ID.value}", + server_metadata_url=f"{MICROSOFT_CLIENT_LOGIN_BASE_URL.value}/{MICROSOFT_CLIENT_TENANT_ID.value}/v2.0/.well-known/openid-configuration?appid={MICROSOFT_CLIENT_ID.value}", client_kwargs={ "scope": MICROSOFT_OAUTH_SCOPE.value, }, @@ -551,7 +569,7 @@ def load_oauth_providers(): OAUTH_PROVIDERS["microsoft"] = { "redirect_uri": MICROSOFT_REDIRECT_URI.value, - "picture_url": "https://graph.microsoft.com/v1.0/me/photo/$value", + "picture_url": MICROSOFT_CLIENT_PICTURE_URL.value, "register": microsoft_oauth_register, } @@ -1245,12 +1263,6 @@ if THREAD_POOL_SIZE is not None and isinstance(THREAD_POOL_SIZE, str): THREAD_POOL_SIZE = None -def validate_cors_origins(origins): - for origin in origins: - if origin != "*": - validate_cors_origin(origin) - - def validate_cors_origin(origin): parsed_url = urlparse(origin) @@ -1270,16 +1282,17 @@ def validate_cors_origin(origin): # To test CORS_ALLOW_ORIGIN locally, you can set something like # CORS_ALLOW_ORIGIN=http://localhost:5173;http://localhost:8080 # in your .env file depending on your frontend port, 5173 in this case. 
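To make the comment above concrete, here is a minimal sketch — assuming the same parsing rule as the config.py change that follows, not the verbatim implementation — of how the semicolon-separated value resolves to either a single wildcard or an explicit origin list, never a mix:

```python
import os
from urllib.parse import urlparse

# One env var, split on ";" (mirrors the CORS_ALLOW_ORIGIN handling below).
cors_allow_origin = os.environ.get("CORS_ALLOW_ORIGIN", "*").split(";")

if cors_allow_origin == ["*"]:
    print("Wildcard: every origin is allowed (not recommended for production).")
else:
    # Mixing "*" with explicit origins breaks CORS in the browser, so each
    # entry must be a concrete scheme://host[:port] origin.
    for origin in cors_allow_origin:
        parsed = urlparse(origin)
        assert parsed.scheme in ("http", "https") and parsed.hostname, origin
        print(f"Allowed origin: {origin}")
```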
-CORS_ALLOW_ORIGIN = os.environ.get(
-    "CORS_ALLOW_ORIGIN", "*;http://localhost:5173;http://localhost:8080"
-).split(";")
+CORS_ALLOW_ORIGIN = os.environ.get("CORS_ALLOW_ORIGIN", "*").split(";")
 
-if "*" in CORS_ALLOW_ORIGIN:
+if CORS_ALLOW_ORIGIN == ["*"]:
     log.warning(
         "\n\nWARNING: CORS_ALLOW_ORIGIN IS SET TO '*' - NOT RECOMMENDED FOR PRODUCTION DEPLOYMENTS.\n"
     )
-
-validate_cors_origins(CORS_ALLOW_ORIGIN)
+else:
+    # You must choose either a single wildcard or a list of explicit origins.
+    # Doing both will result in CORS errors in the browser.
+    for origin in CORS_ALLOW_ORIGIN:
+        validate_cors_origin(origin)
 
 
 class BannerModel(BaseModel):
@@ -1419,9 +1432,9 @@ FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = PersistentConfig(
 )
 
 DEFAULT_FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = """### Task:
-SSuggest 3-5 relevant follow-up questions or prompts that the **user** might naturally ask next in this conversation, based on the chat history, to help continue or deepen the discussion.
+Suggest 3-5 relevant follow-up questions or prompts that the **user** might naturally ask next in this conversation, based on the chat history, to help continue or deepen the discussion.
 ### Guidelines:
-- Phrase all follow-up questions from the user’s perspective, addressed to the assistant or expert.
+- Write all follow-up questions from the user’s point of view, directed to the assistant.
 - Make questions concise, clear, and directly related to the discussed topic(s).
 - Only suggest follow-ups that make sense given the chat content and do not repeat what was already covered.
 - If the conversation is very short or not specific, suggest more general (but relevant) follow-ups the user might ask.
@@ -1812,6 +1825,13 @@ PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH = int(
     os.environ.get("PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH", "1536")
 )
 
+PGVECTOR_PGCRYPTO = os.getenv("PGVECTOR_PGCRYPTO", "false").lower() == "true"
+PGVECTOR_PGCRYPTO_KEY = os.getenv("PGVECTOR_PGCRYPTO_KEY", None)
+if PGVECTOR_PGCRYPTO and not PGVECTOR_PGCRYPTO_KEY:
+    raise ValueError(
+        "PGVECTOR_PGCRYPTO is enabled but PGVECTOR_PGCRYPTO_KEY is not set. Please provide a valid key."
+ ) + # Pinecone PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", None) PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT", None) @@ -1972,6 +1992,40 @@ DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig( os.getenv("DOCLING_DO_PICTURE_DESCRIPTION", "False").lower() == "true", ) +DOCLING_PICTURE_DESCRIPTION_MODE = PersistentConfig( + "DOCLING_PICTURE_DESCRIPTION_MODE", + "rag.docling_picture_description_mode", + os.getenv("DOCLING_PICTURE_DESCRIPTION_MODE", ""), +) + + +docling_picture_description_local = os.getenv("DOCLING_PICTURE_DESCRIPTION_LOCAL", "") +try: + docling_picture_description_local = json.loads(docling_picture_description_local) +except json.JSONDecodeError: + docling_picture_description_local = {} + + +DOCLING_PICTURE_DESCRIPTION_LOCAL = PersistentConfig( + "DOCLING_PICTURE_DESCRIPTION_LOCAL", + "rag.docling_picture_description_local", + docling_picture_description_local, +) + +docling_picture_description_api = os.getenv("DOCLING_PICTURE_DESCRIPTION_API", "") +try: + docling_picture_description_api = json.loads(docling_picture_description_api) +except json.JSONDecodeError: + docling_picture_description_api = {} + + +DOCLING_PICTURE_DESCRIPTION_API = PersistentConfig( + "DOCLING_PICTURE_DESCRIPTION_API", + "rag.docling_picture_description_api", + docling_picture_description_api, +) + + DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig( "DOCUMENT_INTELLIGENCE_ENDPOINT", "rag.document_intelligence_endpoint", @@ -2471,6 +2525,18 @@ PERPLEXITY_API_KEY = PersistentConfig( os.getenv("PERPLEXITY_API_KEY", ""), ) +PERPLEXITY_MODEL = PersistentConfig( + "PERPLEXITY_MODEL", + "rag.web.search.perplexity_model", + os.getenv("PERPLEXITY_MODEL", "sonar"), +) + +PERPLEXITY_SEARCH_CONTEXT_USAGE = PersistentConfig( + "PERPLEXITY_SEARCH_CONTEXT_USAGE", + "rag.web.search.perplexity_search_context_usage", + os.getenv("PERPLEXITY_SEARCH_CONTEXT_USAGE", "medium"), +) + SOUGOU_API_SID = PersistentConfig( "SOUGOU_API_SID", "rag.web.search.sougou_api_sid", @@ -3009,3 +3075,23 @@ LDAP_VALIDATE_CERT = PersistentConfig( LDAP_CIPHERS = PersistentConfig( "LDAP_CIPHERS", "ldap.server.ciphers", os.environ.get("LDAP_CIPHERS", "ALL") ) + +# For LDAP Group Management +ENABLE_LDAP_GROUP_MANAGEMENT = PersistentConfig( + "ENABLE_LDAP_GROUP_MANAGEMENT", + "ldap.group.enable_management", + os.environ.get("ENABLE_LDAP_GROUP_MANAGEMENT", "False").lower() == "true", +) + +ENABLE_LDAP_GROUP_CREATION = PersistentConfig( + "ENABLE_LDAP_GROUP_CREATION", + "ldap.group.enable_creation", + os.environ.get("ENABLE_LDAP_GROUP_CREATION", "False").lower() == "true", +) + +LDAP_ATTRIBUTE_FOR_GROUPS = PersistentConfig( + "LDAP_ATTRIBUTE_FOR_GROUPS", + "ldap.server.attribute_for_groups", + os.environ.get("LDAP_ATTRIBUTE_FOR_GROUPS", "memberOf"), +) + diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index fcfccaedf5..7601748376 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -5,6 +5,7 @@ import os import pkgutil import sys import shutil +from uuid import uuid4 from pathlib import Path import markdown @@ -130,6 +131,7 @@ else: PACKAGE_DATA = {"version": "0.0.0"} VERSION = PACKAGE_DATA["version"] +INSTANCE_ID = os.environ.get("INSTANCE_ID", str(uuid4())) # Function to parse each section diff --git a/backend/open_webui/functions.py b/backend/open_webui/functions.py index 20fabb2dc7..6eb5c1bbdb 100644 --- a/backend/open_webui/functions.py +++ b/backend/open_webui/functions.py @@ -25,6 +25,7 @@ from open_webui.socket.main import ( ) +from open_webui.models.users import UserModel 
from open_webui.models.functions import Functions from open_webui.models.models import Models @@ -227,12 +228,7 @@ async def generate_function_chat_completion( "__task__": __task__, "__task_body__": __task_body__, "__files__": files, - "__user__": { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - }, + "__user__": user.model_dump() if isinstance(user, UserModel) else {}, "__metadata__": metadata, "__request__": request, } diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a75aebb322..84c6b6caa1 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -8,6 +8,8 @@ import shutil import sys import time import random +from uuid import uuid4 + from contextlib import asynccontextmanager from urllib.parse import urlencode, parse_qs, urlparse @@ -19,6 +21,7 @@ from aiocache import cached import aiohttp import anyio.to_thread import requests +from redis import Redis from fastapi import ( @@ -37,7 +40,7 @@ from fastapi import ( from fastapi.openapi.docs import get_swagger_ui_html from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse, RedirectResponse +from fastapi.responses import FileResponse, JSONResponse, RedirectResponse from fastapi.staticfiles import StaticFiles from starlette_compress import CompressMiddleware @@ -231,6 +234,9 @@ from open_webui.config import ( DOCLING_OCR_ENGINE, DOCLING_OCR_LANG, DOCLING_DO_PICTURE_DESCRIPTION, + DOCLING_PICTURE_DESCRIPTION_MODE, + DOCLING_PICTURE_DESCRIPTION_LOCAL, + DOCLING_PICTURE_DESCRIPTION_API, DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY, MISTRAL_OCR_API_KEY, @@ -268,6 +274,8 @@ from open_webui.config import ( BRAVE_SEARCH_API_KEY, EXA_API_KEY, PERPLEXITY_API_KEY, + PERPLEXITY_MODEL, + PERPLEXITY_SEARCH_CONTEXT_USAGE, SOUGOU_API_SID, SOUGOU_API_SK, KAGI_SEARCH_API_KEY, @@ -341,6 +349,10 @@ from open_webui.config import ( LDAP_CA_CERT_FILE, LDAP_VALIDATE_CERT, LDAP_CIPHERS, + # LDAP Group Management + ENABLE_LDAP_GROUP_MANAGEMENT, + ENABLE_LDAP_GROUP_CREATION, + LDAP_ATTRIBUTE_FOR_GROUPS, # Misc ENV, CACHE_DIR, @@ -386,6 +398,7 @@ from open_webui.env import ( SAFE_MODE, SRC_LOG_LEVELS, VERSION, + INSTANCE_ID, WEBUI_BUILD_HASH, WEBUI_SECRET_KEY, WEBUI_SESSION_COOKIE_SAME_SITE, @@ -413,6 +426,7 @@ from open_webui.utils.chat import ( chat_completed as chat_completed_handler, chat_action as chat_action_handler, ) +from open_webui.utils.embeddings import generate_embeddings from open_webui.utils.middleware import process_chat_payload, process_chat_response from open_webui.utils.access_control import has_access @@ -426,8 +440,10 @@ from open_webui.utils.auth import ( from open_webui.utils.plugin import install_tool_and_function_dependencies from open_webui.utils.oauth import OAuthManager from open_webui.utils.security_headers import SecurityHeadersMiddleware +from open_webui.utils.redis import get_redis_connection from open_webui.tasks import ( + redis_task_command_listener, list_task_ids_by_chat_id, stop_task, list_tasks, @@ -479,7 +495,9 @@ https://github.com/open-webui/open-webui @asynccontextmanager async def lifespan(app: FastAPI): + app.state.instance_id = INSTANCE_ID start_logger() + if RESET_CONFIG_ON_START: reset_config() @@ -491,6 +509,19 @@ async def lifespan(app: FastAPI): log.info("Installing external dependencies of functions and tools...") install_tool_and_function_dependencies() + app.state.redis = get_redis_connection( + redis_url=REDIS_URL, + redis_sentinels=get_sentinels_from_env( + REDIS_SENTINEL_HOSTS, 
REDIS_SENTINEL_PORT + ), + async_mode=True, + ) + + if app.state.redis is not None: + app.state.redis_task_command_listener = asyncio.create_task( + redis_task_command_listener(app) + ) + if THREAD_POOL_SIZE and THREAD_POOL_SIZE > 0: limiter = anyio.to_thread.current_default_thread_limiter() limiter.total_tokens = THREAD_POOL_SIZE @@ -499,6 +530,9 @@ async def lifespan(app: FastAPI): yield + if hasattr(app.state, "redis_task_command_listener"): + app.state.redis_task_command_listener.cancel() + app = FastAPI( title="Open WebUI", @@ -510,10 +544,12 @@ app = FastAPI( oauth_manager = OAuthManager(app) +app.state.instance_id = None app.state.config = AppConfig( redis_url=REDIS_URL, redis_sentinels=get_sentinels_from_env(REDIS_SENTINEL_HOSTS, REDIS_SENTINEL_PORT), ) +app.state.redis = None app.state.WEBUI_NAME = WEBUI_NAME app.state.LICENSE_METADATA = None @@ -644,6 +680,11 @@ app.state.config.LDAP_CA_CERT_FILE = LDAP_CA_CERT_FILE app.state.config.LDAP_VALIDATE_CERT = LDAP_VALIDATE_CERT app.state.config.LDAP_CIPHERS = LDAP_CIPHERS +# For LDAP Group Management +app.state.config.ENABLE_LDAP_GROUP_MANAGEMENT = ENABLE_LDAP_GROUP_MANAGEMENT +app.state.config.ENABLE_LDAP_GROUP_CREATION = ENABLE_LDAP_GROUP_CREATION +app.state.config.LDAP_ATTRIBUTE_FOR_GROUPS = LDAP_ATTRIBUTE_FOR_GROUPS + app.state.AUTH_TRUSTED_EMAIL_HEADER = WEBUI_AUTH_TRUSTED_EMAIL_HEADER app.state.AUTH_TRUSTED_NAME_HEADER = WEBUI_AUTH_TRUSTED_NAME_HEADER @@ -698,6 +739,9 @@ app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION +app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE +app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = DOCLING_PICTURE_DESCRIPTION_LOCAL +app.state.config.DOCLING_PICTURE_DESCRIPTION_API = DOCLING_PICTURE_DESCRIPTION_API app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY @@ -773,6 +817,8 @@ app.state.config.BING_SEARCH_V7_ENDPOINT = BING_SEARCH_V7_ENDPOINT app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_KEY app.state.config.EXA_API_KEY = EXA_API_KEY app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY +app.state.config.PERPLEXITY_MODEL = PERPLEXITY_MODEL +app.state.config.PERPLEXITY_SEARCH_CONTEXT_USAGE = PERPLEXITY_SEARCH_CONTEXT_USAGE app.state.config.SOUGOU_API_SID = SOUGOU_API_SID app.state.config.SOUGOU_API_SK = SOUGOU_API_SK app.state.config.EXTERNAL_WEB_SEARCH_URL = EXTERNAL_WEB_SEARCH_URL @@ -1203,6 +1249,37 @@ async def get_base_models(request: Request, user=Depends(get_admin_user)): return {"data": models} +################################## +# Embeddings +################################## + + +@app.post("/api/embeddings") +async def embeddings( + request: Request, form_data: dict, user=Depends(get_verified_user) +): + """ + OpenAI-compatible embeddings endpoint. + + This handler: + - Performs user/model checks and dispatches to the correct backend. + - Supports OpenAI, Ollama, arena models, pipelines, and any compatible provider. + + Args: + request (Request): Request context. + form_data (dict): OpenAI-like payload (e.g., {"model": "...", "input": [...]}) + user (UserModel): Authenticated user. + + Returns: + dict: OpenAI-compatible embeddings response. 
+ """ + # Make sure models are loaded in app state + if not request.app.state.MODELS: + await get_all_models(request, user=user) + # Use generic dispatcher in utils.embeddings + return await generate_embeddings(request, form_data, user) + + @app.post("/api/chat/completions") async def chat_completion( request: Request, @@ -1344,26 +1421,30 @@ async def chat_action( @app.post("/api/tasks/stop/{task_id}") -async def stop_task_endpoint(task_id: str, user=Depends(get_verified_user)): +async def stop_task_endpoint( + request: Request, task_id: str, user=Depends(get_verified_user) +): try: - result = await stop_task(task_id) + result = await stop_task(request, task_id) return result except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @app.get("/api/tasks") -async def list_tasks_endpoint(user=Depends(get_verified_user)): - return {"tasks": list_tasks()} +async def list_tasks_endpoint(request: Request, user=Depends(get_verified_user)): + return {"tasks": await list_tasks(request)} @app.get("/api/tasks/chat/{chat_id}") -async def list_tasks_by_chat_id_endpoint(chat_id: str, user=Depends(get_verified_user)): +async def list_tasks_by_chat_id_endpoint( + request: Request, chat_id: str, user=Depends(get_verified_user) +): chat = Chats.get_chat_by_id(chat_id) if chat is None or chat.user_id != user.id: return {"task_ids": []} - task_ids = list_task_ids_by_chat_id(chat_id) + task_ids = await list_task_ids_by_chat_id(request, chat_id) print(f"Task IDs for chat {chat_id}: {task_ids}") return {"task_ids": task_ids} @@ -1634,7 +1715,20 @@ async def healthcheck_with_db(): app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") -app.mount("/cache", StaticFiles(directory=CACHE_DIR), name="cache") + + +@app.get("/cache/{path:path}") +async def serve_cache_file( + path: str, + user=Depends(get_verified_user), +): + file_path = os.path.abspath(os.path.join(CACHE_DIR, path)) + # prevent path traversal + if not file_path.startswith(os.path.abspath(CACHE_DIR)): + raise HTTPException(status_code=404, detail="File not found") + if not os.path.isfile(file_path): + raise HTTPException(status_code=404, detail="File not found") + return FileResponse(file_path) def swagger_ui_html(*args, **kwargs): diff --git a/backend/open_webui/models/groups.py b/backend/open_webui/models/groups.py index df79284cfa..096041e40f 100644 --- a/backend/open_webui/models/groups.py +++ b/backend/open_webui/models/groups.py @@ -207,9 +207,39 @@ class GroupTable: except Exception: return False - def sync_user_groups_by_group_names( + def create_groups_by_group_names( self, user_id: str, group_names: list[str] - ) -> bool: + ) -> list[GroupModel]: + + # check for existing groups + existing_groups = self.get_groups() + existing_group_names = {group.name for group in existing_groups} + + new_groups = [] + + with get_db() as db: + for group_name in group_names: + if group_name not in existing_group_names: + new_group = GroupModel( + id=str(uuid.uuid4()), + user_id=user_id, + name=group_name, + description="", + created_at=int(time.time()), + updated_at=int(time.time()), + ) + try: + result = Group(**new_group.model_dump()) + db.add(result) + db.commit() + db.refresh(result) + new_groups.append(GroupModel.model_validate(result)) + except Exception as e: + log.exception(e) + continue + return new_groups + + def sync_groups_by_group_names(self, user_id: str, group_names: list[str]) -> bool: with get_db() as db: try: groups = db.query(Group).filter(Group.name.in_(group_names)).all() diff --git 
a/backend/open_webui/models/users.py b/backend/open_webui/models/users.py index a5dd9467bc..00d5040884 100644 --- a/backend/open_webui/models/users.py +++ b/backend/open_webui/models/users.py @@ -370,7 +370,7 @@ class UsersTable: except Exception: return False - def update_user_api_key_by_id(self, id: str, api_key: str) -> str: + def update_user_api_key_by_id(self, id: str, api_key: str) -> bool: try: with get_db() as db: result = db.query(User).filter_by(id=id).update({"api_key": api_key}) diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index 0c7daf9051..fd1f606761 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -2,6 +2,7 @@ import requests import logging import ftfy import sys +import json from langchain_community.document_loaders import ( AzureAIDocumentIntelligenceLoader, @@ -146,17 +147,32 @@ class DoclingLoader: ) } - params = { - "image_export_mode": "placeholder", - "table_mode": "accurate", - } + params = {"image_export_mode": "placeholder", "table_mode": "accurate"} if self.params: - if self.params.get("do_picture_classification"): - params["do_picture_classification"] = self.params.get( - "do_picture_classification" + if self.params.get("do_picture_description"): + params["do_picture_description"] = self.params.get( + "do_picture_description" ) + picture_description_mode = self.params.get( + "picture_description_mode", "" + ).lower() + + if picture_description_mode == "local" and self.params.get( + "picture_description_local", {} + ): + params["picture_description_local"] = self.params.get( + "picture_description_local", {} + ) + + elif picture_description_mode == "api" and self.params.get( + "picture_description_api", {} + ): + params["picture_description_api"] = self.params.get( + "picture_description_api", {} + ) + if self.params.get("ocr_engine") and self.params.get("ocr_lang"): params["ocr_engine"] = self.params.get("ocr_engine") params["ocr_lang"] = [ @@ -284,17 +300,20 @@ class Loader: if self._is_text_file(file_ext, file_content_type): loader = TextLoader(file_path, autodetect_encoding=True) else: + # Build params for DoclingLoader + params = self.kwargs.get("DOCLING_PARAMS", {}) + if not isinstance(params, dict): + try: + params = json.loads(params) + except json.JSONDecodeError: + log.error("Invalid DOCLING_PARAMS format, expected JSON object") + params = {} + loader = DoclingLoader( url=self.kwargs.get("DOCLING_SERVER_URL"), file_path=file_path, mime_type=file_content_type, - params={ - "ocr_engine": self.kwargs.get("DOCLING_OCR_ENGINE"), - "ocr_lang": self.kwargs.get("DOCLING_OCR_LANG"), - "do_picture_classification": self.kwargs.get( - "DOCLING_DO_PICTURE_DESCRIPTION" - ), - }, + params=params, ) elif ( self.engine == "document_intelligence" diff --git a/backend/open_webui/retrieval/loaders/youtube.py b/backend/open_webui/retrieval/loaders/youtube.py index d908cc8cb5..be5e533588 100644 --- a/backend/open_webui/retrieval/loaders/youtube.py +++ b/backend/open_webui/retrieval/loaders/youtube.py @@ -1,4 +1,5 @@ import logging +from xml.etree.ElementTree import ParseError from typing import Any, Dict, Generator, List, Optional, Sequence, Union from urllib.parse import parse_qs, urlparse @@ -93,7 +94,6 @@ class YoutubeLoader: "http": self.proxy_url, "https": self.proxy_url, } - # Don't log complete URL because it might contain secrets log.debug(f"Using proxy URL: {self.proxy_url[:14]}...") else: youtube_proxies = None @@ -110,11 +110,37 @@ class 
YoutubeLoader: for lang in self.language: try: transcript = transcript_list.find_transcript([lang]) + if transcript.is_generated: + log.debug(f"Found generated transcript for language '{lang}'") + try: + transcript = transcript_list.find_manually_created_transcript( + [lang] + ) + log.debug(f"Found manual transcript for language '{lang}'") + except NoTranscriptFound: + log.debug( + f"No manual transcript found for language '{lang}', using generated" + ) + pass + log.debug(f"Found transcript for language '{lang}'") - transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + try: + transcript_pieces: List[Dict[str, Any]] = transcript.fetch() + except ParseError: + log.debug(f"Empty or invalid transcript for language '{lang}'") + continue + + if not transcript_pieces: + log.debug(f"Empty transcript for language '{lang}'") + continue + transcript_text = " ".join( map( - lambda transcript_piece: transcript_piece.text.strip(" "), + lambda transcript_piece: ( + transcript_piece.text.strip(" ") + if hasattr(transcript_piece, "text") + else "" + ), transcript_pieces, ) ) @@ -131,6 +157,4 @@ class YoutubeLoader: log.warning( f"No transcript found for any of the specified languages: {languages_tried}. Verify if the video has transcripts, add more languages if needed." ) - raise NoTranscriptFound( - f"No transcript found for any supported language. Verify if the video has transcripts, add more languages if needed." - ) + raise NoTranscriptFound(self.video_id, self.language, list(transcript_list)) diff --git a/backend/open_webui/retrieval/vector/dbs/pgvector.py b/backend/open_webui/retrieval/vector/dbs/pgvector.py index b6cb2a4e25..632937ef5b 100644 --- a/backend/open_webui/retrieval/vector/dbs/pgvector.py +++ b/backend/open_webui/retrieval/vector/dbs/pgvector.py @@ -1,12 +1,16 @@ from typing import Optional, List, Dict, Any import logging +import json from sqlalchemy import ( + func, + literal, cast, column, create_engine, Column, Integer, MetaData, + LargeBinary, select, text, Text, @@ -28,7 +32,12 @@ from open_webui.retrieval.vector.main import ( SearchResult, GetResult, ) -from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH +from open_webui.config import ( + PGVECTOR_DB_URL, + PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH, + PGVECTOR_PGCRYPTO, + PGVECTOR_PGCRYPTO_KEY, +) from open_webui.env import SRC_LOG_LEVELS @@ -39,14 +48,27 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) +def pgcrypto_encrypt(val, key): + return func.pgp_sym_encrypt(val, literal(key)) + + +def pgcrypto_decrypt(col, key, outtype="text"): + return func.cast(func.pgp_sym_decrypt(col, literal(key)), outtype) + + class DocumentChunk(Base): __tablename__ = "document_chunk" id = Column(Text, primary_key=True) vector = Column(Vector(dim=VECTOR_LENGTH), nullable=True) collection_name = Column(Text, nullable=False) - text = Column(Text, nullable=True) - vmetadata = Column(MutableDict.as_mutable(JSONB), nullable=True) + + if PGVECTOR_PGCRYPTO: + text = Column(LargeBinary, nullable=True) + vmetadata = Column(LargeBinary, nullable=True) + else: + text = Column(Text, nullable=True) + vmetadata = Column(MutableDict.as_mutable(JSONB), nullable=True) class PgvectorClient(VectorDBBase): @@ -70,6 +92,15 @@ class PgvectorClient(VectorDBBase): # Ensure the pgvector extension is available self.session.execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) + if PGVECTOR_PGCRYPTO: + # Ensure the pgcrypto extension is available for encryption + self.session.execute(text("CREATE EXTENSION IF NOT 
EXISTS pgcrypto;")) + + if not PGVECTOR_PGCRYPTO_KEY: + raise ValueError( + "PGVECTOR_PGCRYPTO_KEY must be set when PGVECTOR_PGCRYPTO is enabled." + ) + # Check vector length consistency self.check_vector_length() @@ -147,44 +178,39 @@ class PgvectorClient(VectorDBBase): def insert(self, collection_name: str, items: List[VectorItem]) -> None: try: - new_items = [] - for item in items: - vector = self.adjust_vector_length(item["vector"]) - new_chunk = DocumentChunk( - id=item["id"], - vector=vector, - collection_name=collection_name, - text=item["text"], - vmetadata=item["metadata"], - ) - new_items.append(new_chunk) - self.session.bulk_save_objects(new_items) - self.session.commit() - log.info( - f"Inserted {len(new_items)} items into collection '{collection_name}'." - ) - except Exception as e: - self.session.rollback() - log.exception(f"Error during insert: {e}") - raise - - def upsert(self, collection_name: str, items: List[VectorItem]) -> None: - try: - for item in items: - vector = self.adjust_vector_length(item["vector"]) - existing = ( - self.session.query(DocumentChunk) - .filter(DocumentChunk.id == item["id"]) - .first() - ) - if existing: - existing.vector = vector - existing.text = item["text"] - existing.vmetadata = item["metadata"] - existing.collection_name = ( - collection_name # Update collection_name if necessary + if PGVECTOR_PGCRYPTO: + for item in items: + vector = self.adjust_vector_length(item["vector"]) + # Use raw SQL for BYTEA/pgcrypto + self.session.execute( + text( + """ + INSERT INTO document_chunk + (id, vector, collection_name, text, vmetadata) + VALUES ( + :id, :vector, :collection_name, + pgp_sym_encrypt(:text, :key), + pgp_sym_encrypt(:metadata::text, :key) + ) + ON CONFLICT (id) DO NOTHING + """ + ), + { + "id": item["id"], + "vector": vector, + "collection_name": collection_name, + "text": item["text"], + "metadata": json.dumps(item["metadata"]), + "key": PGVECTOR_PGCRYPTO_KEY, + }, ) - else: + self.session.commit() + log.info(f"Encrypted & inserted {len(items)} into '{collection_name}'") + + else: + new_items = [] + for item in items: + vector = self.adjust_vector_length(item["vector"]) new_chunk = DocumentChunk( id=item["id"], vector=vector, @@ -192,11 +218,78 @@ class PgvectorClient(VectorDBBase): text=item["text"], vmetadata=item["metadata"], ) - self.session.add(new_chunk) - self.session.commit() - log.info( - f"Upserted {len(items)} items into collection '{collection_name}'." - ) + new_items.append(new_chunk) + self.session.bulk_save_objects(new_items) + self.session.commit() + log.info( + f"Inserted {len(new_items)} items into collection '{collection_name}'." 
+ ) + except Exception as e: + self.session.rollback() + log.exception(f"Error during insert: {e}") + raise + + def upsert(self, collection_name: str, items: List[VectorItem]) -> None: + try: + if PGVECTOR_PGCRYPTO: + for item in items: + vector = self.adjust_vector_length(item["vector"]) + self.session.execute( + text( + """ + INSERT INTO document_chunk + (id, vector, collection_name, text, vmetadata) + VALUES ( + :id, :vector, :collection_name, + pgp_sym_encrypt(:text, :key), + pgp_sym_encrypt(:metadata::text, :key) + ) + ON CONFLICT (id) DO UPDATE SET + vector = EXCLUDED.vector, + collection_name = EXCLUDED.collection_name, + text = EXCLUDED.text, + vmetadata = EXCLUDED.vmetadata + """ + ), + { + "id": item["id"], + "vector": vector, + "collection_name": collection_name, + "text": item["text"], + "metadata": json.dumps(item["metadata"]), + "key": PGVECTOR_PGCRYPTO_KEY, + }, + ) + self.session.commit() + log.info(f"Encrypted & upserted {len(items)} into '{collection_name}'") + else: + for item in items: + vector = self.adjust_vector_length(item["vector"]) + existing = ( + self.session.query(DocumentChunk) + .filter(DocumentChunk.id == item["id"]) + .first() + ) + if existing: + existing.vector = vector + existing.text = item["text"] + existing.vmetadata = item["metadata"] + existing.collection_name = ( + collection_name # Update collection_name if necessary + ) + else: + new_chunk = DocumentChunk( + id=item["id"], + vector=vector, + collection_name=collection_name, + text=item["text"], + vmetadata=item["metadata"], + ) + self.session.add(new_chunk) + self.session.commit() + log.info( + f"Upserted {len(items)} items into collection '{collection_name}'." + ) except Exception as e: self.session.rollback() log.exception(f"Error during upsert: {e}") @@ -230,16 +323,32 @@ class PgvectorClient(VectorDBBase): .alias("query_vectors") ) + result_fields = [ + DocumentChunk.id, + ] + if PGVECTOR_PGCRYPTO: + result_fields.append( + pgcrypto_decrypt( + DocumentChunk.text, PGVECTOR_PGCRYPTO_KEY, Text + ).label("text") + ) + result_fields.append( + pgcrypto_decrypt( + DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB + ).label("vmetadata") + ) + else: + result_fields.append(DocumentChunk.text) + result_fields.append(DocumentChunk.vmetadata) + result_fields.append( + (DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector)).label( + "distance" + ) + ) + # Build the lateral subquery for each query vector subq = ( - select( - DocumentChunk.id, - DocumentChunk.text, - DocumentChunk.vmetadata, - ( - DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector) - ).label("distance"), - ) + select(*result_fields) .where(DocumentChunk.collection_name == collection_name) .order_by( (DocumentChunk.vector.cosine_distance(query_vectors.c.q_vector)) @@ -299,17 +408,43 @@ class PgvectorClient(VectorDBBase): self, collection_name: str, filter: Dict[str, Any], limit: Optional[int] = None ) -> Optional[GetResult]: try: - query = self.session.query(DocumentChunk).filter( - DocumentChunk.collection_name == collection_name - ) + if PGVECTOR_PGCRYPTO: + # Build where clause for vmetadata filter + where_clauses = [DocumentChunk.collection_name == collection_name] + for key, value in filter.items(): + # decrypt then check key: JSON filter after decryption + where_clauses.append( + pgcrypto_decrypt( + DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB + )[key].astext + == str(value) + ) + stmt = select( + DocumentChunk.id, + pgcrypto_decrypt( + DocumentChunk.text, PGVECTOR_PGCRYPTO_KEY, Text + ).label("text"), + 
pgcrypto_decrypt( + DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB + ).label("vmetadata"), + ).where(*where_clauses) + if limit is not None: + stmt = stmt.limit(limit) + results = self.session.execute(stmt).all() + else: + query = self.session.query(DocumentChunk).filter( + DocumentChunk.collection_name == collection_name + ) - for key, value in filter.items(): - query = query.filter(DocumentChunk.vmetadata[key].astext == str(value)) + for key, value in filter.items(): + query = query.filter( + DocumentChunk.vmetadata[key].astext == str(value) + ) - if limit is not None: - query = query.limit(limit) + if limit is not None: + query = query.limit(limit) - results = query.all() + results = query.all() if not results: return None @@ -331,20 +466,38 @@ class PgvectorClient(VectorDBBase): self, collection_name: str, limit: Optional[int] = None ) -> Optional[GetResult]: try: - query = self.session.query(DocumentChunk).filter( - DocumentChunk.collection_name == collection_name - ) - if limit is not None: - query = query.limit(limit) + if PGVECTOR_PGCRYPTO: + stmt = select( + DocumentChunk.id, + pgcrypto_decrypt( + DocumentChunk.text, PGVECTOR_PGCRYPTO_KEY, Text + ).label("text"), + pgcrypto_decrypt( + DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB + ).label("vmetadata"), + ).where(DocumentChunk.collection_name == collection_name) + if limit is not None: + stmt = stmt.limit(limit) + results = self.session.execute(stmt).all() + ids = [[row.id for row in results]] + documents = [[row.text for row in results]] + metadatas = [[row.vmetadata for row in results]] + else: - results = query.all() + query = self.session.query(DocumentChunk).filter( + DocumentChunk.collection_name == collection_name + ) + if limit is not None: + query = query.limit(limit) - if not results: - return None + results = query.all() - ids = [[result.id for result in results]] - documents = [[result.text for result in results]] - metadatas = [[result.vmetadata for result in results]] + if not results: + return None + + ids = [[result.id for result in results]] + documents = [[result.text for result in results]] + metadatas = [[result.vmetadata for result in results]] return GetResult(ids=ids, documents=documents, metadatas=metadatas) except Exception as e: @@ -358,17 +511,33 @@ class PgvectorClient(VectorDBBase): filter: Optional[Dict[str, Any]] = None, ) -> None: try: - query = self.session.query(DocumentChunk).filter( - DocumentChunk.collection_name == collection_name - ) - if ids: - query = query.filter(DocumentChunk.id.in_(ids)) - if filter: - for key, value in filter.items(): - query = query.filter( - DocumentChunk.vmetadata[key].astext == str(value) - ) - deleted = query.delete(synchronize_session=False) + if PGVECTOR_PGCRYPTO: + wheres = [DocumentChunk.collection_name == collection_name] + if ids: + wheres.append(DocumentChunk.id.in_(ids)) + if filter: + for key, value in filter.items(): + wheres.append( + pgcrypto_decrypt( + DocumentChunk.vmetadata, PGVECTOR_PGCRYPTO_KEY, JSONB + )[key].astext + == str(value) + ) + stmt = DocumentChunk.__table__.delete().where(*wheres) + result = self.session.execute(stmt) + deleted = result.rowcount + else: + query = self.session.query(DocumentChunk).filter( + DocumentChunk.collection_name == collection_name + ) + if ids: + query = query.filter(DocumentChunk.id.in_(ids)) + if filter: + for key, value in filter.items(): + query = query.filter( + DocumentChunk.vmetadata[key].astext == str(value) + ) + deleted = query.delete(synchronize_session=False) self.session.commit() 
log.info(f"Deleted {deleted} items from collection '{collection_name}'.") except Exception as e: diff --git a/backend/open_webui/retrieval/web/perplexity.py b/backend/open_webui/retrieval/web/perplexity.py index e5314eb1f7..4e046668fa 100644 --- a/backend/open_webui/retrieval/web/perplexity.py +++ b/backend/open_webui/retrieval/web/perplexity.py @@ -1,10 +1,20 @@ import logging -from typing import Optional, List +from typing import Optional, Literal import requests from open_webui.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS +MODELS = Literal[ + "sonar", + "sonar-pro", + "sonar-reasoning", + "sonar-reasoning-pro", + "sonar-deep-research", +] +SEARCH_CONTEXT_USAGE_LEVELS = Literal["low", "medium", "high"] + + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -14,6 +24,8 @@ def search_perplexity( query: str, count: int, filter_list: Optional[list[str]] = None, + model: MODELS = "sonar", + search_context_usage: SEARCH_CONTEXT_USAGE_LEVELS = "medium", ) -> list[SearchResult]: """Search using Perplexity API and return the results as a list of SearchResult objects. @@ -21,6 +33,9 @@ def search_perplexity( api_key (str): A Perplexity API key query (str): The query to search for count (int): Maximum number of results to return + filter_list (Optional[list[str]]): List of domains to filter results + model (str): The Perplexity model to use (sonar, sonar-pro) + search_context_usage (str): Search context usage level (low, medium, high) """ @@ -33,7 +48,7 @@ def search_perplexity( # Create payload for the API call payload = { - "model": "sonar", + "model": model, "messages": [ { "role": "system", @@ -43,6 +58,9 @@ def search_perplexity( ], "temperature": 0.2, # Lower temperature for more factual responses "stream": False, + "web_search_options": { + "search_context_usage": search_context_usage, + }, } headers = { diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py index 06e506228a..60a12db4b3 100644 --- a/backend/open_webui/routers/auths.py +++ b/backend/open_webui/routers/auths.py @@ -55,9 +55,8 @@ from typing import Optional, List from ssl import CERT_NONE, CERT_REQUIRED, PROTOCOL_TLS -if ENABLE_LDAP.value: - from ldap3 import Server, Connection, NONE, Tls - from ldap3.utils.conv import escape_filter_chars +from ldap3 import Server, Connection, NONE, Tls +from ldap3.utils.conv import escape_filter_chars router = APIRouter() @@ -229,14 +228,30 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm): if not connection_app.bind(): raise HTTPException(400, detail="Application account bind failed") + ENABLE_LDAP_GROUP_MANAGEMENT = ( + request.app.state.config.ENABLE_LDAP_GROUP_MANAGEMENT + ) + ENABLE_LDAP_GROUP_CREATION = request.app.state.config.ENABLE_LDAP_GROUP_CREATION + LDAP_ATTRIBUTE_FOR_GROUPS = request.app.state.config.LDAP_ATTRIBUTE_FOR_GROUPS + + search_attributes = [ + f"{LDAP_ATTRIBUTE_FOR_USERNAME}", + f"{LDAP_ATTRIBUTE_FOR_MAIL}", + "cn", + ] + + if ENABLE_LDAP_GROUP_MANAGEMENT: + search_attributes.append(f"{LDAP_ATTRIBUTE_FOR_GROUPS}") + log.info( + f"LDAP Group Management enabled. 
Adding {LDAP_ATTRIBUTE_FOR_GROUPS} to search attributes" + ) + + log.info(f"LDAP search attributes: {search_attributes}") + search_success = connection_app.search( search_base=LDAP_SEARCH_BASE, search_filter=f"(&({LDAP_ATTRIBUTE_FOR_USERNAME}={escape_filter_chars(form_data.user.lower())}){LDAP_SEARCH_FILTERS})", - attributes=[ - f"{LDAP_ATTRIBUTE_FOR_USERNAME}", - f"{LDAP_ATTRIBUTE_FOR_MAIL}", - "cn", - ], + attributes=search_attributes, ) if not search_success or not connection_app.entries: @@ -259,6 +274,69 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm): cn = str(entry["cn"]) user_dn = entry.entry_dn + user_groups = [] + if ENABLE_LDAP_GROUP_MANAGEMENT and LDAP_ATTRIBUTE_FOR_GROUPS in entry: + group_dns = entry[LDAP_ATTRIBUTE_FOR_GROUPS] + log.info(f"LDAP raw group DNs for user {username}: {group_dns}") + + if group_dns: + log.info(f"LDAP group_dns original: {group_dns}") + log.info(f"LDAP group_dns type: {type(group_dns)}") + log.info(f"LDAP group_dns length: {len(group_dns)}") + + if hasattr(group_dns, "value"): + group_dns = group_dns.value + log.info(f"Extracted .value property: {group_dns}") + elif hasattr(group_dns, "__iter__") and not isinstance( + group_dns, (str, bytes) + ): + group_dns = list(group_dns) + log.info(f"Converted to list: {group_dns}") + + if isinstance(group_dns, list): + group_dns = [str(item) for item in group_dns] + else: + group_dns = [str(group_dns)] + + log.info( + f"LDAP group_dns after processing - type: {type(group_dns)}, length: {len(group_dns)}" + ) + + for group_idx, group_dn in enumerate(group_dns): + group_dn = str(group_dn) + log.info(f"Processing group DN #{group_idx + 1}: {group_dn}") + + try: + group_cn = None + + for item in group_dn.split(","): + item = item.strip() + if item.upper().startswith("CN="): + group_cn = item[3:] + break + + if group_cn: + user_groups.append(group_cn) + + else: + log.warning( + f"Could not extract CN from group DN: {group_dn}" + ) + except Exception as e: + log.warning( + f"Failed to extract group name from DN {group_dn}: {e}" + ) + + log.info( + f"LDAP groups for user {username}: {user_groups} (total: {len(user_groups)})" + ) + else: + log.info(f"No groups found for user {username}") + elif ENABLE_LDAP_GROUP_MANAGEMENT: + log.warning( + f"LDAP Group Management enabled but {LDAP_ATTRIBUTE_FOR_GROUPS} attribute not found in user entry" + ) + if username == form_data.user.lower(): connection_user = Connection( server, @@ -334,6 +412,22 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm): user.id, request.app.state.config.USER_PERMISSIONS ) + if ( + user.role != "admin" + and ENABLE_LDAP_GROUP_MANAGEMENT + and user_groups + ): + if ENABLE_LDAP_GROUP_CREATION: + Groups.create_groups_by_group_names(user.id, user_groups) + + try: + Groups.sync_groups_by_group_names(user.id, user_groups) + log.info( + f"Successfully synced groups for user {user.id}: {user_groups}" + ) + except Exception as e: + log.error(f"Failed to sync groups for user {user.id}: {e}") + return { "token": token, "token_type": "Bearer", @@ -386,7 +480,7 @@ async def signin(request: Request, response: Response, form_data: SigninForm): group_names = [name.strip() for name in group_names if name.strip()] if group_names: - Groups.sync_user_groups_by_group_names(user.id, group_names) + Groups.sync_groups_by_group_names(user.id, group_names) elif WEBUI_AUTH == False: admin_email = "admin@localhost" diff --git a/backend/open_webui/routers/images.py b/backend/open_webui/routers/images.py index 
c6d8e41864..52686a5841 100644
--- a/backend/open_webui/routers/images.py
+++ b/backend/open_webui/routers/images.py
@@ -420,7 +420,7 @@ def load_b64_image_data(b64_str):
     try:
         if "," in b64_str:
             header, encoded = b64_str.split(",", 1)
-            mime_type = header.split(";")[0]
+            mime_type = header.split(";")[0].removeprefix("data:")
             img_data = base64.b64decode(encoded)
         else:
             mime_type = "image/png"
@@ -428,7 +428,7 @@ def load_b64_image_data(b64_str):
         return img_data, mime_type
     except Exception as e:
         log.exception(f"Error loading image data: {e}")
-        return None
+        return None, None
 
 
 def load_url_image_data(url, headers=None):
diff --git a/backend/open_webui/routers/notes.py b/backend/open_webui/routers/notes.py
index 09bcb97e83..94f8325d70 100644
--- a/backend/open_webui/routers/notes.py
+++ b/backend/open_webui/routers/notes.py
@@ -124,8 +124,9 @@ async def get_note_by_id(request: Request, id: str, user=Depends(get_verified_us
             status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND
         )
 
-    if (user.role != "admin" and user.id != note.user_id) or (
-        not has_access(user.id, type="read", access_control=note.access_control)
+    if user.role != "admin" and (
+        user.id != note.user_id
+        and not has_access(user.id, type="read", access_control=note.access_control)
     ):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT()
@@ -157,8 +158,9 @@ async def update_note_by_id(
             status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND
         )
 
-    if (user.role != "admin" and user.id != note.user_id) or (
-        not has_access(user.id, type="write", access_control=note.access_control)
+    if user.role != "admin" and (
+        user.id != note.user_id
+        and not has_access(user.id, type="write", access_control=note.access_control)
     ):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT()
@@ -195,8 +197,9 @@ async def delete_note_by_id(request: Request, id: str, user=Depends(get_verified
             status_code=status.HTTP_404_NOT_FOUND, detail=ERROR_MESSAGES.NOT_FOUND
         )
 
-    if (user.role != "admin" and user.id != note.user_id) or (
-        not has_access(user.id, type="write", access_control=note.access_control)
+    if user.role != "admin" and (
+        user.id != note.user_id
+        and not has_access(user.id, type="write", access_control=note.access_control)
     ):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.DEFAULT()
diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py
index 95f48fb1c8..ea46a1cca7 100644
--- a/backend/open_webui/routers/ollama.py
+++ b/backend/open_webui/routers/ollama.py
@@ -1232,6 +1232,9 @@ class GenerateChatCompletionForm(BaseModel):
     stream: Optional[bool] = True
     keep_alive: Optional[Union[int, str]] = None
     tools: Optional[list[dict]] = None
+    model_config = ConfigDict(
+        extra="allow",
+    )
 
 
 async def get_ollama_url(request: Request, model: str, url_idx: Optional[int] = None):
@@ -1269,7 +1272,9 @@ async def generate_chat_completion(
             detail=str(e),
         )
 
-    payload = {**form_data.model_dump(exclude_none=True)}
+    if isinstance(form_data, BaseModel):
+        payload = {**form_data.model_dump(exclude_none=True)}
+
     if "metadata" in payload:
         del payload["metadata"]
 
@@ -1285,11 +1290,7 @@ async def generate_chat_completion(
 
     if params:
         system = params.pop("system", None)
-        # Unlike OpenAI, Ollama does not support params directly in the body
-        payload["options"] = apply_model_params_to_body_ollama(
-            params, (payload.get("options", {}) or {})
-        )
-
+        payload = apply_model_params_to_body_ollama(params, payload)
payload = apply_model_system_prompt_to_body(system, payload, metadata, user) # Check if user has access to the model @@ -1323,7 +1324,7 @@ async def generate_chat_completion( prefix_id = api_config.get("prefix_id", None) if prefix_id: payload["model"] = payload["model"].replace(f"{prefix_id}.", "") - # payload["keep_alive"] = -1 # keep alive forever + return await send_post_request( url=f"{url}/api/chat", payload=json.dumps(payload), diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py index 9c3c393677..7649271fee 100644 --- a/backend/open_webui/routers/openai.py +++ b/backend/open_webui/routers/openai.py @@ -887,6 +887,88 @@ async def generate_chat_completion( await session.close() +async def embeddings(request: Request, form_data: dict, user): + """ + Calls the embeddings endpoint for OpenAI-compatible providers. + + Args: + request (Request): The FastAPI request context. + form_data (dict): OpenAI-compatible embeddings payload. + user (UserModel): The authenticated user. + + Returns: + dict: OpenAI-compatible embeddings response. + """ + idx = 0 + # Prepare payload/body + body = json.dumps(form_data) + # Find correct backend url/key based on model + await get_all_models(request, user=user) + model_id = form_data.get("model") + models = request.app.state.OPENAI_MODELS + if model_id in models: + idx = models[model_id]["urlIdx"] + url = request.app.state.config.OPENAI_API_BASE_URLS[idx] + key = request.app.state.config.OPENAI_API_KEYS[idx] + r = None + session = None + streaming = False + try: + session = aiohttp.ClientSession(trust_env=True) + r = await session.request( + method="POST", + url=f"{url}/embeddings", + data=body, + headers={ + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), + }, + ) + r.raise_for_status() + if "text/event-stream" in r.headers.get("Content-Type", ""): + streaming = True + return StreamingResponse( + r.content, + status_code=r.status, + headers=dict(r.headers), + background=BackgroundTask( + cleanup_response, response=r, session=session + ), + ) + else: + response_data = await r.json() + return response_data + except Exception as e: + log.exception(e) + detail = None + if r is not None: + try: + res = await r.json() + if "error" in res: + detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}" + except Exception: + detail = f"External: {e}" + raise HTTPException( + status_code=r.status if r else 500, + detail=detail if detail else "Open WebUI: Server Connection Error", + ) + finally: + if not streaming and session: + if r: + r.close() + await session.close() + + @router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) async def proxy(path: str, request: Request, user=Depends(get_verified_user)): """ diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 343b0513c9..2bd73c25e3 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -414,6 +414,9 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, "DOCLING_DO_PICTURE_DESCRIPTION": 
request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, + "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, + "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, + "DOCLING_PICTURE_DESCRIPTION_API": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, @@ -467,6 +470,8 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "EXA_API_KEY": request.app.state.config.EXA_API_KEY, "PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "PERPLEXITY_MODEL": request.app.state.config.PERPLEXITY_MODEL, + "PERPLEXITY_SEARCH_CONTEXT_USAGE": request.app.state.config.PERPLEXITY_SEARCH_CONTEXT_USAGE, "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, @@ -520,6 +525,8 @@ class WebConfig(BaseModel): BING_SEARCH_V7_SUBSCRIPTION_KEY: Optional[str] = None EXA_API_KEY: Optional[str] = None PERPLEXITY_API_KEY: Optional[str] = None + PERPLEXITY_MODEL: Optional[str] = None + PERPLEXITY_SEARCH_CONTEXT_USAGE: Optional[str] = None SOUGOU_API_SID: Optional[str] = None SOUGOU_API_SK: Optional[str] = None WEB_LOADER_ENGINE: Optional[str] = None @@ -571,6 +578,9 @@ class ConfigForm(BaseModel): DOCLING_OCR_ENGINE: Optional[str] = None DOCLING_OCR_LANG: Optional[str] = None DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None + DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None + DOCLING_PICTURE_DESCRIPTION_LOCAL: Optional[dict] = None + DOCLING_PICTURE_DESCRIPTION_API: Optional[dict] = None DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None MISTRAL_OCR_API_KEY: Optional[str] = None @@ -744,6 +754,22 @@ async def update_rag_config( else request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION ) + request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = ( + form_data.DOCLING_PICTURE_DESCRIPTION_MODE + if form_data.DOCLING_PICTURE_DESCRIPTION_MODE is not None + else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE + ) + request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL = ( + form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL + if form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL is not None + else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL + ) + request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API = ( + form_data.DOCLING_PICTURE_DESCRIPTION_API + if form_data.DOCLING_PICTURE_DESCRIPTION_API is not None + else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( form_data.DOCUMENT_INTELLIGENCE_ENDPOINT if form_data.DOCUMENT_INTELLIGENCE_ENDPOINT is not None @@ -907,6 +933,10 @@ async def update_rag_config( ) request.app.state.config.EXA_API_KEY = form_data.web.EXA_API_KEY request.app.state.config.PERPLEXITY_API_KEY = form_data.web.PERPLEXITY_API_KEY + request.app.state.config.PERPLEXITY_MODEL = form_data.web.PERPLEXITY_MODEL + request.app.state.config.PERPLEXITY_SEARCH_CONTEXT_USAGE = ( + form_data.web.PERPLEXITY_SEARCH_CONTEXT_USAGE + ) request.app.state.config.SOUGOU_API_SID = 
form_data.web.SOUGOU_API_SID request.app.state.config.SOUGOU_API_SK = form_data.web.SOUGOU_API_SK @@ -977,6 +1007,9 @@ async def update_rag_config( "DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE, "DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG, "DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, + "DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, + "DOCLING_PICTURE_DESCRIPTION_LOCAL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, + "DOCLING_PICTURE_DESCRIPTION_API": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API, "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, @@ -1030,6 +1063,8 @@ async def update_rag_config( "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, "EXA_API_KEY": request.app.state.config.EXA_API_KEY, "PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "PERPLEXITY_MODEL": request.app.state.config.PERPLEXITY_MODEL, + "PERPLEXITY_SEARCH_CONTEXT_USAGE": request.app.state.config.PERPLEXITY_SEARCH_CONTEXT_USAGE, "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, @@ -1321,9 +1356,14 @@ def process_file( EXTERNAL_DOCUMENT_LOADER_API_KEY=request.app.state.config.EXTERNAL_DOCUMENT_LOADER_API_KEY, TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL, DOCLING_SERVER_URL=request.app.state.config.DOCLING_SERVER_URL, - DOCLING_OCR_ENGINE=request.app.state.config.DOCLING_OCR_ENGINE, - DOCLING_OCR_LANG=request.app.state.config.DOCLING_OCR_LANG, - DOCLING_DO_PICTURE_DESCRIPTION=request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, + DOCLING_PARAMS={ + "ocr_engine": request.app.state.config.DOCLING_OCR_ENGINE, + "ocr_lang": request.app.state.config.DOCLING_OCR_LANG, + "do_picture_description": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION, + "picture_description_mode": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE, + "picture_description_local": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL, + "picture_description_api": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API, + }, PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, @@ -1740,19 +1780,14 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) - elif engine == "exa": - return search_exa( - request.app.state.config.EXA_API_KEY, - query, - request.app.state.config.WEB_SEARCH_RESULT_COUNT, - request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, - ) elif engine == "perplexity": return search_perplexity( request.app.state.config.PERPLEXITY_API_KEY, query, request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + model=request.app.state.config.PERPLEXITY_MODEL, + search_context_usage=request.app.state.config.PERPLEXITY_SEARCH_CONTEXT_USAGE, ) elif engine == "sougou": if ( diff --git a/backend/open_webui/routers/utils.py 
b/backend/open_webui/routers/utils.py index b64adafb44..0e6768a671 100644 --- a/backend/open_webui/routers/utils.py +++ b/backend/open_webui/routers/utils.py @@ -33,7 +33,7 @@ class CodeForm(BaseModel): @router.post("/code/format") -async def format_code(form_data: CodeForm, user=Depends(get_verified_user)): +async def format_code(form_data: CodeForm, user=Depends(get_admin_user)): try: formatted_code = black.format_str(form_data.code, mode=black.Mode()) return {"code": formatted_code} diff --git a/backend/open_webui/tasks.py b/backend/open_webui/tasks.py index e575e6885c..2d3955f0a2 100644 --- a/backend/open_webui/tasks.py +++ b/backend/open_webui/tasks.py @@ -2,16 +2,87 @@ import asyncio from typing import Dict from uuid import uuid4 +import json +from redis.asyncio import Redis +from fastapi import Request +from typing import Dict, List, Optional # A dictionary to keep track of active tasks tasks: Dict[str, asyncio.Task] = {} chat_tasks = {} -def cleanup_task(task_id: str, id=None): +REDIS_TASKS_KEY = "open-webui:tasks" +REDIS_CHAT_TASKS_KEY = "open-webui:tasks:chat" +REDIS_PUBSUB_CHANNEL = "open-webui:tasks:commands" + + +def is_redis(request: Request) -> bool: + # Called everywhere a request is available to check Redis + return hasattr(request.app.state, "redis") and (request.app.state.redis is not None) + + +async def redis_task_command_listener(app): + redis: Redis = app.state.redis + pubsub = redis.pubsub() + await pubsub.subscribe(REDIS_PUBSUB_CHANNEL) + + async for message in pubsub.listen(): + if message["type"] != "message": + continue + try: + command = json.loads(message["data"]) + if command.get("action") == "stop": + task_id = command.get("task_id") + local_task = tasks.get(task_id) + if local_task: + local_task.cancel() + except Exception as e: + print(f"Error handling distributed task command: {e}") + + +### ------------------------------ +### REDIS-ENABLED HANDLERS +### ------------------------------ + + +async def redis_save_task(redis: Redis, task_id: str, chat_id: Optional[str]): + pipe = redis.pipeline() + pipe.hset(REDIS_TASKS_KEY, task_id, chat_id or "") + if chat_id: + pipe.sadd(f"{REDIS_CHAT_TASKS_KEY}:{chat_id}", task_id) + await pipe.execute() + + +async def redis_cleanup_task(redis: Redis, task_id: str, chat_id: Optional[str]): + pipe = redis.pipeline() + pipe.hdel(REDIS_TASKS_KEY, task_id) + if chat_id: + pipe.srem(f"{REDIS_CHAT_TASKS_KEY}:{chat_id}", task_id) + if (await pipe.scard(f"{REDIS_CHAT_TASKS_KEY}:{chat_id}").execute())[-1] == 0: + pipe.delete(f"{REDIS_CHAT_TASKS_KEY}:{chat_id}") # Remove if empty set + await pipe.execute() + + +async def redis_list_tasks(redis: Redis) -> List[str]: + return list(await redis.hkeys(REDIS_TASKS_KEY)) + + +async def redis_list_chat_tasks(redis: Redis, chat_id: str) -> List[str]: + return list(await redis.smembers(f"{REDIS_CHAT_TASKS_KEY}:{chat_id}")) + + +async def redis_send_command(redis: Redis, command: dict): + await redis.publish(REDIS_PUBSUB_CHANNEL, json.dumps(command)) + + +async def cleanup_task(request, task_id: str, id=None): """ Remove a completed or canceled task from the global `tasks` dictionary. 
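+
+     In Redis mode this also clears the shared bookkeeping described above:
+     the task_id field of the "open-webui:tasks" hash and, when a chat id is
+     given, the task_id member of the "open-webui:tasks:chat:<chat_id>" set.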
""" + if is_redis(request): + await redis_cleanup_task(request.app.state.redis, task_id, id) + tasks.pop(task_id, None) # Remove the task if it exists # If an ID is provided, remove the task from the chat_tasks dictionary @@ -21,7 +92,7 @@ def cleanup_task(task_id: str, id=None): chat_tasks.pop(id, None) -def create_task(coroutine, id=None): +async def create_task(request, coroutine, id=None): """ Create a new asyncio task and add it to the global task dictionary. """ @@ -29,7 +100,9 @@ def create_task(coroutine, id=None): task = asyncio.create_task(coroutine) # Create the task # Add a done callback for cleanup - task.add_done_callback(lambda t: cleanup_task(task_id, id)) + task.add_done_callback( + lambda t: asyncio.create_task(cleanup_task(request, task_id, id)) + ) tasks[task_id] = task # If an ID is provided, associate the task with that ID @@ -38,34 +111,46 @@ def create_task(coroutine, id=None): else: chat_tasks[id] = [task_id] + if is_redis(request): + await redis_save_task(request.app.state.redis, task_id, id) + return task_id, task -def get_task(task_id: str): - """ - Retrieve a task by its task ID. - """ - return tasks.get(task_id) - - -def list_tasks(): +async def list_tasks(request): """ List all currently active task IDs. """ + if is_redis(request): + return await redis_list_tasks(request.app.state.redis) return list(tasks.keys()) -def list_task_ids_by_chat_id(id): +async def list_task_ids_by_chat_id(request, id): """ List all tasks associated with a specific ID. """ + if is_redis(request): + return await redis_list_chat_tasks(request.app.state.redis, id) return chat_tasks.get(id, []) -async def stop_task(task_id: str): +async def stop_task(request, task_id: str): """ Cancel a running task and remove it from the global task list. """ + if is_redis(request): + # PUBSUB: All instances check if they have this task, and stop if so. + await redis_send_command( + request.app.state.redis, + { + "action": "stop", + "task_id": task_id, + }, + ) + # Optionally check if task_id still in Redis a few moments later for feedback? 
+ return {"status": True, "message": f"Stop signal sent for {task_id}"} + task = tasks.get(task_id) if not task: raise ValueError(f"Task with ID {task_id} not found.") diff --git a/backend/open_webui/utils/access_control.py b/backend/open_webui/utils/access_control.py index 1699cfaa7c..c93574527f 100644 --- a/backend/open_webui/utils/access_control.py +++ b/backend/open_webui/utils/access_control.py @@ -60,7 +60,7 @@ def get_permissions( # Combine permissions from all user groups for group in user_groups: - group_permissions = group.permissions + group_permissions = group.permissions or {} permissions = combine_permissions(permissions, group_permissions) # Ensure all fields from default_permissions are present and filled in diff --git a/backend/open_webui/utils/auth.py b/backend/open_webui/utils/auth.py index 2db0da7e5d..9befaf2a91 100644 --- a/backend/open_webui/utils/auth.py +++ b/backend/open_webui/utils/auth.py @@ -23,6 +23,7 @@ from open_webui.env import ( TRUSTED_SIGNATURE_KEY, STATIC_DIR, SRC_LOG_LEVELS, + WEBUI_AUTH_TRUSTED_EMAIL_HEADER, ) from fastapi import BackgroundTasks, Depends, HTTPException, Request, Response, status @@ -157,6 +158,7 @@ def get_http_authorization_cred(auth_header: Optional[str]): def get_current_user( request: Request, + response: Response, background_tasks: BackgroundTasks, auth_token: HTTPAuthorizationCredentials = Depends(bearer_security), ): @@ -225,6 +227,21 @@ def get_current_user( detail=ERROR_MESSAGES.INVALID_TOKEN, ) else: + if WEBUI_AUTH_TRUSTED_EMAIL_HEADER: + trusted_email = request.headers.get( + WEBUI_AUTH_TRUSTED_EMAIL_HEADER, "" + ).lower() + if trusted_email and user.email != trusted_email: + # Delete the token cookie + response.delete_cookie("token") + # Delete OAuth token if present + if request.cookies.get("oauth_id_token"): + response.delete_cookie("oauth_id_token") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User mismatch. 
Please sign in again.", + ) + # Add user info to current span current_span = trace.get_current_span() if current_span: diff --git a/backend/open_webui/utils/chat.py b/backend/open_webui/utils/chat.py index 4bd744e3c3..268c910e3e 100644 --- a/backend/open_webui/utils/chat.py +++ b/backend/open_webui/utils/chat.py @@ -320,12 +320,7 @@ async def chat_completed(request: Request, form_data: dict, user: Any): extra_params = { "__event_emitter__": get_event_emitter(metadata), "__event_call__": get_event_call(metadata), - "__user__": { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - }, + "__user__": user.model_dump() if isinstance(user, UserModel) else {}, "__metadata__": metadata, "__request__": request, "__model__": model, @@ -424,12 +419,7 @@ async def chat_action(request: Request, action_id: str, form_data: dict, user: A params[key] = value if "__user__" in sig.parameters: - __user__ = { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - } + __user__ = user.model_dump() if isinstance(user, UserModel) else {} try: if hasattr(function_module, "UserValves"): diff --git a/backend/open_webui/utils/embeddings.py b/backend/open_webui/utils/embeddings.py new file mode 100644 index 0000000000..49ce72c3c5 --- /dev/null +++ b/backend/open_webui/utils/embeddings.py @@ -0,0 +1,90 @@ +import random +import logging +import sys + +from fastapi import Request +from open_webui.models.users import UserModel +from open_webui.models.models import Models +from open_webui.utils.models import check_model_access +from open_webui.env import SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL, BYPASS_MODEL_ACCESS_CONTROL + +from open_webui.routers.openai import embeddings as openai_embeddings +from open_webui.routers.ollama import ( + embeddings as ollama_embeddings, + GenerateEmbeddingsForm, +) + + +from open_webui.utils.payload import convert_embedding_payload_openai_to_ollama +from open_webui.utils.response import convert_embedding_response_ollama_to_openai + +logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL) +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["MAIN"]) + + +async def generate_embeddings( + request: Request, + form_data: dict, + user: UserModel, + bypass_filter: bool = False, +): + """ + Dispatch and handle embeddings generation based on the model type (OpenAI, Ollama). + + Args: + request (Request): The FastAPI request context. + form_data (dict): The input data sent to the endpoint. + user (UserModel): The authenticated user. + bypass_filter (bool): If True, disables access filtering (default False). + + Returns: + dict: The embeddings response, following OpenAI API compatibility.
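+
+     Example (illustrative values; the model id is hypothetical):
+         form_data = {"model": "text-embedding-3-small", "input": ["hello", "world"]}
+         response = await generate_embeddings(request, form_data, user)
+         # response["object"] == "list"; response["data"][0]["embedding"] is the vector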
+ """ + if BYPASS_MODEL_ACCESS_CONTROL: + bypass_filter = True + + # Attach extra metadata from request.state if present + if hasattr(request.state, "metadata"): + if "metadata" not in form_data: + form_data["metadata"] = request.state.metadata + else: + form_data["metadata"] = { + **form_data["metadata"], + **request.state.metadata, + } + + # If "direct" flag present, use only that model + if getattr(request.state, "direct", False) and hasattr(request.state, "model"): + models = { + request.state.model["id"]: request.state.model, + } + else: + models = request.app.state.MODELS + + model_id = form_data.get("model") + if model_id not in models: + raise Exception("Model not found") + model = models[model_id] + + # Access filtering + if not getattr(request.state, "direct", False): + if not bypass_filter and user.role == "user": + check_model_access(user, model) + + # Ollama backend + if model.get("owned_by") == "ollama": + ollama_payload = convert_embedding_payload_openai_to_ollama(form_data) + response = await ollama_embeddings( + request=request, + form_data=GenerateEmbeddingsForm(**ollama_payload), + user=user, + ) + return convert_embedding_response_ollama_to_openai(response) + + # Default: OpenAI or compatible backend + return await openai_embeddings( + request=request, + form_data=form_data, + user=user, + ) diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 21659ea272..0106779a87 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -37,7 +37,12 @@ from open_webui.routers.tasks import ( generate_chat_tags, ) from open_webui.routers.retrieval import process_web_search, SearchForm -from open_webui.routers.images import image_generations, GenerateImageForm +from open_webui.routers.images import ( + load_b64_image_data, + image_generations, + GenerateImageForm, + upload_image, +) from open_webui.routers.pipelines import ( process_pipeline_inlet_filter, process_pipeline_outlet_filter, @@ -693,13 +698,8 @@ def apply_params_to_form_data(form_data, model): params = deep_update(params, custom_params) if model.get("ollama"): + # Ollama specific parameters form_data["options"] = params - - if "format" in params: - form_data["format"] = params["format"] - - if "keep_alive" in params: - form_data["keep_alive"] = params["keep_alive"] else: if isinstance(params, dict): for key, value in params.items(): @@ -727,12 +727,7 @@ async def process_chat_payload(request, form_data, user, metadata, model): extra_params = { "__event_emitter__": event_emitter, "__event_call__": event_call, - "__user__": { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - }, + "__user__": user.model_dump() if isinstance(user, UserModel) else {}, "__metadata__": metadata, "__request__": request, "__model__": model, @@ -1327,12 +1322,7 @@ async def process_chat_response( extra_params = { "__event_emitter__": event_emitter, "__event_call__": event_caller, - "__user__": { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - }, + "__user__": user.model_dump() if isinstance(user, UserModel) else {}, "__metadata__": metadata, "__request__": request, "__model__": model, @@ -1876,9 +1866,11 @@ async def process_chat_response( value = delta.get("content") - reasoning_content = delta.get( - "reasoning_content" - ) or delta.get("reasoning") + reasoning_content = ( + delta.get("reasoning_content") + or delta.get("reasoning") + or delta.get("thinking") + ) if reasoning_content: if ( not 
content_blocks @@ -2269,28 +2261,21 @@ async def process_chat_response( stdoutLines = stdout.split("\n") for idx, line in enumerate(stdoutLines): if "data:image/png;base64" in line: - id = str(uuid4()) - - # ensure the path exists - os.makedirs( - os.path.join(CACHE_DIR, "images"), - exist_ok=True, + image_url = "" + # Extract base64 image data from the line + image_data, content_type = ( + load_b64_image_data(line) ) - - image_path = os.path.join( - CACHE_DIR, - f"images/{id}.png", - ) - - with open(image_path, "wb") as f: - f.write( - base64.b64decode( - line.split(",")[1] - ) + if image_data is not None: + image_url = upload_image( + request, + image_data, + content_type, + metadata, + user, ) - stdoutLines[idx] = ( - f"![Output Image {idx}](/cache/images/{id}.png)" + f"![Output Image]({image_url})" ) output["stdout"] = "\n".join(stdoutLines) @@ -2301,30 +2286,22 @@ async def process_chat_response( resultLines = result.split("\n") for idx, line in enumerate(resultLines): if "data:image/png;base64" in line: - id = str(uuid4()) - - # ensure the path exists - os.makedirs( - os.path.join(CACHE_DIR, "images"), - exist_ok=True, + image_url = "" + # Extract base64 image data from the line + image_data, content_type = ( + load_b64_image_data(line) ) - - image_path = os.path.join( - CACHE_DIR, - f"images/{id}.png", - ) - - with open(image_path, "wb") as f: - f.write( - base64.b64decode( - line.split(",")[1] - ) + if image_data is not None: + image_url = upload_image( + request, + image_data, + content_type, + metadata, + user, ) - resultLines[idx] = ( - f"![Output Image {idx}](/cache/images/{id}.png)" + f"![Output Image]({image_url})" ) - output["result"] = "\n".join(resultLines) except Exception as e: output = str(e) @@ -2433,8 +2410,8 @@ async def process_chat_response( await response.background() # background_tasks.add_task(post_response_handler, response, events) - task_id, _ = create_task( - post_response_handler(response, events), id=metadata["chat_id"] + task_id, _ = await create_task( + request, post_response_handler(response, events), id=metadata["chat_id"] ) return {"status": True, "task_id": task_id} diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py index ffc8c93ca4..107e2ed252 100644 --- a/backend/open_webui/utils/misc.py +++ b/backend/open_webui/utils/misc.py @@ -208,6 +208,7 @@ def openai_chat_message_template(model: str): def openai_chat_chunk_message_template( model: str, content: Optional[str] = None, + reasoning_content: Optional[str] = None, tool_calls: Optional[list[dict]] = None, usage: Optional[dict] = None, ) -> dict: @@ -220,6 +221,9 @@ def openai_chat_chunk_message_template( if content: template["choices"][0]["delta"]["content"] = content + if reasoning_content: + template["choices"][0]["delta"]["reasoning_content"] = reasoning_content + if tool_calls: template["choices"][0]["delta"]["tool_calls"] = tool_calls @@ -234,6 +238,7 @@ def openai_chat_chunk_message_template( def openai_chat_completion_message_template( model: str, message: Optional[str] = None, + reasoning_content: Optional[str] = None, tool_calls: Optional[list[dict]] = None, usage: Optional[dict] = None, ) -> dict: @@ -241,8 +246,9 @@ def openai_chat_completion_message_template( template["object"] = "chat.completion" if message is not None: template["choices"][0]["message"] = { - "content": message, "role": "assistant", + "content": message, + **({"reasoning_content": reasoning_content} if reasoning_content else {}), **({"tool_calls": tool_calls} if tool_calls else {}), } diff 
--git a/backend/open_webui/utils/payload.py b/backend/open_webui/utils/payload.py index 02eb0da22b..9b7f748359 100644 --- a/backend/open_webui/utils/payload.py +++ b/backend/open_webui/utils/payload.py @@ -175,16 +175,32 @@ def apply_model_params_to_body_ollama(params: dict, form_data: dict) -> dict: "num_thread": int, } - # Extract keep_alive from options if it exists - if "options" in form_data and "keep_alive" in form_data["options"]: - form_data["keep_alive"] = form_data["options"]["keep_alive"] - del form_data["options"]["keep_alive"] + def parse_json(value: str): + """ + Parse a JSON string, returning the value unchanged if it is not valid JSON. + """ + try: + return json.loads(value) + except Exception: + return value - if "options" in form_data and "format" in form_data["options"]: - form_data["format"] = form_data["options"]["format"] - del form_data["options"]["format"] + ollama_root_params = { + "format": lambda x: parse_json(x), + "keep_alive": lambda x: parse_json(x), + "think": bool, + } - return apply_model_params_to_body(params, form_data, mappings) + for key, value in ollama_root_params.items(): + if (param := params.get(key, None)) is not None: + # Copy the parameter to the root level, then delete it so Ollama does not warn about an invalid option + form_data[key] = value(param) + del params[key] + + # Unlike OpenAI, Ollama does not support params directly in the body + form_data["options"] = apply_model_params_to_body( + params, (form_data.get("options", {}) or {}), mappings + ) + return form_data def convert_messages_openai_to_ollama(messages: list[dict]) -> list[dict]: @@ -279,36 +295,48 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict: openai_payload.get("messages") ) ollama_payload["stream"] = openai_payload.get("stream", False) - if "tools" in openai_payload: ollama_payload["tools"] = openai_payload["tools"] - if "format" in openai_payload: - ollama_payload["format"] = openai_payload["format"] - # If there are advanced parameters in the payload, format them in Ollama's options field if openai_payload.get("options"): ollama_payload["options"] = openai_payload["options"] ollama_options = openai_payload["options"] + def parse_json(value: str): + """ + Parse a JSON string, returning the value unchanged if it is not valid JSON. + """ + try: + return json.loads(value) + except Exception: + return value + + ollama_root_params = { + "format": lambda x: parse_json(x), + "keep_alive": lambda x: parse_json(x), + "think": bool, + } + + # Ollama's options field can contain parameters that should be at the root level. + for key, value in ollama_root_params.items(): + if (param := ollama_options.get(key, None)) is not None: + # Copy the parameter to the root level, then delete it so Ollama does not warn about an invalid option + ollama_payload[key] = value(param) + del ollama_options[key] + # Re-Mapping OpenAI's `max_tokens` -> Ollama's `num_predict` if "max_tokens" in ollama_options: ollama_options["num_predict"] = ollama_options["max_tokens"] - del ollama_options[ "max_tokens" ] # To prevent Ollama warning of invalid option provided + del ollama_options["max_tokens"] # Ollama lacks a "system" prompt option. It has to be provided as a direct parameter, so we copy it down. + # Not sure why this is needed, but we'll keep it for compatibility.
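+ # e.g. (illustrative) options = {"system": "Be concise.", "temperature": 0.7}
+ # yields ollama_payload["system"] = "Be concise." while "temperature" remains
+ # under ollama_payload["options"].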
if "system" in ollama_options: ollama_payload["system"] = ollama_options["system"] - del ollama_options[ - "system" - ] # To prevent Ollama warning of invalid option provided + del ollama_options["system"] - # Extract keep_alive from options if it exists - if "keep_alive" in ollama_options: - ollama_payload["keep_alive"] = ollama_options["keep_alive"] - del ollama_options["keep_alive"] + ollama_payload["options"] = ollama_options # If there is the "stop" parameter in the openai_payload, remap it to the ollama_payload.options if "stop" in openai_payload: @@ -329,3 +357,32 @@ def convert_payload_openai_to_ollama(openai_payload: dict) -> dict: ollama_payload["format"] = format return ollama_payload + + +def convert_embedding_payload_openai_to_ollama(openai_payload: dict) -> dict: + """ + Convert an embeddings request payload from OpenAI format to Ollama format. + + Args: + openai_payload (dict): The original payload designed for OpenAI API usage. + + Returns: + dict: A payload compatible with the Ollama API embeddings endpoint. + """ + ollama_payload = {"model": openai_payload.get("model")} + input_value = openai_payload.get("input") + + # Ollama expects 'input' as a list, and 'prompt' as a single string. + if isinstance(input_value, list): + ollama_payload["input"] = input_value + ollama_payload["prompt"] = "\n".join(str(x) for x in input_value) + else: + ollama_payload["input"] = [input_value] + ollama_payload["prompt"] = str(input_value) + + # Optionally forward other fields if present + for optional_key in ("options", "truncate", "keep_alive"): + if optional_key in openai_payload: + ollama_payload[optional_key] = openai_payload[optional_key] + + return ollama_payload diff --git a/backend/open_webui/utils/redis.py b/backend/open_webui/utils/redis.py index e0a53e73d1..70ae18f115 100644 --- a/backend/open_webui/utils/redis.py +++ b/backend/open_webui/utils/redis.py @@ -1,7 +1,6 @@ import socketio -import redis -from redis import asyncio as aioredis from urllib.parse import urlparse +from typing import Optional def parse_redis_service_url(redis_url): @@ -18,23 +17,46 @@ def parse_redis_service_url(redis_url): } -def get_redis_connection(redis_url, redis_sentinels, decode_responses=True): - if redis_sentinels: - redis_config = parse_redis_service_url(redis_url) - sentinel = redis.sentinel.Sentinel( - redis_sentinels, - port=redis_config["port"], - db=redis_config["db"], - username=redis_config["username"], - password=redis_config["password"], - decode_responses=decode_responses, - ) +def get_redis_connection( + redis_url, redis_sentinels, async_mode=False, decode_responses=True +): + if async_mode: + import redis.asyncio as redis - # Get a master connection from Sentinel - return sentinel.master_for(redis_config["service"]) + # If using sentinel in async mode + if redis_sentinels: + redis_config = parse_redis_service_url(redis_url) + sentinel = redis.sentinel.Sentinel( + redis_sentinels, + port=redis_config["port"], + db=redis_config["db"], + username=redis_config["username"], + password=redis_config["password"], + decode_responses=decode_responses, + ) + return sentinel.master_for(redis_config["service"]) + elif redis_url: + return redis.from_url(redis_url, decode_responses=decode_responses) + else: + return None else: - # Standard Redis connection - return redis.Redis.from_url(redis_url, decode_responses=decode_responses) + import redis + + if redis_sentinels: + redis_config = parse_redis_service_url(redis_url) + sentinel = redis.sentinel.Sentinel( + redis_sentinels, + 
port=redis_config["port"], + db=redis_config["db"], + username=redis_config["username"], + password=redis_config["password"], + decode_responses=decode_responses, + ) + return sentinel.master_for(redis_config["service"]) + elif redis_url: + return redis.Redis.from_url(redis_url, decode_responses=decode_responses) + else: + return None def get_sentinels_from_env(sentinel_hosts_env, sentinel_port_env): diff --git a/backend/open_webui/utils/response.py b/backend/open_webui/utils/response.py index 8c3f1a58eb..8ddd502e2e 100644 --- a/backend/open_webui/utils/response.py +++ b/backend/open_webui/utils/response.py @@ -83,6 +83,7 @@ def convert_ollama_usage_to_openai(data: dict) -> dict: def convert_response_ollama_to_openai(ollama_response: dict) -> dict: model = ollama_response.get("model", "ollama") message_content = ollama_response.get("message", {}).get("content", "") + reasoning_content = ollama_response.get("message", {}).get("thinking", None) tool_calls = ollama_response.get("message", {}).get("tool_calls", None) openai_tool_calls = None @@ -94,7 +95,7 @@ def convert_response_ollama_to_openai(ollama_response: dict) -> dict: usage = convert_ollama_usage_to_openai(data) response = openai_chat_completion_message_template( - model, message_content, openai_tool_calls, usage + model, message_content, reasoning_content, openai_tool_calls, usage ) return response @@ -105,6 +106,7 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response) model = data.get("model", "ollama") message_content = data.get("message", {}).get("content", None) + reasoning_content = data.get("message", {}).get("thinking", None) tool_calls = data.get("message", {}).get("tool_calls", None) openai_tool_calls = None @@ -118,10 +120,71 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response) usage = convert_ollama_usage_to_openai(data) data = openai_chat_chunk_message_template( - model, message_content, openai_tool_calls, usage + model, message_content, reasoning_content, openai_tool_calls, usage ) line = f"data: {json.dumps(data)}\n\n" yield line yield "data: [DONE]\n\n" + + +def convert_embedding_response_ollama_to_openai(response) -> dict: + """ + Convert the response from Ollama embeddings endpoint to the OpenAI-compatible format. + + Args: + response (dict): The response from the Ollama API, + e.g. {"embedding": [...], "model": "..."} + or {"embeddings": [{"embedding": [...], "index": 0}, ...], "model": "..."} + + Returns: + dict: Response adapted to OpenAI's embeddings API format. + e.g. { + "object": "list", + "data": [ + {"object": "embedding", "embedding": [...], "index": 0}, + ... + ], + "model": "...", + } + """ + # Ollama batch-style output + if isinstance(response, dict) and "embeddings" in response: + openai_data = [] + for i, emb in enumerate(response["embeddings"]): + openai_data.append( + { + "object": "embedding", + "embedding": emb.get("embedding"), + "index": emb.get("index", i), + } + ) + return { + "object": "list", + "data": openai_data, + "model": response.get("model"), + } + # Ollama single output + elif isinstance(response, dict) and "embedding" in response: + return { + "object": "list", + "data": [ + { + "object": "embedding", + "embedding": response["embedding"], + "index": 0, + } + ], + "model": response.get("model"), + } + # Already OpenAI-compatible? 
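+ # (e.g. {"embeddings": [{"embedding": [0.1, 0.2], "index": 0}], "model": "m"}
+ # is reshaped by the batch branch above, while a response that already
+ # carries an OpenAI-style "data" list is passed through unchanged below.)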
+ elif ( + isinstance(response, dict) + and "data" in response + and isinstance(response["data"], list) + ): + return response + + # Fallback: return as is if unrecognized + return response diff --git a/backend/open_webui/utils/tools.py b/backend/open_webui/utils/tools.py index 0774522dbd..dda2635ec7 100644 --- a/backend/open_webui/utils/tools.py +++ b/backend/open_webui/utils/tools.py @@ -479,7 +479,7 @@ async def get_tool_server_data(token: str, url: str) -> Dict[str, Any]: "specs": convert_openapi_to_tool_payload(res), } - log.info("Fetched data:", data) + log.info(f"Fetched data: {data}") return data @@ -644,5 +644,5 @@ async def execute_tool_server( except Exception as err: error = str(err) - log.exception("API Request Error:", error) + log.exception(f"API Request Error: {error}") return {"error": error} diff --git a/backend/requirements.txt b/backend/requirements.txt index c714060478..9b9020766f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -7,14 +7,13 @@ python-socketio==5.13.0 python-jose==3.4.0 passlib[bcrypt]==1.7.4 -requests==2.32.3 +requests==2.32.4 aiohttp==3.11.11 async-timeout aiocache aiofiles starlette-compress==1.6.0 - sqlalchemy==2.0.38 alembic==1.14.0 peewee==3.18.1 @@ -96,7 +95,7 @@ authlib==1.4.1 black==25.1.0 langfuse==2.44.0 -youtube-transcript-api==1.0.3 +youtube-transcript-api==1.1.0 pytube==15.0.0 extract_msg diff --git a/backend/start.sh b/backend/start.sh index 84d5ec8958..9e106760c8 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -14,7 +14,11 @@ if [[ "${WEB_LOADER_ENGINE,,}" == "playwright" ]]; then python -c "import nltk; nltk.download('punkt_tab')" fi -KEY_FILE=.webui_secret_key +if [ -n "${WEBUI_SECRET_KEY_FILE}" ]; then + KEY_FILE="${WEBUI_SECRET_KEY_FILE}" +else + KEY_FILE=".webui_secret_key" +fi PORT="${PORT:-8080}" HOST="${HOST:-0.0.0.0}" diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 8d9aae3ac6..e38fdb2aa6 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -18,6 +18,10 @@ IF /I "%WEB_LOADER_ENGINE%" == "playwright" ( ) SET "KEY_FILE=.webui_secret_key" +IF NOT "%WEBUI_SECRET_KEY_FILE%" == "" ( + SET "KEY_FILE=%WEBUI_SECRET_KEY_FILE%" +) + IF "%PORT%"=="" SET PORT=8080 IF "%HOST%"=="" SET HOST=0.0.0.0 SET "WEBUI_SECRET_KEY=%WEBUI_SECRET_KEY%" diff --git a/package-lock.json b/package-lock.json index fbe35065e1..48835efcaf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "open-webui", - "version": "0.6.13", + "version": "0.6.14", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.6.13", + "version": "0.6.14", "dependencies": { "@azure/msal-browser": "^4.5.0", "@codemirror/lang-javascript": "^6.2.2", @@ -31,7 +31,7 @@ "@tiptap/starter-kit": "^2.10.0", "@xyflow/svelte": "^0.1.19", "async": "^3.2.5", - "bits-ui": "^0.19.7", + "bits-ui": "^0.21.15", "codemirror": "^6.0.1", "codemirror-lang-elixir": "^4.0.0", "codemirror-lang-hcl": "^0.1.0", @@ -1201,26 +1201,29 @@ } }, "node_modules/@floating-ui/core": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.0.tgz", - "integrity": "sha512-PcF++MykgmTj3CIyOQbKA/hDzOAiqI3mhuoN44WRCopIs1sgoDoU4oty4Jtqaj/y3oDU6fnVSm4QG0a3t5i0+g==", + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.1.tgz", + "integrity": "sha512-azI0DrjMMfIug/ExbBaeDVJXcY0a7EPvPjb2xAJPa4HeimBX+Z18HK8QQR3jb6356SnDDdxx+hinMLcJEDdOjw==", + "license": "MIT", "dependencies": { - "@floating-ui/utils": "^0.2.1" + 
"@floating-ui/utils": "^0.2.9" } }, "node_modules/@floating-ui/dom": { - "version": "1.6.3", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.3.tgz", - "integrity": "sha512-RnDthu3mzPlQ31Ss/BTwQ1zjzIhr3lk1gZB1OC56h/1vEtaXkESrOqL5fQVMfXpwGtRwX+YsZBdyHtJMQnkArw==", + "version": "1.7.1", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.1.tgz", + "integrity": "sha512-cwsmW/zyw5ltYTUeeYJ60CnQuPqmGwuGVhG9w0PRaRKkAyi38BT5CKrpIbb+jtahSwUl04cWzSx9ZOIxeS6RsQ==", + "license": "MIT", "dependencies": { - "@floating-ui/core": "^1.0.0", - "@floating-ui/utils": "^0.2.0" + "@floating-ui/core": "^1.7.1", + "@floating-ui/utils": "^0.2.9" } }, "node_modules/@floating-ui/utils": { - "version": "0.2.1", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.1.tgz", - "integrity": "sha512-9TANp6GPoMtYzQdt54kfAyMmz1+osLlXdg2ENroU7zzrtflTLrrC/lgrIfaSe+Wu0b89GKccT7vxXA0MoAIO+Q==" + "version": "0.2.9", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.9.tgz", + "integrity": "sha512-MDWhGtE+eHw5JW7lq4qhc5yRLS11ERl1c7Z6Xd0a58DozHES6EnNNwUWbMiG4J9Cgj053Bhk8zvlhFYKVhULwg==", + "license": "MIT" }, "node_modules/@gulpjs/to-absolute-glob": { "version": "4.0.0", @@ -1750,9 +1753,10 @@ } }, "node_modules/@internationalized/date": { - "version": "3.5.2", - "resolved": "https://registry.npmjs.org/@internationalized/date/-/date-3.5.2.tgz", - "integrity": "sha512-vo1yOMUt2hzp63IutEaTUxROdvQg1qlMRsbCvbay2AK2Gai7wIgCyK5weEX3nHkiLgo4qCXHijFNC/ILhlRpOQ==", + "version": "3.8.2", + "resolved": "https://registry.npmjs.org/@internationalized/date/-/date-3.8.2.tgz", + "integrity": "sha512-/wENk7CbvLbkUvX1tu0mwq49CVkkWpkXubGel6birjRPyo6uQ4nQpnq5xZu823zRCwwn82zgHrvgF1vZyvmVgA==", + "license": "Apache-2.0", "dependencies": { "@swc/helpers": "^0.5.0" } @@ -2032,9 +2036,10 @@ "integrity": "sha512-CZWV/q6TTe8ta61cZXjfnnHsfWIdFhms03M9T7Cnd5y2mdpylJM0rF1qRq+wsQVRMLz1OYPVEBU9ph2Bx8cxrg==" }, "node_modules/@melt-ui/svelte": { - "version": "0.76.0", - "resolved": "https://registry.npmjs.org/@melt-ui/svelte/-/svelte-0.76.0.tgz", - "integrity": "sha512-X1ktxKujjLjOBt8LBvfckHGDMrkHWceRt1jdsUTf0EH76ikNPP1ofSoiV0IhlduDoCBV+2YchJ8kXCDfDXfC9Q==", + "version": "0.76.2", + "resolved": "https://registry.npmjs.org/@melt-ui/svelte/-/svelte-0.76.2.tgz", + "integrity": "sha512-7SbOa11tXUS95T3fReL+dwDs5FyJtCEqrqG3inRziDws346SYLsxOQ6HmX+4BkIsQh1R8U3XNa+EMmdMt38lMA==", + "license": "MIT", "dependencies": { "@floating-ui/core": "^1.3.1", "@floating-ui/dom": "^1.4.5", @@ -2610,11 +2615,12 @@ } }, "node_modules/@swc/helpers": { - "version": "0.5.7", - "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.7.tgz", - "integrity": "sha512-BVvNZhx362+l2tSwSuyEUV4h7+jk9raNdoTSdLfwTshXJSaGmYKluGRJznziCI3KX02Z19DdsQrdfrpXAU3Hfg==", + "version": "0.5.17", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.17.tgz", + "integrity": "sha512-5IKx/Y13RsYd+sauPb2x+U/xZikHjolzfuDgTAl/Tdf3Q8rslRvC19NKDLgAJQ6wsqADk10ntlv08nPFw/gO/A==", + "license": "Apache-2.0", "dependencies": { - "tslib": "^2.4.0" + "tslib": "^2.8.0" } }, "node_modules/@tailwindcss/container-queries": { @@ -4381,16 +4387,20 @@ } }, "node_modules/bits-ui": { - "version": "0.19.7", - "resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-0.19.7.tgz", - "integrity": "sha512-GHUpKvN7QyazhnZNkUy0lxg6W1M6KJHWSZ4a/UGCjPE6nQgk6vKbGysY67PkDtQMknZTZAzVoMj1Eic4IKeCRQ==", + "version": "0.21.15", + "resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-0.21.15.tgz", + "integrity": 
"sha512-+m5WSpJnFdCcNdXSTIVC1WYBozipO03qRh03GFWgrdxoHiolCfwW71EYG4LPCWYPG6KcTZV0Cj6iHSiZ7cdKdg==", + "license": "MIT", "dependencies": { "@internationalized/date": "^3.5.1", - "@melt-ui/svelte": "0.76.0", + "@melt-ui/svelte": "0.76.2", "nanoid": "^5.0.5" }, + "funding": { + "url": "https://github.com/sponsors/huntabyte" + }, "peerDependencies": { - "svelte": "^4.0.0" + "svelte": "^4.0.0 || ^5.0.0-next.118" } }, "node_modules/bl": { @@ -11842,9 +11852,10 @@ } }, "node_modules/tslib": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", - "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", + "license": "0BSD" }, "node_modules/tunnel-agent": { "version": "0.6.0", diff --git a/package.json b/package.json index 8737788a92..3769edebe4 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - "version": "0.6.13", + "version": "0.6.14", "private": true, "scripts": { "dev": "npm run pyodide:fetch && vite dev --host", @@ -75,7 +75,7 @@ "@tiptap/starter-kit": "^2.10.0", "@xyflow/svelte": "^0.1.19", "async": "^3.2.5", - "bits-ui": "^0.19.7", + "bits-ui": "^0.21.15", "codemirror": "^6.0.1", "codemirror-lang-elixir": "^4.0.0", "codemirror-lang-hcl": "^0.1.0", diff --git a/pyproject.toml b/pyproject.toml index 51ea658909..35462692b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ license = { file = "LICENSE" } dependencies = [ "fastapi==0.115.7", - "uvicorn[standard]==0.34.0", + "uvicorn[standard]==0.34.2", "pydantic==2.10.6", "python-multipart==0.0.20", @@ -15,12 +15,11 @@ dependencies = [ "python-jose==3.4.0", "passlib[bcrypt]==1.7.4", - "requests==2.32.3", + "requests==2.32.4", "aiohttp==3.11.11", "async-timeout", "aiocache", "aiofiles", - "starlette-compress==1.6.0", "sqlalchemy==2.0.38", @@ -83,13 +82,13 @@ dependencies = [ "openpyxl==3.1.5", "pyxlsb==1.0.10", "xlrd==2.0.1", - "validators==0.34.0", + "validators==0.35.0", "psutil", "sentencepiece", "soundfile==0.13.1", - "azure-ai-documentintelligence==1.0.0", + "azure-ai-documentintelligence==1.0.2", - "pillow==11.1.0", + "pillow==11.2.1", "opencv-python-headless==4.11.0.86", "rapidocr-onnxruntime==1.4.4", "rank-bm25==0.2.2", @@ -103,7 +102,7 @@ dependencies = [ "black==25.1.0", "langfuse==2.44.0", - "youtube-transcript-api==1.0.3", + "youtube-transcript-api==1.1.0", "pytube==15.0.0", "extract_msg", diff --git a/scripts/prepare-pyodide.js b/scripts/prepare-pyodide.js index 70f3cf5c6c..664683a30d 100644 --- a/scripts/prepare-pyodide.js +++ b/scripts/prepare-pyodide.js @@ -12,7 +12,8 @@ const packages = [ 'sympy', 'tiktoken', 'seaborn', - 'pytz' + 'pytz', + 'black' ]; import { loadPyodide } from 'pyodide'; diff --git a/src/lib/apis/auths/index.ts b/src/lib/apis/auths/index.ts index 169a6c14fc..842edd9c9d 100644 --- a/src/lib/apis/auths/index.ts +++ b/src/lib/apis/auths/index.ts @@ -336,7 +336,7 @@ export const userSignOut = async () => { }) .then(async (res) => { if (!res.ok) throw await res.json(); - return res; + return res.json(); }) .catch((err) => { console.error(err); diff --git a/src/lib/components/AddConnectionModal.svelte b/src/lib/components/AddConnectionModal.svelte index 2104d8f939..f998cced05 100644 --- a/src/lib/components/AddConnectionModal.svelte +++ 
b/src/lib/components/AddConnectionModal.svelte @@ -194,15 +194,16 @@
-
+

{#if edit} {$i18n.t('Edit Connection')} {:else} {$i18n.t('Add Connection')} {/if} -

+
+ + {#if RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE === 'local'} +
+
+
+ {$i18n.t('Picture Description Local Config')} +
+
+ +
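+ <!-- Illustrative only: DOCLING_PICTURE_DESCRIPTION_LOCAL is forwarded to
+ Docling's local VLM picture-description options; a plausible value (the repo
+ id is just an example) is
+ {"repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct", "prompt": "Describe this image in a few sentences."} -->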