diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
index 821ffb7206..e597ff8055 100644
--- a/.github/workflows/docker-build.yaml
+++ b/.github/workflows/docker-build.yaml
@@ -419,6 +419,108 @@ jobs:
if-no-files-found: error
retention-days: 1
+ build-slim-image:
+ runs-on: ${{ matrix.runner }}
+ permissions:
+ contents: read
+ packages: write
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - platform: linux/amd64
+ runner: ubuntu-latest
+ - platform: linux/arm64
+ runner: ubuntu-24.04-arm
+
+ steps:
+ # GitHub Packages requires the entire repository name to be lowercase.
+ # The upstream owner's username is already lowercase, but forks owned by mixed-case usernames could not run these actions without this step.
+ - name: Set repository and image name to lowercase
+ run: |
+ echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+ echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+ env:
+ IMAGE_NAME: '${{ github.repository }}'
+
+ - name: Prepare
+ run: |
+ platform=${{ matrix.platform }}
+ echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Log in to the Container registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Extract metadata for Docker images (slim tag)
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.FULL_IMAGE_NAME }}
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=sha,prefix=git-
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim
+ flavor: |
+ latest=${{ github.ref == 'refs/heads/main' }}
+ suffix=-slim,onlatest=true
+
+ - name: Extract metadata for Docker cache
+ id: cache-meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.FULL_IMAGE_NAME }}
+ tags: |
+ type=ref,event=branch
+ ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
+ flavor: |
+ prefix=cache-slim-${{ matrix.platform }}-
+ latest=false
+
+ - name: Build Docker image (slim)
+ uses: docker/build-push-action@v5
+ id: build
+ with:
+ context: .
+ push: true
+ platforms: ${{ matrix.platform }}
+ labels: ${{ steps.meta.outputs.labels }}
+ outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+ cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+ cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
+ build-args: |
+ BUILD_HASH=${{ github.sha }}
+ USE_SLIM=true
+
+ - name: Export digest
+ run: |
+ mkdir -p /tmp/digests
+ digest="${{ steps.build.outputs.digest }}"
+ touch "/tmp/digests/${digest#sha256:}"
+
+ - name: Upload digest
+ uses: actions/upload-artifact@v4
+ with:
+ name: digests-slim-${{ env.PLATFORM_PAIR }}
+ path: /tmp/digests/*
+ if-no-files-found: error
+ retention-days: 1
+
merge-main-images:
runs-on: ubuntu-latest
needs: [build-main-image]
@@ -640,3 +742,59 @@ jobs:
- name: Inspect image
run: |
docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+
+ merge-slim-images:
+ runs-on: ubuntu-latest
+ needs: [build-slim-image]
+ steps:
+ # GitHub Packages requires the entire repository name to be lowercase.
+ # The upstream owner's username is already lowercase, but forks owned by mixed-case usernames could not run these actions without this step.
+ - name: Set repository and image name to lowercase
+ run: |
+ echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+ echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+ env:
+ IMAGE_NAME: '${{ github.repository }}'
+
+ - name: Download digests
+ uses: actions/download-artifact@v4
+ with:
+ pattern: digests-slim-*
+ path: /tmp/digests
+ merge-multiple: true
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Log in to the Container registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Extract metadata for Docker images (default slim tag)
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.FULL_IMAGE_NAME }}
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=sha,prefix=git-
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim
+ flavor: |
+ latest=${{ github.ref == 'refs/heads/main' }}
+ suffix=-slim,onlatest=true
+
+ - name: Create manifest list and push
+ working-directory: /tmp/digests
+ run: |
+ docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+ $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
+
+ - name: Inspect image
+ run: |
+ docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f7619c7c1..349b984e19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,47 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.6.26] - 2025-08-28
+
+### Added
+
+- 🛂 **Granular Chat Interaction Permissions**: Added fine-grained permission controls for individual chat actions including "Continue Response", "Regenerate Response", "Rate Response", and "Delete Messages". Administrators can now configure these permissions per user group or set system defaults via environment variables, providing enhanced security and governance by preventing potential system prompt leakage through response continuation and enabling precise control over user interactions with AI responses.
+- 🧠 **Custom Reasoning Tags Configuration**: Added configurable reasoning tag detection for AI model responses, allowing administrators and users to customize how the system identifies and processes reasoning content. Users can now define custom reasoning tag pairs, use default tags like "think" and "reasoning", or disable reasoning detection entirely through the Advanced Parameters interface, providing enhanced control over AI thought process visibility.
+- 📱 **Pull-to-Refresh Support**: Added pull-to-refresh functionality allowing users to easily refresh the interface by pulling down on the navbar area. This resolves timeout issues that occurred when temporarily switching away from the app during long AI response generations, eliminating the need to close and relaunch the PWA.
+- 📁 **Configurable File Upload Processing Mode**: Added "process_in_background" query parameter to the file upload API endpoint, allowing clients to choose between asynchronous (default) and synchronous file processing. Setting "process_in_background=false" forces the upload request to wait until extraction and embedding complete, returning immediately usable files and simplifying integration for backend API consumers that prefer blocking calls over polling workflows.
+- 🔐 **Azure Document Intelligence DefaultAzureCredential Support**: Added support for authenticating with Azure Document Intelligence using DefaultAzureCredential in addition to API key authentication, enabling seamless integration with Azure Entra ID and managed identity authentication for enterprise Azure environments.
+- 🔐 **Authentication Bootstrapping Enhancements**: Added "ENABLE_INITIAL_ADMIN_SIGNUP" environment variable and "?form=true" URL parameter to enable initial admin user creation and forced login form display in SSO-only deployments. This resolves bootstrap issues where administrators couldn't create the first user when login forms were disabled, allowing proper initialization of SSO-configured deployments without requiring temporary configuration changes.
+- ⚡ **Query Generation Caching**: Added "ENABLE_QUERIES_CACHE" environment variable to enable request-scoped caching of generated search queries. When both web search and file retrieval are active, queries generated for web search are automatically reused for file retrieval, eliminating duplicate LLM API calls and reducing token usage and costs while maintaining search quality.
+- 🔧 **Configurable Tool Call Retry Limit**: Added "CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES" environment variable to control the maximum number of sequential tool calls allowed before a chat session is stopped as a safety measure. This replaces the previous hardcoded limit of 10, enabling administrators to configure higher limits for complex workflows requiring extensive tool interactions.
+- 📦 **Slim Docker Image Variant**: Added new slim Docker image option via "USE_SLIM" build argument that excludes embedded AI models and Ollama installation, reducing image size by approximately 1GB. This variant enables faster image pulls and deployments for environments where AI models are managed externally, particularly beneficial for auto-scaling clusters and distributed deployments.
+- 🗂️ **Shift-to-Delete Functionality for Workspace Prompts**: Added keyboard shortcut support for quick prompt deletion on the Workspace Prompts page. Hold Shift and hover over any prompt to reveal a trash icon for instant deletion, bringing consistent interaction patterns across all workspace sections (Models, Tools, Functions, and now Prompts) and streamlining prompt management workflows.
+- ♿ **Accessibility Enhancements**: Enhanced user interface accessibility with improved keyboard navigation, ARIA labels, and screen reader compatibility across key platform components.
+- 📄 **Optimized PDF Export for Smaller File Size**: PDF exports are now significantly optimized, producing much smaller files for faster downloads and easier sharing or archiving of your chats and documents.
+- 📦 **Slimmed Default Install with Optional Full Dependencies**: Installing Open WebUI via pip now defaults to a slimmer package; PostgreSQL support is no longer included by default—simply use 'pip install open-webui[all]' to include all optional dependencies for full feature compatibility.
+- 🔄 **General Backend Refactoring**: Implemented various backend improvements to enhance performance, stability, and security, ensuring a more resilient and reliable platform for all users.
+- 🌐 **Localization & Internationalization Improvements**: Enhanced and expanded translations for Finnish, Spanish, Japanese, Polish, Portuguese (Brazil), and Chinese, including missing translations and typo corrections, providing a more natural and professional user experience for speakers of these languages across the entire interface.
+
+### Fixed
+
+- ⚠️ **Chat Error Feedback Restored**: Fixed an issue where various backend errors (tool server failures, API connection issues, malformed responses) would cause chats to load indefinitely without providing user feedback. The system now properly displays error messages when failures occur during chat generation, allowing users to understand issues and retry as needed instead of waiting indefinitely.
+- 🖼️ **Image Generation Steps Setting Visibility Fixed**: Fixed a UI issue where the "Set Steps" configuration option was incorrectly displayed for OpenAI and Gemini image generation engines that don't support this parameter. The setting is now only visible for compatible engines like ComfyUI and Automatic1111, eliminating user confusion about non-functional configuration options.
+- 📄 **Datalab Marker API Document Loader Fixed**: Fixed broken Datalab Marker API document loader functionality by correcting URL path handling for both hosted Datalab services and self-hosted Marker servers. Removed hardcoded "/marker" paths from the loader code and restored proper default URL structure, ensuring PDF and document processing works correctly with both deployment types.
+- 📋 **Citation Error Handling Improved**: Fixed an issue where malformed citation or source objects from external tools, pipes, or filters would cause JavaScript errors and make the chat interface completely unresponsive. The system now gracefully handles missing or undefined citation properties, allowing conversations to load properly even when tools generate defective citation events.
+- 👥 **Group User Add API Endpoint Fixed**: Fixed an issue where the "/api/v1/groups/id/{group_id}/users/add" API endpoint would accept requests without errors but fail to actually add users to groups. The system now properly initializes and deduplicates user ID lists, ensuring users are correctly added to and removed from groups via API calls.
+- 🛠️ **External Tool Server Error Handling Improved**: Fixed an issue where unreachable or misconfigured external tool servers would cause JavaScript errors and prevent the interface from loading properly. The system now gracefully handles connection failures, displays appropriate error messages, and filters out inaccessible servers while maintaining full functionality for working connections.
+- 📋 **Code Block Copy Button Content Fixed**: Fixed an issue where the copy button in code blocks would copy the original AI-generated code instead of any user-edited content, ensuring the copy function always captures the currently displayed code as modified by users.
+- 📄 **PDF Export Content Mismatch Fixed**: Resolved an issue where exporting a PDF of one chat while viewing another chat would incorrectly generate the PDF using the currently viewed chat's content instead of the intended chat's content. Additionally optimized the PDF generation algorithm with improved canvas slicing, better memory management, and enhanced image quality, while removing the problematic PDF export option from individual chat menus to prevent further confusion.
+- 🖱️ **Windows Sidebar Cursor Icon Corrected**: Fixed confusing cursor icons on Windows systems where sidebar toggle buttons displayed resize cursors (ew-resize) instead of appropriate pointer cursors. The sidebar buttons now show standard pointer cursors on Windows, eliminating user confusion about whether the buttons expand/collapse the sidebar or resize it.
+- 📝 **Safari IME Composition Bug Fixed**: Resolved an issue where pressing Enter while composing Chinese text using Input Method Editors (IMEs) on Safari would prematurely send messages instead of completing text composition. The system now properly detects composition states and ignores keydown events that occur immediately after composition ends, ensuring smooth multilingual text input across all browsers.
+- 🔍 **Hybrid Search Parameter Handling Fixed**: Fixed an issue where the "hybrid" parameter in collection query requests was not being properly evaluated, causing the system to ignore user-specified hybrid search preferences and only check global configuration. Additionally resolved a division by zero error that occurred in hybrid search when BM25Retriever was called with empty document lists, ensuring robust search functionality across all collection states.
+- 💬 **RTL Text Orientation in Messages Fixed**: Fixed text alignment issues in user messages and AI responses for Right-to-Left languages, ensuring proper text direction based on user language settings. Code blocks now consistently use Left-to-Right orientation regardless of the user's language preference, maintaining code readability across all supported languages.
+- 📁 **File Content Preview in Modal Restored**: Fixed an issue where clicking on uploaded files would display an empty preview modal, even when the files were successfully processed and available for AI context. File content now displays correctly in the preview modal, ensuring users can verify and review their uploaded documents as intended.
+- 🌐 **Playwright Timeout Configuration Corrected**: Fixed an issue where Playwright timeout values were incorrectly converted from milliseconds to seconds with an additional 1000x multiplier, causing excessively long web loading timeouts. The timeout parameter now correctly uses the configured millisecond values as intended, ensuring responsive web search and document loading operations.
+
+### Changed
+
+- 🔄 **Follow-Up Question Language Constraint Removed**: Follow-up question suggestions no longer strictly adhere to the chat's primary language setting, allowing for more flexible and diverse suggestion generation that may include questions in different languages based on conversation context and relevance rather than enforced language matching.
+
## [0.6.25] - 2025-08-22
### Fixed
diff --git a/Dockerfile b/Dockerfile
index 83a74365f0..9c982e69e2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,6 +3,7 @@
# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
+ARG USE_SLIM=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu128
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
@@ -43,6 +44,7 @@ FROM python:3.11-slim-bookworm AS base
ARG USE_CUDA
ARG USE_OLLAMA
ARG USE_CUDA_VER
+ARG USE_SLIM
ARG USE_EMBEDDING_MODEL
ARG USE_RERANKING_MODEL
ARG UID
@@ -54,6 +56,7 @@ ENV ENV=prod \
# pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \
+ USE_SLIM_DOCKER=${USE_SLIM} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
@@ -120,6 +123,7 @@ RUN apt-get update && \
COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir uv && \
+ if [ "$USE_SLIM" != "true" ]; then \
if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
@@ -134,10 +138,13 @@ RUN pip3 install --no-cache-dir uv && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
fi; \
- chown -R $UID:$GID /app/backend/data/
+ else \
+ uv pip install --system -r requirements.txt --no-cache-dir; \
+ fi; \
+ mkdir -p /app/backend/data && chown -R $UID:$GID /app/backend/data/
# Install Ollama if requested
-RUN if [ "$USE_OLLAMA" = "true" ]; then \
+RUN if [ "$USE_OLLAMA" = "true" ] && [ "$USE_SLIM" != "true" ]; then \
date +%s > /tmp/ollama_build_hash && \
echo "Cache broken at timestamp: `cat /tmp/ollama_build_hash`" && \
curl -fsSL https://ollama.com/install.sh | sh && \
diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index 8f12ca68e2..188bb50cff 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -1208,6 +1208,23 @@ USER_PERMISSIONS_CHAT_DELETE = (
os.environ.get("USER_PERMISSIONS_CHAT_DELETE", "True").lower() == "true"
)
+USER_PERMISSIONS_CHAT_DELETE_MESSAGE = (
+ os.environ.get("USER_PERMISSIONS_CHAT_DELETE_MESSAGE", "True").lower() == "true"
+)
+
+USER_PERMISSIONS_CHAT_CONTINUE_RESPONSE = (
+ os.environ.get("USER_PERMISSIONS_CHAT_CONTINUE_RESPONSE", "True").lower() == "true"
+)
+
+USER_PERMISSIONS_CHAT_REGENERATE_RESPONSE = (
+ os.environ.get("USER_PERMISSIONS_CHAT_REGENERATE_RESPONSE", "True").lower()
+ == "true"
+)
+
+USER_PERMISSIONS_CHAT_RATE_RESPONSE = (
+ os.environ.get("USER_PERMISSIONS_CHAT_RATE_RESPONSE", "True").lower() == "true"
+)
+
USER_PERMISSIONS_CHAT_EDIT = (
os.environ.get("USER_PERMISSIONS_CHAT_EDIT", "True").lower() == "true"
)
@@ -1290,6 +1307,10 @@ DEFAULT_USER_PERMISSIONS = {
"params": USER_PERMISSIONS_CHAT_PARAMS,
"file_upload": USER_PERMISSIONS_CHAT_FILE_UPLOAD,
"delete": USER_PERMISSIONS_CHAT_DELETE,
+ "delete_message": USER_PERMISSIONS_CHAT_DELETE_MESSAGE,
+ "continue_response": USER_PERMISSIONS_CHAT_CONTINUE_RESPONSE,
+ "regenerate_response": USER_PERMISSIONS_CHAT_REGENERATE_RESPONSE,
+ "rate_response": USER_PERMISSIONS_CHAT_RATE_RESPONSE,
"edit": USER_PERMISSIONS_CHAT_EDIT,
"share": USER_PERMISSIONS_CHAT_SHARE,
"export": USER_PERMISSIONS_CHAT_EXPORT,
@@ -1576,7 +1597,7 @@ FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = PersistentConfig(
)
DEFAULT_FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = """### Task:
-Suggest 3-5 relevant follow-up questions or prompts in the chat's primary language that the user might naturally ask next in this conversation as a **user**, based on the chat history, to help continue or deepen the discussion.
+Suggest 3-5 relevant follow-up questions or prompts that the user might naturally ask next in this conversation as a **user**, based on the chat history, to help continue or deepen the discussion.
### Guidelines:
- Write all follow-up questions from the user’s point of view, directed to the assistant.
- Make questions concise, clear, and directly related to the discussed topic(s).
diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py
index 83625031ea..f0b26ae25c 100644
--- a/backend/open_webui/env.py
+++ b/backend/open_webui/env.py
@@ -362,6 +362,8 @@ ENABLE_REALTIME_CHAT_SAVE = (
os.environ.get("ENABLE_REALTIME_CHAT_SAVE", "False").lower() == "true"
)
+ENABLE_QUERIES_CACHE = os.environ.get("ENABLE_QUERIES_CACHE", "False").lower() == "true"
+
####################################
# REDIS
####################################
@@ -402,6 +404,10 @@ except ValueError:
####################################
WEBUI_AUTH = os.environ.get("WEBUI_AUTH", "True").lower() == "true"
+
+ENABLE_INITIAL_ADMIN_SIGNUP = (
+ os.environ.get("ENABLE_INITIAL_ADMIN_SIGNUP", "False").lower() == "true"
+)
ENABLE_SIGNUP_PASSWORD_CONFIRMATION = (
os.environ.get("ENABLE_SIGNUP_PASSWORD_CONFIRMATION", "False").lower() == "true"
)
@@ -527,6 +533,19 @@ else:
CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE = 1
+CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = os.environ.get(
+ "CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES", "10"
+)
+
+if CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES == "":
+ CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = 10
+else:
+ try:
+ CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = int(CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES)
+ except Exception:
+ CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = 10
+
+
####################################
# WEBSOCKET SUPPORT
####################################
diff --git a/backend/open_webui/functions.py b/backend/open_webui/functions.py
index d8f2a61257..db367ccbd0 100644
--- a/backend/open_webui/functions.py
+++ b/backend/open_webui/functions.py
@@ -232,7 +232,7 @@ async def generate_function_chat_completion(
"__metadata__": metadata,
"__request__": request,
}
- extra_params["__tools__"] = get_tools(
+ extra_params["__tools__"] = await get_tools(
request,
tool_ids,
user,
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 290a1e7dda..a4d752c759 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -1439,11 +1439,15 @@ async def chat_completion(
stream_delta_chunk_size = form_data.get("params", {}).get(
"stream_delta_chunk_size"
)
+ reasoning_tags = form_data.get("params", {}).get("reasoning_tags")
# Model Params
if model_info_params.get("stream_delta_chunk_size"):
stream_delta_chunk_size = model_info_params.get("stream_delta_chunk_size")
+ if model_info_params.get("reasoning_tags") is not None:
+ reasoning_tags = model_info_params.get("reasoning_tags")
+
metadata = {
"user_id": user.id,
"chat_id": form_data.pop("chat_id", None),
@@ -1459,6 +1463,7 @@ async def chat_completion(
"direct": model_item.get("direct", False),
"params": {
"stream_delta_chunk_size": stream_delta_chunk_size,
+ "reasoning_tags": reasoning_tags,
"function_calling": (
"native"
if (
diff --git a/backend/open_webui/models/groups.py b/backend/open_webui/models/groups.py
index 6615f95142..a09b2b73f9 100644
--- a/backend/open_webui/models/groups.py
+++ b/backend/open_webui/models/groups.py
@@ -288,13 +288,17 @@ class GroupTable:
if not group:
return None
- if not group.user_ids:
- group.user_ids = []
+ group_user_ids = group.user_ids
+ if not group_user_ids or not isinstance(group_user_ids, list):
+ group_user_ids = []
+
+ group_user_ids = list(set(group_user_ids)) # Deduplicate
for user_id in user_ids:
- if user_id not in group.user_ids:
- group.user_ids.append(user_id)
+ if user_id not in group_user_ids:
+ group_user_ids.append(user_id)
+ group.user_ids = group_user_ids
group.updated_at = int(time.time())
db.commit()
db.refresh(group)
@@ -312,14 +316,20 @@ class GroupTable:
if not group:
return None
- if not group.user_ids:
+ group_user_ids = group.user_ids
+
+ if not group_user_ids or not isinstance(group_user_ids, list):
return GroupModel.model_validate(group)
- for user_id in user_ids:
- if user_id in group.user_ids:
- group.user_ids.remove(user_id)
+ group_user_ids = list(set(group_user_ids)) # Deduplicate
+ for user_id in user_ids:
+ if user_id in group_user_ids:
+ group_user_ids.remove(user_id)
+
+ group.user_ids = group_user_ids
group.updated_at = int(time.time())
+
db.commit()
db.refresh(group)
return GroupModel.model_validate(group)
diff --git a/backend/open_webui/retrieval/loaders/datalab_marker.py b/backend/open_webui/retrieval/loaders/datalab_marker.py
index cc6c7ce79d..8d14be0a40 100644
--- a/backend/open_webui/retrieval/loaders/datalab_marker.py
+++ b/backend/open_webui/retrieval/loaders/datalab_marker.py
@@ -64,7 +64,7 @@ class DatalabMarkerLoader:
return mime_map.get(ext, "application/octet-stream")
def check_marker_request_status(self, request_id: str) -> dict:
- url = f"{self.api_base_url}/marker/{request_id}"
+ url = f"{self.api_base_url}/{request_id}"
headers = {"X-Api-Key": self.api_key}
try:
response = requests.get(url, headers=headers)
@@ -111,7 +111,7 @@ class DatalabMarkerLoader:
with open(self.file_path, "rb") as f:
files = {"file": (filename, f, mime_type)}
response = requests.post(
- f"{self.api_base_url}/marker",
+ f"{self.api_base_url}",
data=form_data,
files=files,
headers=headers,
diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py
index 241cd7dbe8..9b90dca041 100644
--- a/backend/open_webui/retrieval/loaders/main.py
+++ b/backend/open_webui/retrieval/loaders/main.py
@@ -4,6 +4,7 @@ import ftfy
import sys
import json
+from azure.identity import DefaultAzureCredential
from langchain_community.document_loaders import (
AzureAIDocumentIntelligenceLoader,
BSHTMLLoader,
@@ -283,7 +284,7 @@ class Loader:
):
api_base_url = self.kwargs.get("DATALAB_MARKER_API_BASE_URL", "")
if not api_base_url or api_base_url.strip() == "":
- api_base_url = "https://www.datalab.to/api/v1"
+ api_base_url = "https://www.datalab.to/api/v1/marker" # https://github.com/open-webui/open-webui/pull/16867#issuecomment-3218424349
loader = DatalabMarkerLoader(
file_path=file_path,
@@ -327,7 +328,6 @@ class Loader:
elif (
self.engine == "document_intelligence"
and self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT") != ""
- and self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY") != ""
and (
file_ext in ["pdf", "xls", "xlsx", "docx", "ppt", "pptx"]
or file_content_type
@@ -340,11 +340,18 @@ class Loader:
]
)
):
- loader = AzureAIDocumentIntelligenceLoader(
- file_path=file_path,
- api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
- api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"),
- )
+ if self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY") != "":
+ loader = AzureAIDocumentIntelligenceLoader(
+ file_path=file_path,
+ api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
+ api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"),
+ )
+ else:
+ loader = AzureAIDocumentIntelligenceLoader(
+ file_path=file_path,
+ api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"),
+ azure_credential=DefaultAzureCredential(),
+ )
elif (
self.engine == "mistral_ocr"
and self.kwargs.get("MISTRAL_OCR_API_KEY") != ""
diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 862972187d..100c92c6c0 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -124,6 +124,10 @@ def query_doc_with_hybrid_search(
hybrid_bm25_weight: float,
) -> dict:
try:
+ if not collection_result.documents[0]:
+ log.warning(f"query_doc_with_hybrid_search:no_docs {collection_name}")
+ return {"documents": [], "metadatas": [], "distances": []}
+
# BM_25 required only if weight is greater than 0
if hybrid_bm25_weight > 0:
log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py
index 5a90a86e0f..bf9b01a39f 100644
--- a/backend/open_webui/retrieval/web/utils.py
+++ b/backend/open_webui/retrieval/web/utils.py
@@ -614,7 +614,7 @@ def get_web_loader(
WebLoaderClass = SafeWebBaseLoader
if WEB_LOADER_ENGINE.value == "playwright":
WebLoaderClass = SafePlaywrightURLLoader
- web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000
+ web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value
if PLAYWRIGHT_WS_URL.value:
web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URL.value
diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py
index 11254ec78c..b8670edeaa 100644
--- a/backend/open_webui/routers/auths.py
+++ b/backend/open_webui/routers/auths.py
@@ -29,6 +29,7 @@ from open_webui.env import (
WEBUI_AUTH_COOKIE_SAME_SITE,
WEBUI_AUTH_COOKIE_SECURE,
WEBUI_AUTH_SIGNOUT_REDIRECT_URL,
+ ENABLE_INITIAL_ADMIN_SIGNUP,
SRC_LOG_LEVELS,
)
from fastapi import APIRouter, Depends, HTTPException, Request, status
@@ -569,9 +570,10 @@ async def signup(request: Request, response: Response, form_data: SignupForm):
not request.app.state.config.ENABLE_SIGNUP
or not request.app.state.config.ENABLE_LOGIN_FORM
):
- raise HTTPException(
- status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.ACCESS_PROHIBITED
- )
+ if has_users or not ENABLE_INITIAL_ADMIN_SIGNUP:
+ raise HTTPException(
+ status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.ACCESS_PROHIBITED
+ )
else:
if has_users:
raise HTTPException(
diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py
index 3b46d0bd8a..d08c5396ce 100644
--- a/backend/open_webui/routers/files.py
+++ b/backend/open_webui/routers/files.py
@@ -143,9 +143,18 @@ def upload_file(
file: UploadFile = File(...),
metadata: Optional[dict | str] = Form(None),
process: bool = Query(True),
+ process_in_background: bool = Query(True),
user=Depends(get_verified_user),
):
- return upload_file_handler(request, file, metadata, process, user, background_tasks)
+ return upload_file_handler(
+ request,
+ file=file,
+ metadata=metadata,
+ process=process,
+ process_in_background=process_in_background,
+ user=user,
+ background_tasks=background_tasks,
+ )
def upload_file_handler(
@@ -153,6 +162,7 @@ def upload_file_handler(
file: UploadFile = File(...),
metadata: Optional[dict | str] = Form(None),
process: bool = Query(True),
+ process_in_background: bool = Query(True),
user=Depends(get_verified_user),
background_tasks: Optional[BackgroundTasks] = None,
):
@@ -225,7 +235,7 @@ def upload_file_handler(
)
if process:
- if background_tasks:
+ if background_tasks and process_in_background:
background_tasks.add_task(
process_uploaded_file,
request,
diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py
index 11bf5b914f..1a6b75c555 100644
--- a/backend/open_webui/routers/ollama.py
+++ b/backend/open_webui/routers/ollama.py
@@ -329,12 +329,13 @@ def merge_ollama_models_lists(model_lists):
for idx, model_list in enumerate(model_lists):
if model_list is not None:
for model in model_list:
- id = model["model"]
- if id not in merged_models:
- model["urls"] = [idx]
- merged_models[id] = model
- else:
- merged_models[id]["urls"].append(idx)
+ id = model.get("model")
+ if id is not None:
+ if id not in merged_models:
+ model["urls"] = [idx]
+ merged_models[id] = model
+ else:
+ merged_models[id]["urls"].append(idx)
return list(merged_models.values())
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 738f2d05fc..fdb7786258 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -2075,7 +2075,9 @@ def query_doc_handler(
user=Depends(get_verified_user),
):
try:
- if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH:
+ if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and (
+ form_data.hybrid is None or form_data.hybrid
+ ):
collection_results = {}
collection_results[form_data.collection_name] = VECTOR_DB_CLIENT.get(
collection_name=form_data.collection_name
@@ -2145,7 +2147,9 @@ def query_collection_handler(
user=Depends(get_verified_user),
):
try:
- if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH:
+ if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and (
+ form_data.hybrid is None or form_data.hybrid
+ ):
return query_collection_with_hybrid_search(
collection_names=form_data.collection_names,
queries=[form_data.query],
diff --git a/backend/open_webui/routers/tasks.py b/backend/open_webui/routers/tasks.py
index 73296afb18..d3d3e1bdcc 100644
--- a/backend/open_webui/routers/tasks.py
+++ b/backend/open_webui/routers/tasks.py
@@ -477,6 +477,10 @@ async def generate_queries(
detail=f"Query generation is disabled",
)
+ if getattr(request.state, "cached_queries", None):
+ log.info(f"Reusing cached queries: {request.state.cached_queries}")
+ return request.state.cached_queries
+
if getattr(request.state, "direct", False) and hasattr(request.state, "model"):
models = {
request.state.model["id"]: request.state.model,
diff --git a/backend/open_webui/routers/users.py b/backend/open_webui/routers/users.py
index 7b27b45b9d..4d2539a18e 100644
--- a/backend/open_webui/routers/users.py
+++ b/backend/open_webui/routers/users.py
@@ -146,6 +146,10 @@ class ChatPermissions(BaseModel):
params: bool = True
file_upload: bool = True
delete: bool = True
+ delete_message: bool = True
+ continue_response: bool = True
+ regenerate_response: bool = True
+ rate_response: bool = True
edit: bool = True
share: bool = True
export: bool = True
diff --git a/backend/open_webui/socket/main.py b/backend/open_webui/socket/main.py
index 5570348093..b64eab08ac 100644
--- a/backend/open_webui/socket/main.py
+++ b/backend/open_webui/socket/main.py
@@ -115,7 +115,7 @@ if WEBSOCKET_MANAGER == "redis":
clean_up_lock = RedisLock(
redis_url=WEBSOCKET_REDIS_URL,
- lock_name="usage_cleanup_lock",
+ lock_name=f"{REDIS_KEY_PREFIX}:usage_cleanup_lock",
timeout_secs=WEBSOCKET_REDIS_LOCK_TIMEOUT,
redis_sentinels=redis_sentinels,
redis_cluster=WEBSOCKET_REDIS_CLUSTER,
@@ -705,6 +705,42 @@ def get_event_emitter(request_info, update_db=True):
},
)
+ if "type" in event_data and event_data["type"] == "files":  # persist files carried by a "files" event onto the stored message
+     message = Chats.get_message_by_id_and_message_id(
+         request_info["chat_id"],
+         request_info["message_id"],
+     )
+
+     files = list(event_data.get("data", {}).get("files", []))  # copy so extend() below does not mutate the caller's event payload
+     files.extend(message.get("files", []))  # new files first, then those already stored; NOTE(review): message is used unguarded, confirm the lookup cannot return None
+
+     Chats.upsert_message_to_chat_by_id_and_message_id(
+         request_info["chat_id"],
+         request_info["message_id"],
+         {
+             "files": files,
+         },
+     )
+
+ if event_data.get("type") in ["source", "citation"]:  # persist "source"/"citation" event data onto the stored message
+     data = event_data.get("data", {})
+     if data.get("type") is None:  # identity check for None (PEP 8); only payloads without a "type" value are stored
+         message = Chats.get_message_by_id_and_message_id(
+             request_info["chat_id"],
+             request_info["message_id"],
+         )
+
+         sources = message.get("sources", [])  # NOTE(review): message is used unguarded, confirm the lookup cannot return None
+         sources.append(data)
+
+         Chats.upsert_message_to_chat_by_id_and_message_id(
+             request_info["chat_id"],
+             request_info["message_id"],
+             {
+                 "sources": sources,
+             },
+         )
+
return __event_emitter__
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index b991c1b986..a298ebeb31 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -98,8 +98,10 @@ from open_webui.env import (
SRC_LOG_LEVELS,
GLOBAL_LOG_LEVEL,
CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE,
+ CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES,
BYPASS_MODEL_ACCESS_CONTROL,
ENABLE_REALTIME_CHAT_SAVE,
+ ENABLE_QUERIES_CACHE,
)
from open_webui.constants import TASKS
@@ -109,6 +111,20 @@ log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["MAIN"])
+DEFAULT_REASONING_TAGS = [
+ ("