diff --git a/.github/dependabot.yml b/.github/dependabot.yml index ed93957ea4..1c83fd305b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,12 +12,6 @@ updates: interval: monthly target-branch: 'dev' - - package-ecosystem: npm - directory: '/' - schedule: - interval: monthly - target-branch: 'dev' - - package-ecosystem: 'github-actions' directory: '/' schedule: diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index 821ffb7206..e597ff8055 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -419,6 +419,108 @@ jobs: if-no-files-found: error retention-days: 1 + build-slim-image: + runs-on: ${{ matrix.runner }} + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm + + steps: + # GitHub Packages requires the entire repository name to be in lowercase + # although the repository owner has a lowercase username, this prevents some people from running actions after forking + - name: Set repository and image name to lowercase + run: | + echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV} + echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV} + env: + IMAGE_NAME: '${{ github.repository }}' + + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (slim tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME 
}} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-slim,onlatest=true + + - name: Extract metadata for Docker cache + id: cache-meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }} + flavor: | + prefix=cache-slim-${{ matrix.platform }}- + latest=false + + - name: Build Docker image (slim) + uses: docker/build-push-action@v5 + id: build + with: + context: . + push: true + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }} + cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max + build-args: | + BUILD_HASH=${{ github.sha }} + USE_SLIM=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-slim-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + merge-main-images: runs-on: ubuntu-latest needs: [build-main-image] @@ -640,3 +742,59 @@ jobs: - name: Inspect image run: | docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }} + + merge-slim-images: + runs-on: ubuntu-latest + needs: [build-slim-image] + steps: + # GitHub Packages requires the entire repository name to be in lowercase + # although the repository owner has a lowercase username, this prevents some people from running actions after forking + - name: Set 
repository and image name to lowercase + run: | + echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV} + echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV} + env: + IMAGE_NAME: '${{ github.repository }}' + + - name: Download digests + uses: actions/download-artifact@v4 + with: + pattern: digests-slim-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (default slim tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-slim,onlatest=true + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f7619c7c1..3a5663bb58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,47 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.6.26] - 2025-08-28 + +### Added + +- 🛂 **Granular Chat Interaction Permissions**: Added fine-grained permission controls for individual chat actions including "Continue Response", "Regenerate Response", "Rate Response", and "Delete Messages". Administrators can now configure these permissions per user group or set system defaults via environment variables, providing enhanced security and governance by preventing potential system prompt leakage through response continuation and enabling precise control over user interactions with AI responses. +- 🧠 **Custom Reasoning Tags Configuration**: Added configurable reasoning tag detection for AI model responses, allowing administrators and users to customize how the system identifies and processes reasoning content. Users can now define custom reasoning tag pairs, use default tags like "think" and "reasoning", or disable reasoning detection entirely through the Advanced Parameters interface, providing enhanced control over AI thought process visibility. +- 📱 **Pull-to-Refresh Support**: Added pull-to-refresh functionality allowing users to easily refresh the interface by pulling down on the navbar area. This resolves timeout issues that occurred when temporarily switching away from the app during long AI response generations, eliminating the need to close and relaunch the PWA. +- 📁 **Configurable File Upload Processing Mode**: Added "process_in_background" query parameter to the file upload API endpoint, allowing clients to choose between asynchronous (default) and synchronous file processing. Setting "process_in_background=false" forces the upload request to wait until extraction and embedding complete, returning immediately usable files and simplifying integration for backend API consumers that prefer blocking calls over polling workflows.
+- 🔐 **Azure Document Intelligence DefaultAzureCredential Support**: Added support for authenticating with Azure Document Intelligence using DefaultAzureCredential in addition to API key authentication, enabling seamless integration with Azure Entra ID and managed identity authentication for enterprise Azure environments. +- 🔐 **Authentication Bootstrapping Enhancements**: Added "ENABLE_INITIAL_ADMIN_SIGNUP" environment variable and "?form=true" URL parameter to enable initial admin user creation and forced login form display in SSO-only deployments. This resolves bootstrap issues where administrators couldn't create the first user when login forms were disabled, allowing proper initialization of SSO-configured deployments without requiring temporary configuration changes. +- ⚡ **Query Generation Caching**: Added "ENABLE_QUERIES_CACHE" environment variable to enable request-scoped caching of generated search queries. When both web search and file retrieval are active, queries generated for web search are automatically reused for file retrieval, eliminating duplicate LLM API calls and reducing token usage and costs while maintaining search quality. +- 🔧 **Configurable Tool Call Retry Limit**: Added "CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES" environment variable to control the maximum number of sequential tool calls allowed before safety stopping a chat session. This replaces the previous hardcoded limit of 10, enabling administrators to configure higher limits for complex workflows requiring extensive tool interactions. +- 📦 **Slim Docker Image Variant**: Added new slim Docker image option via "USE_SLIM" build argument that excludes embedded AI models and Ollama installation, reducing image size by approximately 1GB. This variant enables faster image pulls and deployments for environments where AI models are managed externally, particularly beneficial for auto-scaling clusters and distributed deployments. 
+- 🗂️ **Shift-to-Delete Functionality for Workspace Prompts**: Added keyboard shortcut support for quick prompt deletion on the Workspace Prompts page. Hold Shift and hover over any prompt to reveal a trash icon for instant deletion, bringing consistent interaction patterns across all workspace sections (Models, Tools, Functions, and now Prompts) and streamlining prompt management workflows. +- ♿ **Accessibility Enhancements**: Enhanced user interface accessibility with improved keyboard navigation, ARIA labels, and screen reader compatibility across key platform components. +- 📄 **Optimized PDF Export for Smaller File Size**: PDF exports are now significantly optimized, producing much smaller files for faster downloads and easier sharing or archiving of your chats and documents. +- 📦 **Slimmed Default Install with Optional Full Dependencies**: Installing Open WebUI via pip now defaults to a slimmer package; PostgreSQL support is no longer included by default—simply use 'pip install open-webui[all]' to include all optional dependencies for full feature compatibility. +- 🔄 **General Backend Refactoring**: Implemented various backend improvements to enhance performance, stability, and security, ensuring a more resilient and reliable platform for all users. +- 🌐 **Localization & Internationalization Improvements**: Enhanced and expanded translations for Finnish, Spanish, Japanese, Polish, Portuguese (Brazil), and Chinese, including missing translations and typo corrections, providing a more natural and professional user experience for speakers of these languages across the entire interface. + +### Fixed + +- ⚠️ **Chat Error Feedback Restored**: Fixed an issue where various backend errors (tool server failures, API connection issues, malformed responses) would cause chats to load indefinitely without providing user feedback. 
The system now properly displays error messages when failures occur during chat generation, allowing users to understand issues and retry as needed instead of waiting indefinitely. +- 🖼️ **Image Generation Steps Setting Visibility Fixed**: Fixed a UI issue where the "Set Steps" configuration option was incorrectly displayed for OpenAI and Gemini image generation engines that don't support this parameter. The setting is now only visible for compatible engines like ComfyUI and Automatic1111, eliminating user confusion about non-functional configuration options. +- 📄 **Datalab Marker API Document Loader Fixed**: Fixed broken Datalab Marker API document loader functionality by correcting URL path handling for both hosted Datalab services and self-hosted Marker servers. Removed hardcoded "/marker" paths from the loader code and restored proper default URL structure, ensuring PDF and document processing works correctly with both deployment types. +- 📋 **Citation Error Handling Improved**: Fixed an issue where malformed citation or source objects from external tools, pipes, or filters would cause JavaScript errors and make the chat interface completely unresponsive. The system now gracefully handles missing or undefined citation properties, allowing conversations to load properly even when tools generate defective citation events. +- 👥 **Group User Add API Endpoint Fixed**: Fixed an issue where the "/api/v1/groups/id/{group_id}/users/add" API endpoint would accept requests without errors but fail to actually add users to groups. The system now properly initializes and deduplicates user ID lists, ensuring users are correctly added to and removed from groups via API calls. +- 🛠️ **External Tool Server Error Handling Improved**: Fixed an issue where unreachable or misconfigured external tool servers would cause JavaScript errors and prevent the interface from loading properly. 
The system now gracefully handles connection failures, displays appropriate error messages, and filters out inaccessible servers while maintaining full functionality for working connections. +- 📋 **Code Block Copy Button Content Fixed**: Fixed an issue where the copy button in code blocks would copy the original AI-generated code instead of any user-edited content, ensuring the copy function always captures the currently displayed code as modified by users. +- 📄 **PDF Export Content Mismatch Fixed**: Resolved an issue where exporting a PDF of one chat while viewing another chat would incorrectly generate the PDF using the currently viewed chat's content instead of the intended chat's content. Additionally optimized the PDF generation algorithm with improved canvas slicing, better memory management, and enhanced image quality, while removing the problematic PDF export option from individual chat menus to prevent further confusion. +- 🖱️ **Windows Sidebar Cursor Icon Corrected**: Fixed confusing cursor icons on Windows systems where sidebar toggle buttons displayed resize cursors (ew-resize) instead of appropriate pointer cursors. The sidebar buttons now show standard pointer cursors on Windows, eliminating user confusion about whether the buttons expand/collapse the sidebar or resize it. +- 📝 **Safari IME Composition Bug Fixed**: Resolved an issue where pressing Enter while composing Chinese text using Input Method Editors (IMEs) on Safari would prematurely send messages instead of completing text composition. The system now properly detects composition states and ignores keydown events that occur immediately after composition ends, ensuring smooth multilingual text input across all browsers. +- 🔍 **Hybrid Search Parameter Handling Fixed**: Fixed an issue where the "hybrid" parameter in collection query requests was not being properly evaluated, causing the system to ignore user-specified hybrid search preferences and only check global configuration. 
Additionally resolved a division by zero error that occurred in hybrid search when BM25Retriever was called with empty document lists, ensuring robust search functionality across all collection states. +- 💬 **RTL Text Orientation in Messages Fixed**: Fixed text alignment issues in user messages and AI responses for Right-to-Left languages, ensuring proper text direction based on user language settings. Code blocks now consistently use Left-to-Right orientation regardless of the user's language preference, maintaining code readability across all supported languages. +- 📁 **File Content Preview in Modal Restored**: Fixed an issue where clicking on uploaded files would display an empty preview modal, even when the files were successfully processed and available for AI context. File content now displays correctly in the preview modal, ensuring users can verify and review their uploaded documents as intended. +- 🌐 **Playwright Timeout Configuration Corrected**: Fixed an issue where Playwright timeout values were incorrectly converted from milliseconds to seconds with an additional 1000x multiplier, causing excessively long web loading timeouts. The timeout parameter now correctly uses the configured millisecond values as intended, ensuring responsive web search and document loading operations. + +### Changed + +- 🔄 **Follow-Up Question Language Constraint Removed**: Follow-up question suggestions no longer strictly adhere to the chat's primary language setting, allowing for more flexible and diverse suggestion generation that may include questions in different languages based on conversation context and relevance rather than enforced language matching. 
+ ## [0.6.25] - 2025-08-22 ### Fixed diff --git a/Dockerfile b/Dockerfile index 83a74365f0..ad393338d8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,8 @@ # use build args in the docker build command with --build-arg="BUILDARG=true" ARG USE_CUDA=false ARG USE_OLLAMA=false +ARG USE_SLIM=false +ARG USE_PERMISSION_HARDENING=false # Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default) ARG USE_CUDA_VER=cu128 # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers @@ -24,6 +26,9 @@ ARG GID=0 FROM --platform=$BUILDPLATFORM node:22-alpine3.20 AS build ARG BUILD_HASH +# Set Node.js options (heap limit Allocation failed - JavaScript heap out of memory) +# ENV NODE_OPTIONS="--max-old-space-size=4096" + WORKDIR /app # to store git revision in build @@ -43,6 +48,8 @@ FROM python:3.11-slim-bookworm AS base ARG USE_CUDA ARG USE_OLLAMA ARG USE_CUDA_VER +ARG USE_SLIM +ARG USE_PERMISSION_HARDENING ARG USE_EMBEDDING_MODEL ARG USE_RERANKING_MODEL ARG UID @@ -54,6 +61,7 @@ ENV ENV=prod \ # pass build args to the build USE_OLLAMA_DOCKER=${USE_OLLAMA} \ USE_CUDA_DOCKER=${USE_CUDA} \ + USE_SLIM_DOCKER=${USE_SLIM} \ USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \ USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \ USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL} @@ -130,11 +138,14 @@ RUN pip3 install --no-cache-dir uv && \ else \ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ uv pip install --system -r requirements.txt --no-cache-dir && \ + if [ "$USE_SLIM" != "true" ]; then \ python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \ python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \ python -c "import os; import tiktoken; 
tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \ fi; \ - chown -R $UID:$GID /app/backend/data/ + fi; \ + mkdir -p /app/backend/data && chown -R $UID:$GID /app/backend/data/ && \ + rm -rf /var/lib/apt/lists/*; # Install Ollama if requested RUN if [ "$USE_OLLAMA" = "true" ]; then \ @@ -163,11 +174,13 @@ HEALTHCHECK CMD curl --silent --fail http://localhost:${PORT:-8080}/health | jq # Minimal, atomic permission hardening for OpenShift (arbitrary UID): # - Group 0 owns /app and /root # - Directories are group-writable and have SGID so new files inherit GID 0 -RUN set -eux; \ +RUN if [ "$USE_PERMISSION_HARDENING" = "true" ]; then \ + set -eux; \ chgrp -R 0 /app /root || true; \ chmod -R g+rwX /app /root || true; \ find /app -type d -exec chmod g+s {} + || true; \ - find /root -type d -exec chmod g+s {} + || true + find /root -type d -exec chmod g+s {} + || true; \ + fi USER $UID:$GID diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index 83625031ea..f0b26ae25c 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -362,6 +362,8 @@ ENABLE_REALTIME_CHAT_SAVE = ( os.environ.get("ENABLE_REALTIME_CHAT_SAVE", "False").lower() == "true" ) +ENABLE_QUERIES_CACHE = os.environ.get("ENABLE_QUERIES_CACHE", "False").lower() == "true" + #################################### # REDIS #################################### @@ -402,6 +404,10 @@ except ValueError: #################################### WEBUI_AUTH = os.environ.get("WEBUI_AUTH", "True").lower() == "true" + +ENABLE_INITIAL_ADMIN_SIGNUP = ( + os.environ.get("ENABLE_INITIAL_ADMIN_SIGNUP", "False").lower() == "true" +) ENABLE_SIGNUP_PASSWORD_CONFIRMATION = ( os.environ.get("ENABLE_SIGNUP_PASSWORD_CONFIRMATION", "False").lower() == "true" ) @@ -527,6 +533,19 @@ else: CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE = 1 +CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = os.environ.get( + "CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES", "10" +) + +if CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES == "": + 
CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = 10 +else: + try: + CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = int(CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES) + except Exception: + CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES = 10 + + #################################### # WEBSOCKET SUPPORT #################################### diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index cbaefe1f3e..d24bd5dcf1 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -1437,11 +1437,15 @@ async def chat_completion( stream_delta_chunk_size = form_data.get("params", {}).get( "stream_delta_chunk_size" ) + reasoning_tags = form_data.get("params", {}).get("reasoning_tags") # Model Params if model_info_params.get("stream_delta_chunk_size"): stream_delta_chunk_size = model_info_params.get("stream_delta_chunk_size") + if model_info_params.get("reasoning_tags") is not None: + reasoning_tags = model_info_params.get("reasoning_tags") + metadata = { "user_id": user.id, "chat_id": form_data.pop("chat_id", None), @@ -1457,6 +1461,7 @@ async def chat_completion( "direct": model_item.get("direct", False), "params": { "stream_delta_chunk_size": stream_delta_chunk_size, + "reasoning_tags": reasoning_tags, "function_calling": ( "native" if ( diff --git a/backend/open_webui/retrieval/loaders/datalab_marker.py b/backend/open_webui/retrieval/loaders/datalab_marker.py index cc6c7ce79d..8d14be0a40 100644 --- a/backend/open_webui/retrieval/loaders/datalab_marker.py +++ b/backend/open_webui/retrieval/loaders/datalab_marker.py @@ -64,7 +64,7 @@ class DatalabMarkerLoader: return mime_map.get(ext, "application/octet-stream") def check_marker_request_status(self, request_id: str) -> dict: - url = f"{self.api_base_url}/marker/{request_id}" + url = f"{self.api_base_url}/{request_id}" headers = {"X-Api-Key": self.api_key} try: response = requests.get(url, headers=headers) @@ -111,7 +111,7 @@ class DatalabMarkerLoader: with open(self.file_path, "rb") as f: files = {"file": (filename, f, mime_type)} 
response = requests.post( - f"{self.api_base_url}/marker", + f"{self.api_base_url}", data=form_data, files=files, headers=headers, diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index c301d0b7c8..9b90dca041 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -284,7 +284,7 @@ class Loader: ): api_base_url = self.kwargs.get("DATALAB_MARKER_API_BASE_URL", "") if not api_base_url or api_base_url.strip() == "": - api_base_url = "https://www.datalab.to/api/v1" + api_base_url = "https://www.datalab.to/api/v1/marker" # https://github.com/open-webui/open-webui/pull/16867#issuecomment-3218424349 loader = DatalabMarkerLoader( file_path=file_path, diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py index 11254ec78c..b8670edeaa 100644 --- a/backend/open_webui/routers/auths.py +++ b/backend/open_webui/routers/auths.py @@ -29,6 +29,7 @@ from open_webui.env import ( WEBUI_AUTH_COOKIE_SAME_SITE, WEBUI_AUTH_COOKIE_SECURE, WEBUI_AUTH_SIGNOUT_REDIRECT_URL, + ENABLE_INITIAL_ADMIN_SIGNUP, SRC_LOG_LEVELS, ) from fastapi import APIRouter, Depends, HTTPException, Request, status @@ -569,9 +570,10 @@ async def signup(request: Request, response: Response, form_data: SignupForm): not request.app.state.config.ENABLE_SIGNUP or not request.app.state.config.ENABLE_LOGIN_FORM ): - raise HTTPException( - status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.ACCESS_PROHIBITED - ) + if has_users or not ENABLE_INITIAL_ADMIN_SIGNUP: + raise HTTPException( + status.HTTP_403_FORBIDDEN, detail=ERROR_MESSAGES.ACCESS_PROHIBITED + ) else: if has_users: raise HTTPException( diff --git a/backend/open_webui/routers/tasks.py b/backend/open_webui/routers/tasks.py index e49602094f..7585466f69 100644 --- a/backend/open_webui/routers/tasks.py +++ b/backend/open_webui/routers/tasks.py @@ -470,6 +470,10 @@ async def generate_queries( detail=f"Query generation is disabled", 
) + if getattr(request.state, "cached_queries", None): + log.info(f"Reusing cached queries: {request.state.cached_queries}") + return request.state.cached_queries + if getattr(request.state, "direct", False) and hasattr(request.state, "model"): models = { request.state.model["id"]: request.state.model, diff --git a/backend/open_webui/socket/main.py b/backend/open_webui/socket/main.py index 5570348093..b64eab08ac 100644 --- a/backend/open_webui/socket/main.py +++ b/backend/open_webui/socket/main.py @@ -115,7 +115,7 @@ if WEBSOCKET_MANAGER == "redis": clean_up_lock = RedisLock( redis_url=WEBSOCKET_REDIS_URL, - lock_name="usage_cleanup_lock", + lock_name=f"{REDIS_KEY_PREFIX}:usage_cleanup_lock", timeout_secs=WEBSOCKET_REDIS_LOCK_TIMEOUT, redis_sentinels=redis_sentinels, redis_cluster=WEBSOCKET_REDIS_CLUSTER, @@ -705,6 +705,42 @@ def get_event_emitter(request_info, update_db=True): }, ) + if "type" in event_data and event_data["type"] == "files": + message = Chats.get_message_by_id_and_message_id( + request_info["chat_id"], + request_info["message_id"], + ) + + files = event_data.get("data", {}).get("files", []) + files.extend(message.get("files", [])) + + Chats.upsert_message_to_chat_by_id_and_message_id( + request_info["chat_id"], + request_info["message_id"], + { + "files": files, + }, + ) + + if event_data.get("type") in ["source", "citation"]: + data = event_data.get("data", {}) + if data.get("type") == None: + message = Chats.get_message_by_id_and_message_id( + request_info["chat_id"], + request_info["message_id"], + ) + + sources = message.get("sources", []) + sources.append(data) + + Chats.upsert_message_to_chat_by_id_and_message_id( + request_info["chat_id"], + request_info["message_id"], + { + "sources": sources, + }, + ) + return __event_emitter__ diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index b991c1b986..a298ebeb31 100644 --- a/backend/open_webui/utils/middleware.py +++ 
b/backend/open_webui/utils/middleware.py @@ -98,8 +98,10 @@ from open_webui.env import ( SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL, CHAT_RESPONSE_STREAM_DELTA_CHUNK_SIZE, + CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES, BYPASS_MODEL_ACCESS_CONTROL, ENABLE_REALTIME_CHAT_SAVE, + ENABLE_QUERIES_CACHE, ) from open_webui.constants import TASKS @@ -109,6 +111,20 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MAIN"]) +DEFAULT_REASONING_TAGS = [ + ("", ""), + ("", ""), + ("", ""), + ("", ""), + ("", ""), + ("", ""), + ("<|begin_of_thought|>", "<|end_of_thought|>"), + ("◁think▷", "◁/think▷"), +] +DEFAULT_SOLUTION_TAGS = [("<|begin_of_solution|>", "<|end_of_solution|>")] +DEFAULT_CODE_INTERPRETER_TAGS = [("", "")] + + async def chat_completion_tools_handler( request: Request, body: dict, extra_params: dict, user: UserModel, models, tools ) -> tuple[dict, dict]: @@ -390,6 +406,9 @@ async def chat_web_search_handler( except Exception as e: queries = [response] + if ENABLE_QUERIES_CACHE: + request.state.cached_queries = queries + except Exception as e: log.exception(e) queries = [user_message] @@ -689,6 +708,7 @@ def apply_params_to_form_data(form_data, model): "stream_response": bool, "stream_delta_chunk_size": int, "function_calling": str, + "reasoning_tags": list, "system": str, } @@ -1285,6 +1305,13 @@ async def process_chat_response( "error": {"content": error}, }, ) + if isinstance(error, str) or isinstance(error, dict): + await event_emitter( + { + "type": "chat:message:error", + "data": {"error": {"content": error}}, + }, + ) if "selected_model_id" in response_data: Chats.upsert_message_to_chat_by_id_and_message_id( @@ -1806,27 +1833,23 @@ async def process_chat_response( } ] - # We might want to disable this by default - DETECT_REASONING = True - DETECT_SOLUTION = True + reasoning_tags_param = metadata.get("params", {}).get("reasoning_tags") + DETECT_REASONING_TAGS = reasoning_tags_param is not False DETECT_CODE_INTERPRETER = metadata.get("features", {}).get( 
"code_interpreter", False ) - reasoning_tags = [ - ("", ""), - ("", ""), - ("", ""), - ("", ""), - ("", ""), - ("", ""), - ("<|begin_of_thought|>", "<|end_of_thought|>"), - ("◁think▷", "◁/think▷"), - ] - - code_interpreter_tags = [("", "")] - - solution_tags = [("<|begin_of_solution|>", "<|end_of_solution|>")] + reasoning_tags = [] + if DETECT_REASONING_TAGS: + if ( + isinstance(reasoning_tags_param, list) + and len(reasoning_tags_param) == 2 + ): + reasoning_tags = [ + (reasoning_tags_param[0], reasoning_tags_param[1]) + ] + else: + reasoning_tags = DEFAULT_REASONING_TAGS try: for event in events: @@ -2078,7 +2101,7 @@ async def process_chat_response( content_blocks[-1]["content"] + value ) - if DETECT_REASONING: + if DETECT_REASONING_TAGS: content, content_blocks, _ = ( tag_content_handler( "reasoning", @@ -2088,11 +2111,20 @@ async def process_chat_response( ) ) + content, content_blocks, _ = ( + tag_content_handler( + "solution", + DEFAULT_SOLUTION_TAGS, + content, + content_blocks, + ) + ) + if DETECT_CODE_INTERPRETER: content, content_blocks, end = ( tag_content_handler( "code_interpreter", - code_interpreter_tags, + DEFAULT_CODE_INTERPRETER_TAGS, content, content_blocks, ) @@ -2101,16 +2133,6 @@ async def process_chat_response( if end: break - if DETECT_SOLUTION: - content, content_blocks, _ = ( - tag_content_handler( - "solution", - solution_tags, - content, - content_blocks, - ) - ) - if ENABLE_REALTIME_CHAT_SAVE: # Save message in the database Chats.upsert_message_to_chat_by_id_and_message_id( @@ -2185,10 +2207,12 @@ async def process_chat_response( await stream_body_handler(response, form_data) - MAX_TOOL_CALL_RETRIES = 10 tool_call_retries = 0 - while len(tool_calls) > 0 and tool_call_retries < MAX_TOOL_CALL_RETRIES: + while ( + len(tool_calls) > 0 + and tool_call_retries < CHAT_RESPONSE_MAX_TOOL_CALL_RETRIES + ): tool_call_retries += 1 diff --git a/backend/open_webui/utils/payload.py b/backend/open_webui/utils/payload.py index 811ba75c9f..39c785854a 
100644 --- a/backend/open_webui/utils/payload.py +++ b/backend/open_webui/utils/payload.py @@ -63,6 +63,7 @@ def remove_open_webui_params(params: dict) -> dict: "stream_response": bool, "stream_delta_chunk_size": int, "function_calling": str, + "reasoning_tags": list, "system": str, } diff --git a/backend/requirements.txt b/backend/requirements.txt index 793e7d5332..03eeba2a1e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -42,7 +42,7 @@ asgiref==3.8.1 # AI libraries openai anthropic -google-genai==1.28.0 +google-genai==1.32.0 google-generativeai==0.8.5 tiktoken @@ -102,7 +102,6 @@ PyJWT[crypto]==2.10.1 authlib==1.6.1 black==25.1.0 -langfuse==2.44.0 youtube-transcript-api==1.1.0 pytube==15.0.0 diff --git a/package-lock.json b/package-lock.json index 42b16afc8f..f5960587a1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "open-webui", - "version": "0.6.25", + "version": "0.6.26", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.6.25", + "version": "0.6.26", "dependencies": { "@azure/msal-browser": "^4.5.0", "@codemirror/lang-javascript": "^6.2.2", diff --git a/package.json b/package.json index 90ab645e7a..d4f736d598 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - "version": "0.6.25", + "version": "0.6.26", "private": true, "scripts": { "dev": "npm run pyodide:fetch && vite dev --host", diff --git a/pyproject.toml b/pyproject.toml index abedf3fe89..73409618fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,10 @@ dependencies = [ "python-jose==3.4.0", "passlib[bcrypt]==1.7.4", "cryptography", + "bcrypt==4.3.0", + "argon2-cffi==23.1.0", + "PyJWT[crypto]==2.10.1", + "authlib==1.6.1", "requests==2.32.4", "aiohttp==3.12.15", @@ -28,31 +32,24 @@ dependencies = [ "alembic==1.14.0", "peewee==3.18.1", "peewee-migrate==1.12.2", - "psycopg2-binary==2.9.9", - "pgvector==0.4.0", - "PyMySQL==1.1.1", - "bcrypt==4.3.0", - - 
"pymongo", - "redis", - "boto3==1.40.5", - - "argon2-cffi==23.1.0", - "APScheduler==3.10.4", "pycrdt==0.12.25", + "redis", + "PyMySQL==1.1.1", + "boto3==1.40.5", + "APScheduler==3.10.4", "RestrictedPython==8.0", "loguru==0.7.3", "asgiref==3.8.1", + "tiktoken", "openai", "anthropic", - "google-genai==1.28.0", + "google-genai==1.32.0", "google-generativeai==0.8.5", - "tiktoken", "langchain==0.3.26", "langchain-community==0.3.26", @@ -100,14 +97,9 @@ dependencies = [ "rank-bm25==0.2.2", "onnxruntime==1.20.1", - "faster-whisper==1.1.1", - "PyJWT[crypto]==2.10.1", - "authlib==1.6.1", - "black==25.1.0", - "langfuse==2.44.0", "youtube-transcript-api==1.1.0", "pytube==15.0.0", @@ -118,9 +110,7 @@ dependencies = [ "google-auth-httplib2", "google-auth-oauthlib", - "docker~=7.1.0", - "pytest~=8.3.2", - "pytest-docker~=3.1.1", + "googleapis-common-protos==1.63.2", "google-cloud-storage==2.19.0", @@ -131,12 +121,8 @@ dependencies = [ "ldap3==2.9.1", "firecrawl-py==1.12.0", - "tencentcloud-sdk-python==3.0.1336", - - "gcp-storage-emulator>=2024.8.3", - - "moto[s3]>=5.0.26", + "oracledb>=3.2.0", "posthog==5.4.0", @@ -154,6 +140,23 @@ classifiers = [ "Topic :: Multimedia", ] +[project.optional-dependencies] +postgres = [ + "psycopg2-binary==2.9.9", + "pgvector==0.4.0", +] + +all = [ + "pymongo", + "psycopg2-binary==2.9.9", + "pgvector==0.4.0", + "moto[s3]>=5.0.26", + "gcp-storage-emulator>=2024.8.3", + "docker~=7.1.0", + "pytest~=8.3.2", + "pytest-docker~=3.1.1", +] + [project.scripts] open-webui = "open_webui:app" diff --git a/src/lib/components/admin/Settings.svelte b/src/lib/components/admin/Settings.svelte index d6a9e8a925..765a1d3ec9 100644 --- a/src/lib/components/admin/Settings.svelte +++ b/src/lib/components/admin/Settings.svelte @@ -204,7 +204,7 @@ /> -
{$i18n.t('Tools')}
+
{$i18n.t('External Tools')}
+ + + + {#if ![true, false, null].includes(params?.reasoning_tags ?? null) && (params?.reasoning_tags ?? []).length === 2} +
+
+ +
+ +
+ +
+
+ {/if} + +
-
{$i18n.t('Tools')}
+
{$i18n.t('External Tools')}
{/if} {:else if tabId === 'personalization'} diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index c12b75d6f8..238bbbe6ff 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -51,7 +51,7 @@ : 'rounded-2xl'} text-left" type="button" on:click={async () => { - if (item?.file?.data?.content || modal) { + if (item?.file?.data?.content || item?.type === 'file' || modal) { showModal = !showModal; } else { if (url) { diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index f84f9c047c..e912a807ae 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -25,6 +25,8 @@ let isAudio = false; let loading = false; + let selectedTab = ''; + $: isPDF = item?.meta?.content_type === 'application/pdf' || (item?.name && item?.name.toLowerCase().endsWith('.pdf')); @@ -115,7 +117,7 @@
-
+
{#if item?.type === 'collection'} {#if item?.type}
{item.type}
@@ -141,13 +143,13 @@ {#if item?.file?.data?.content}
- {getLineCount(item?.file?.data?.content ?? '')} extracted lines + {$i18n.t('{{COUNT}} extracted lines', { + COUNT: getLineCount(item?.file?.data?.content ?? '') + })}
- - - Formatting may be inconsistent from source. + • {$i18n.t('Formatting may be inconsistent from source.')}
{/if} @@ -202,11 +204,41 @@ {/each}
{:else if isPDF} -