diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index bfbee10a1b..7aa2fec16b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -8,6 +8,10 @@ assignees: '' # Bug Report +**Important: Before submitting a bug report, please check whether a similar issue or feature request has already been posted in the Issues or Discussions section. It's likely we're already tracking it. In case of uncertainty, initiate a discussion post first. This helps us all to efficiently focus on improving the project.** + +**Let's collaborate respectfully. If you bring negativity, please understand our capacity to engage may be limited. If you're open to learning and communicating constructively, we're more than happy to assist you. Remember, Open WebUI is a volunteer-driven project maintained by a single maintainer, supported by our amazing contributors who also manage full-time jobs. We respect your time; please respect ours. If you have an issue, we highly encourage you to submit a pull request or to fork the project. We actively work to prevent contributor burnout to preserve the quality and continuity of Open WebUI.** + ## Installation Method [Describe the method you used to install the project, e.g., git clone, Docker, pip, etc.] diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 2f28cead03..27abd35b4a 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -6,6 +6,12 @@ labels: '' assignees: '' --- +# Feature Request + +**Important: Before submitting a feature request, please check whether a similar issue or feature request has already been posted in the Issues or Discussions section. It's likely we're already tracking it. In case of uncertainty, initiate a discussion post first. This helps us all to efficiently focus on improving the project.** + +**Let's collaborate respectfully. 
If you bring negativity, please understand our capacity to engage may be limited. If you're open to learning and communicating constructively, we're more than happy to assist you. Remember, Open WebUI is a volunteer-driven project maintained by a single maintainer, supported by our amazing contributors who also manage full-time jobs. We respect your time; please respect ours. If you have an issue, we highly encourage you to submit a pull request or to fork the project. We actively work to prevent contributor burnout to preserve the quality and continuity of Open WebUI.** + **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] diff --git a/.github/workflows/format-backend.yaml b/.github/workflows/format-backend.yaml index 2e980de41f..4458766975 100644 --- a/.github/workflows/format-backend.yaml +++ b/.github/workflows/format-backend.yaml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/format-build-frontend.yaml b/.github/workflows/format-build-frontend.yaml index 9ee57f475a..53d3aaa5ec 100644 --- a/.github/workflows/format-build-frontend.yaml +++ b/.github/workflows/format-build-frontend.yaml @@ -21,7 +21,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: '20' # Or specify any other version you want to use + node-version: '22' # Or specify any other version you want to use - name: Install Dependencies run: npm install @@ -48,7 +48,7 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: '20' + node-version: '22' - name: Install Dependencies run: npm ci diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 54db965d86..cb404f1fc1 100644 --- a/.github/workflows/integration-test.yml +++ 
b/.github/workflows/integration-test.yml @@ -85,7 +85,7 @@ jobs: # - uses: actions/checkout@v4 # - name: Set up Python - # uses: actions/setup-python@v4 + # uses: actions/setup-python@v5 # with: # python-version: ${{ matrix.python-version }} @@ -182,6 +182,9 @@ jobs: WEBUI_SECRET_KEY: secret-key GLOBAL_LOG_LEVEL: debug DATABASE_URL: postgresql://postgres:postgres@localhost:5432/postgres + DATABASE_POOL_SIZE: 10 + DATABASE_POOL_MAX_OVERFLOW: 10 + DATABASE_POOL_TIMEOUT: 30 run: | cd backend uvicorn open_webui.main:app --port "8081" --forwarded-allow-ips '*' & diff --git a/.github/workflows/lint-backend.disabled b/.github/workflows/lint-backend.disabled index d220031cc3..dd0bdc7fa7 100644 --- a/.github/workflows/lint-backend.disabled +++ b/.github/workflows/lint-backend.disabled @@ -16,7 +16,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Use Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 - name: Use Bun uses: oven-sh/setup-bun@v1 - name: Install dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fc731a56e..ff5af104f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,42 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.3.31] - 2024-10-06 + +### Added + +- **📚 Knowledge Feature**: Reimagined documents feature, now more performant with a better UI for enhanced organization; includes streamlined API integration for Retrieval-Augmented Generation (RAG). Detailed documentation forthcoming: https://docs.openwebui.com/ +- **🌐 New Landing Page**: Freshly designed landing page; toggle between the new UI and the classic chat UI from Settings > Interface for a personalized experience. +- **📁 Full Document Retrieval Mode**: Toggle between full document retrieval or traditional snippets by clicking on the file item. 
This mode enhances document capabilities and supports comprehensive tasks like summarization by utilizing the entire content instead of RAG. +- **📄 Extracted File Content Display**: View extracted content directly by clicking on the file item, simplifying file analysis. +- **🎨 Artifacts Feature**: Render web content and SVGs directly in the interface, supporting quick iterations and live changes. +- **🖊️ Editable Code Blocks**: Supercharged code blocks now allow live editing directly in the LLM response, with live reloads supported by artifacts. +- **🔧 Code Block Enhancements**: Introduced a floating copy button in code blocks to facilitate easier code copying without scrolling. +- **🔍 SVG Pan/Zoom**: Enhanced interaction with SVG images, including Mermaid diagrams, via new pan and zoom capabilities. +- **🔍 Text Select Quick Actions**: New floating buttons appear when text is highlighted in LLM responses, offering deeper interactions like "Ask a Question" or "Explain". +- **🗃️ Database Pool Configuration**: Enhanced database handling to support scalable user growth. +- **🔊 Experimental Audio Compression**: Compress audio files to navigate around the 25MB limit for OpenAI's speech-to-text processing. +- **🔍 Query Embedding**: Adjusted embedding behavior to enhance system performance by not repeating query embedding. +- **💾 Lazy Load Optimizations**: Implemented lazy loading of large dependencies to minimize initial memory usage, boosting performance. +- **🍏 Apple Touch Icon Support**: Optimizes the display of icons for web bookmarks on Apple mobile devices. +- **🔽 Expandable Content Markdown Support**: Introducing 'details', 'summary' tag support for creating expandable content sections in markdown, facilitating cleaner, organized documentation and interactive content display. + +### Fixed + +- **🔘 Action Button Issue**: Resolved a bug where action buttons were not functioning, enhancing UI reliability. 
+- **🔄 Multi-Model Chat Loop**: Fixed an infinite loop issue in multi-model chat environments, ensuring smoother chat operations. +- **📄 Chat PDF/TXT Export Issue**: Resolved problems with exporting chat logs to PDF and TXT formats. +- **🔊 Call to Text-to-Speech Issues**: Rectified problems with text-to-speech functions to improve audio interactions. + +### Changed + +- **⚙️ Endpoint Renaming**: Renamed 'rag' endpoints to 'retrieval' for clearer function description. +- **🎨 Styling and Interface Updates**: Multiple refinements across the platform to enhance visual appeal and user interaction. + +### Removed + +- **🗑️ Deprecated 'DOCS_DIR'**: Removed the outdated 'docs_dir' variable in favor of more direct file management solutions, with direct file directory syncing and API uploads for a more integrated experience. + ## [0.3.30] - 2024-09-26 ### Fixed diff --git a/Dockerfile b/Dockerfile index c944f54e69..2e898dc890 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ ARG UID=0 ARG GID=0 ######## WebUI frontend ######## -FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build +FROM --platform=$BUILDPLATFORM node:22-alpine3.20 AS build ARG BUILD_HASH WORKDIR /app @@ -30,7 +30,7 @@ ENV APP_BUILD_HASH=${BUILD_HASH} RUN npm run build ######## WebUI backend ######## -FROM python:3.11-slim-bookworm as base +FROM python:3.11-slim-bookworm AS base # Use args ARG USE_CUDA @@ -82,7 +82,7 @@ ENV HF_HOME="/app/backend/data/cache/embedding/models" WORKDIR /app/backend -ENV HOME /root +ENV HOME=/root # Create user and group if not root RUN if [ $UID -ne 0 ]; then \ if [ $GID -ne 0 ]; then \ @@ -161,6 +161,6 @@ USER $UID:$GID ARG BUILD_HASH ENV WEBUI_BUILD_VERSION=${BUILD_HASH} -ENV DOCKER true +ENV DOCKER=true CMD [ "bash", "start.sh"] diff --git a/README.md b/README.md index e83324ead3..c4ca343ca0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Open WebUI (Formerly Ollama WebUI) 👋 +# Open WebUI 👋 ![GitHub 
stars](https://img.shields.io/github/stars/open-webui/open-webui?style=social) ![GitHub forks](https://img.shields.io/github/forks/open-webui/open-webui?style=social) @@ -170,7 +170,7 @@ docker run --rm --volume /var/run/docker.sock:/var/run/docker.sock containrrr/wa In the last part of the command, replace `open-webui` with your container name if it is different. -Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/migration/). +Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/tutorials/migration/). ### Using the Dev Branch 🌙 @@ -220,4 +220,4 @@ If you have any questions, suggestions, or need assistance, please open an issue --- -Created by [Timothy J. Baek](https://github.com/tjbck) - Let's make Open WebUI even more amazing together! 💪 +Created by [Timothy Jaeryang Baek](https://github.com/tjbck) - Let's make Open WebUI even more amazing together! 💪 diff --git a/backend/open_webui/apps/audio/main.py b/backend/open_webui/apps/audio/main.py index a1e6e94fa0..0e56720138 100644 --- a/backend/open_webui/apps/audio/main.py +++ b/backend/open_webui/apps/audio/main.py @@ -5,6 +5,8 @@ import os import uuid from functools import lru_cache from pathlib import Path +from pydub import AudioSegment +from pydub.silence import split_on_silence import requests from open_webui.config import ( @@ -35,7 +37,12 @@ from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile, from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from pydantic import BaseModel -from open_webui.utils.utils import get_admin_user, get_current_user, get_verified_user +from open_webui.utils.utils import get_admin_user, get_verified_user + +# Constants +MAX_FILE_SIZE_MB = 25 +MAX_FILE_SIZE = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["AUDIO"]) @@ -353,10 +360,103 @@ async def 
speech(request: Request, user=Depends(get_verified_user)): ) +def transcribe(file_path): + print("transcribe", file_path) + filename = os.path.basename(file_path) + file_dir = os.path.dirname(file_path) + id = filename.split(".")[0] + + if app.state.config.STT_ENGINE == "": + from faster_whisper import WhisperModel + + whisper_kwargs = { + "model_size_or_path": WHISPER_MODEL, + "device": whisper_device_type, + "compute_type": "int8", + "download_root": WHISPER_MODEL_DIR, + "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, + } + + log.debug(f"whisper_kwargs: {whisper_kwargs}") + + try: + model = WhisperModel(**whisper_kwargs) + except Exception: + log.warning( + "WhisperModel initialization failed, attempting download with local_files_only=False" + ) + whisper_kwargs["local_files_only"] = False + model = WhisperModel(**whisper_kwargs) + + segments, info = model.transcribe(file_path, beam_size=5) + log.info( + "Detected language '%s' with probability %f" + % (info.language, info.language_probability) + ) + + transcript = "".join([segment.text for segment in list(segments)]) + + data = {"text": transcript.strip()} + + # save the transcript to a json file + transcript_file = f"{file_dir}/{id}.json" + with open(transcript_file, "w") as f: + json.dump(data, f) + + print(data) + return data + elif app.state.config.STT_ENGINE == "openai": + if is_mp4_audio(file_path): + print("is_mp4_audio") + os.rename(file_path, file_path.replace(".wav", ".mp4")) + # Convert MP4 audio file to WAV format + convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path) + + headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"} + + files = {"file": (filename, open(file_path, "rb"))} + data = {"model": app.state.config.STT_MODEL} + + print(files, data) + + r = None + try: + r = requests.post( + url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", + headers=headers, + files=files, + data=data, + ) + + r.raise_for_status() + + data = r.json() + + # 
save the transcript to a json file + transcript_file = f"{file_dir}/{id}.json" + with open(transcript_file, "w") as f: + json.dump(data, f) + + print(data) + return data + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']['message']}" + except Exception: + error_detail = f"External: {e}" + + raise Exception(error_detail) + + @app.post("/transcriptions") -def transcribe( +def transcription( file: UploadFile = File(...), - user=Depends(get_current_user), + user=Depends(get_verified_user), ): log.info(f"file.content_type: {file.content_type}") @@ -368,111 +468,52 @@ def transcribe( try: ext = file.filename.split(".")[-1] - id = uuid.uuid4() + filename = f"{id}.{ext}" + contents = file.file.read() file_dir = f"{CACHE_DIR}/audio/transcriptions" os.makedirs(file_dir, exist_ok=True) file_path = f"{file_dir}/{filename}" - print(filename) - - contents = file.file.read() with open(file_path, "wb") as f: f.write(contents) - f.close() - if app.state.config.STT_ENGINE == "": - from faster_whisper import WhisperModel + try: + if os.path.getsize(file_path) > MAX_FILE_SIZE: # file is bigger than 25MB + log.debug(f"File size is larger than {MAX_FILE_SIZE_MB}MB") + audio = AudioSegment.from_file(file_path) + audio = audio.set_frame_rate(16000).set_channels(1) # Compress audio + compressed_path = f"{file_dir}/{id}_compressed.opus" + audio.export(compressed_path, format="opus", bitrate="32k") + log.debug(f"Compressed audio to {compressed_path}") + file_path = compressed_path - whisper_kwargs = { - "model_size_or_path": WHISPER_MODEL, - "device": whisper_device_type, - "compute_type": "int8", - "download_root": WHISPER_MODEL_DIR, - "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, - } + if ( + os.path.getsize(file_path) > MAX_FILE_SIZE + ): # Still larger than 25MB after compression + log.debug( + f"Compressed file size is still larger than 
{MAX_FILE_SIZE_MB}MB: {os.path.getsize(file_path)}" + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_TOO_LARGE( + size=f"{MAX_FILE_SIZE_MB}MB" + ), + ) - log.debug(f"whisper_kwargs: {whisper_kwargs}") - - try: - model = WhisperModel(**whisper_kwargs) - except Exception: - log.warning( - "WhisperModel initialization failed, attempting download with local_files_only=False" - ) - whisper_kwargs["local_files_only"] = False - model = WhisperModel(**whisper_kwargs) - - segments, info = model.transcribe(file_path, beam_size=5) - log.info( - "Detected language '%s' with probability %f" - % (info.language, info.language_probability) - ) - - transcript = "".join([segment.text for segment in list(segments)]) - - data = {"text": transcript.strip()} - - # save the transcript to a json file - transcript_file = f"{file_dir}/{id}.json" - with open(transcript_file, "w") as f: - json.dump(data, f) - - print(data) + data = transcribe(file_path) + else: + data = transcribe(file_path) return data - - elif app.state.config.STT_ENGINE == "openai": - if is_mp4_audio(file_path): - print("is_mp4_audio") - os.rename(file_path, file_path.replace(".wav", ".mp4")) - # Convert MP4 audio file to WAV format - convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path) - - headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"} - - files = {"file": (filename, open(file_path, "rb"))} - data = {"model": app.state.config.STT_MODEL} - - print(files, data) - - r = None - try: - r = requests.post( - url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", - headers=headers, - files=files, - data=data, - ) - - r.raise_for_status() - - data = r.json() - - # save the transcript to a json file - transcript_file = f"{file_dir}/{id}.json" - with open(transcript_file, "w") as f: - json.dump(data, f) - - print(data) - return data - except Exception as e: - log.exception(e) - error_detail = "Open WebUI: Server Connection 
Error" - if r is not None: - try: - res = r.json() - if "error" in res: - error_detail = f"External: {res['error']['message']}" - except Exception: - error_detail = f"External: {e}" - - raise HTTPException( - status_code=r.status_code if r != None else 500, - detail=error_detail, - ) + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), + ) except Exception as e: log.exception(e) diff --git a/backend/open_webui/apps/ollama/main.py b/backend/open_webui/apps/ollama/main.py index 1337fbb31d..33d9846557 100644 --- a/backend/open_webui/apps/ollama/main.py +++ b/backend/open_webui/apps/ollama/main.py @@ -12,7 +12,6 @@ import aiohttp import requests from open_webui.apps.webui.models.models import Models from open_webui.config import ( - AIOHTTP_CLIENT_TIMEOUT, CORS_ALLOW_ORIGIN, ENABLE_MODEL_FILTER, ENABLE_OLLAMA_API, @@ -21,6 +20,9 @@ from open_webui.config import ( UPLOAD_DIR, AppConfig, ) +from open_webui.env import AIOHTTP_CLIENT_TIMEOUT + + from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile @@ -117,7 +119,7 @@ async def update_ollama_api_url(form_data: UrlUpdateForm, user=Depends(get_admin async def fetch_url(url): - timeout = aiohttp.ClientTimeout(total=5) + timeout = aiohttp.ClientTimeout(total=3) try: async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get(url) as response: @@ -787,6 +789,7 @@ async def generate_chat_completion( ): payload = {**form_data.model_dump(exclude_none=True)} log.debug(f"{payload = }") + if "metadata" in payload: del payload["metadata"] diff --git a/backend/open_webui/apps/openai/main.py b/backend/open_webui/apps/openai/main.py index e0a40a1f54..70cefb29ca 100644 --- a/backend/open_webui/apps/openai/main.py +++ b/backend/open_webui/apps/openai/main.py @@ -9,7 +9,6 @@ import aiohttp import 
requests from open_webui.apps.webui.models.models import Models from open_webui.config import ( - AIOHTTP_CLIENT_TIMEOUT, CACHE_DIR, CORS_ALLOW_ORIGIN, ENABLE_MODEL_FILTER, @@ -19,6 +18,8 @@ from open_webui.config import ( OPENAI_API_KEYS, AppConfig, ) +from open_webui.env import AIOHTTP_CLIENT_TIMEOUT + from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS from fastapi import Depends, FastAPI, HTTPException, Request @@ -27,7 +28,6 @@ from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel from starlette.background import BackgroundTask - from open_webui.utils.payload import ( apply_model_params_to_body_openai, apply_model_system_prompt_to_body, @@ -47,7 +47,6 @@ app.add_middleware( allow_headers=["*"], ) - app.state.config = AppConfig() app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER @@ -180,7 +179,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): async def fetch_url(url, key): - timeout = aiohttp.ClientTimeout(total=5) + timeout = aiohttp.ClientTimeout(total=3) try: headers = {"Authorization": f"Bearer {key}"} async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: @@ -407,20 +406,25 @@ async def generate_chat_completion( url = app.state.config.OPENAI_API_BASE_URLS[idx] key = app.state.config.OPENAI_API_KEYS[idx] + is_o1 = payload["model"].lower().startswith("o1-") # Change max_completion_tokens to max_tokens (Backward compatible) - if "api.openai.com" not in url and not payload["model"].lower().startswith("o1-"): + if "api.openai.com" not in url and not is_o1: if "max_completion_tokens" in payload: # Remove "max_completion_tokens" from the payload payload["max_tokens"] = payload["max_completion_tokens"] del payload["max_completion_tokens"] else: - if payload["model"].lower().startswith("o1-") and "max_tokens" in payload: + if is_o1 and "max_tokens" in payload: payload["max_completion_tokens"] = payload["max_tokens"] del 
payload["max_tokens"] if "max_tokens" in payload and "max_completion_tokens" in payload: del payload["max_tokens"] + # Fix: O1 does not support the "system" parameter, Modify "system" to "user" + if is_o1 and payload["messages"][0]["role"] == "system": + payload["messages"][0]["role"] = "user" + # Convert the modified body back to JSON payload = json.dumps(payload) diff --git a/backend/open_webui/apps/retrieval/loaders/main.py b/backend/open_webui/apps/retrieval/loaders/main.py new file mode 100644 index 0000000000..f0e8f804ee --- /dev/null +++ b/backend/open_webui/apps/retrieval/loaders/main.py @@ -0,0 +1,190 @@ +import requests +import logging +import ftfy + +from langchain_community.document_loaders import ( + BSHTMLLoader, + CSVLoader, + Docx2txtLoader, + OutlookMessageLoader, + PyPDFLoader, + TextLoader, + UnstructuredEPubLoader, + UnstructuredExcelLoader, + UnstructuredMarkdownLoader, + UnstructuredPowerPointLoader, + UnstructuredRSTLoader, + UnstructuredXMLLoader, + YoutubeLoader, +) +from langchain_core.documents import Document +from open_webui.env import SRC_LOG_LEVELS + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + +known_source_ext = [ + "go", + "py", + "java", + "sh", + "bat", + "ps1", + "cmd", + "js", + "ts", + "css", + "cpp", + "hpp", + "h", + "c", + "cs", + "sql", + "log", + "ini", + "pl", + "pm", + "r", + "dart", + "dockerfile", + "env", + "php", + "hs", + "hsc", + "lua", + "nginxconf", + "conf", + "m", + "mm", + "plsql", + "perl", + "rb", + "rs", + "db2", + "scala", + "bash", + "swift", + "vue", + "svelte", + "msg", + "ex", + "exs", + "erl", + "tsx", + "jsx", + "hs", + "lhs", +] + + +class TikaLoader: + def __init__(self, url, file_path, mime_type=None): + self.url = url + self.file_path = file_path + self.mime_type = mime_type + + def load(self) -> list[Document]: + with open(self.file_path, "rb") as f: + data = f.read() + + if self.mime_type is not None: + headers = {"Content-Type": self.mime_type} + else: + headers = 
{} + + endpoint = self.url + if not endpoint.endswith("/"): + endpoint += "/" + endpoint += "tika/text" + + r = requests.put(endpoint, data=data, headers=headers) + + if r.ok: + raw_metadata = r.json() + text = raw_metadata.get("X-TIKA:content", "") + + if "Content-Type" in raw_metadata: + headers["Content-Type"] = raw_metadata["Content-Type"] + + log.info("Tika extracted text: %s", text) + + return [Document(page_content=text, metadata=headers)] + else: + raise Exception(f"Error calling Tika: {r.reason}") + + +class Loader: + def __init__(self, engine: str = "", **kwargs): + self.engine = engine + self.kwargs = kwargs + + def load( + self, filename: str, file_content_type: str, file_path: str + ) -> list[Document]: + loader = self._get_loader(filename, file_content_type, file_path) + docs = loader.load() + + return [ + Document( + page_content=ftfy.fix_text(doc.page_content), metadata=doc.metadata + ) + for doc in docs + ] + + def _get_loader(self, filename: str, file_content_type: str, file_path: str): + file_ext = filename.split(".")[-1].lower() + + if self.engine == "tika" and self.kwargs.get("TIKA_SERVER_URL"): + if file_ext in known_source_ext or ( + file_content_type and file_content_type.find("text/") >= 0 + ): + loader = TextLoader(file_path, autodetect_encoding=True) + else: + loader = TikaLoader( + url=self.kwargs.get("TIKA_SERVER_URL"), + file_path=file_path, + mime_type=file_content_type, + ) + else: + if file_ext == "pdf": + loader = PyPDFLoader( + file_path, extract_images=self.kwargs.get("PDF_EXTRACT_IMAGES") + ) + elif file_ext == "csv": + loader = CSVLoader(file_path) + elif file_ext == "rst": + loader = UnstructuredRSTLoader(file_path, mode="elements") + elif file_ext == "xml": + loader = UnstructuredXMLLoader(file_path) + elif file_ext in ["htm", "html"]: + loader = BSHTMLLoader(file_path, open_encoding="unicode_escape") + elif file_ext == "md": + loader = UnstructuredMarkdownLoader(file_path) + elif file_content_type == "application/epub+zip": 
+ loader = UnstructuredEPubLoader(file_path) + elif ( + file_content_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + or file_ext == "docx" + ): + loader = Docx2txtLoader(file_path) + elif file_content_type in [ + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ] or file_ext in ["xls", "xlsx"]: + loader = UnstructuredExcelLoader(file_path) + elif file_content_type in [ + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ] or file_ext in ["ppt", "pptx"]: + loader = UnstructuredPowerPointLoader(file_path) + elif file_ext == "msg": + loader = OutlookMessageLoader(file_path) + elif file_ext in known_source_ext or ( + file_content_type and file_content_type.find("text/") >= 0 + ): + loader = TextLoader(file_path, autodetect_encoding=True) + else: + loader = TextLoader(file_path, autodetect_encoding=True) + + return loader diff --git a/backend/open_webui/apps/rag/main.py b/backend/open_webui/apps/retrieval/main.py similarity index 63% rename from backend/open_webui/apps/rag/main.py rename to backend/open_webui/apps/retrieval/main.py index 74855b336a..9fedd65751 100644 --- a/backend/open_webui/apps/rag/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1,37 +1,41 @@ +# TODO: Merge this with the webui_app and make it a single app + import json import logging import mimetypes import os import shutil -import socket -import urllib.parse + import uuid from datetime import datetime from pathlib import Path from typing import Iterator, Optional, Sequence, Union - -import numpy as np -import torch -import requests -import validators - from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, status from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from open_webui.apps.rag.search.main import SearchResult -from open_webui.apps.rag.search.brave import search_brave -from 
open_webui.apps.rag.search.duckduckgo import search_duckduckgo -from open_webui.apps.rag.search.google_pse import search_google_pse -from open_webui.apps.rag.search.jina_search import search_jina -from open_webui.apps.rag.search.searchapi import search_searchapi -from open_webui.apps.rag.search.searxng import search_searxng -from open_webui.apps.rag.search.serper import search_serper -from open_webui.apps.rag.search.serply import search_serply -from open_webui.apps.rag.search.serpstack import search_serpstack -from open_webui.apps.rag.search.tavily import search_tavily -from open_webui.apps.rag.utils import ( +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT + +# Document loaders +from open_webui.apps.retrieval.loaders.main import Loader + +# Web search engines +from open_webui.apps.retrieval.web.main import SearchResult +from open_webui.apps.retrieval.web.utils import get_web_loader +from open_webui.apps.retrieval.web.brave import search_brave +from open_webui.apps.retrieval.web.duckduckgo import search_duckduckgo +from open_webui.apps.retrieval.web.google_pse import search_google_pse +from open_webui.apps.retrieval.web.jina_search import search_jina +from open_webui.apps.retrieval.web.searchapi import search_searchapi +from open_webui.apps.retrieval.web.searxng import search_searxng +from open_webui.apps.retrieval.web.serper import search_serper +from open_webui.apps.retrieval.web.serply import search_serply +from open_webui.apps.retrieval.web.serpstack import search_serpstack +from open_webui.apps.retrieval.web.tavily import search_tavily + + +from open_webui.apps.retrieval.utils import ( get_embedding_function, get_model_path, query_collection, @@ -39,7 +43,7 @@ from open_webui.apps.rag.utils import ( query_doc, query_doc_with_hybrid_search, ) -from open_webui.apps.webui.models.documents import DocumentForm, Documents + from open_webui.apps.webui.models.files import Files from open_webui.config import ( BRAVE_SEARCH_API_KEY, @@ -47,7 
+51,6 @@ from open_webui.config import ( CHUNK_SIZE, CONTENT_EXTRACTION_ENGINE, CORS_ALLOW_ORIGIN, - DOCS_DIR, ENABLE_RAG_HYBRID_SEARCH, ENABLE_RAG_LOCAL_WEB_FETCH, ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, @@ -98,28 +101,13 @@ from open_webui.utils.misc import ( sanitize_filename, ) from open_webui.utils.utils import get_admin_user, get_verified_user -from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import ( - BSHTMLLoader, - CSVLoader, - Docx2txtLoader, - OutlookMessageLoader, - PyPDFLoader, - TextLoader, - UnstructuredEPubLoader, - UnstructuredExcelLoader, - UnstructuredMarkdownLoader, - UnstructuredPowerPointLoader, - UnstructuredRSTLoader, - UnstructuredXMLLoader, - WebBaseLoader, YoutubeLoader, ) from langchain_core.documents import Document -from colbert.infra import ColBERTConfig -from colbert.modeling.checkpoint import Checkpoint + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -200,86 +188,12 @@ def update_reranking_model( ): if reranking_model: if any(model in reranking_model for model in ["jinaai/jina-colbert-v2"]): - - class ColBERT: - def __init__(self, name) -> None: - print("ColBERT: Loading model", name) - self.device = "cuda" if torch.cuda.is_available() else "cpu" - - if DOCKER: - # This is a workaround for the issue with the docker container - # where the torch extension is not loaded properly - # and the following error is thrown: - # /root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/segmented_maxsim_cpp.so: cannot open shared object file: No such file or directory - - lock_file = "/root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/lock" - if os.path.exists(lock_file): - os.remove(lock_file) - - self.ckpt = Checkpoint( - name, - colbert_config=ColBERTConfig(model_name=name), - ).to(self.device) - pass - - def calculate_similarity_scores( - self, query_embeddings, 
document_embeddings - ): - - query_embeddings = query_embeddings.to(self.device) - document_embeddings = document_embeddings.to(self.device) - - # Validate dimensions to ensure compatibility - if query_embeddings.dim() != 3: - raise ValueError( - f"Expected query embeddings to have 3 dimensions, but got {query_embeddings.dim()}." - ) - if document_embeddings.dim() != 3: - raise ValueError( - f"Expected document embeddings to have 3 dimensions, but got {document_embeddings.dim()}." - ) - if query_embeddings.size(0) not in [1, document_embeddings.size(0)]: - raise ValueError( - "There should be either one query or queries equal to the number of documents." - ) - - # Transpose the query embeddings to align for matrix multiplication - transposed_query_embeddings = query_embeddings.permute(0, 2, 1) - # Compute similarity scores using batch matrix multiplication - computed_scores = torch.matmul( - document_embeddings, transposed_query_embeddings - ) - # Apply max pooling to extract the highest semantic similarity across each document's sequence - maximum_scores = torch.max(computed_scores, dim=1).values - - # Sum up the maximum scores across features to get the overall document relevance scores - final_scores = maximum_scores.sum(dim=1) - - normalized_scores = torch.softmax(final_scores, dim=0) - - return normalized_scores.detach().cpu().numpy().astype(np.float32) - - def predict(self, sentences): - - query = sentences[0][0] - docs = [i[1] for i in sentences] - - # Embedding the documents - embedded_docs = self.ckpt.docFromText(docs, bsize=32)[0] - # Embedding the queries - embedded_queries = self.ckpt.queryFromText([query], bsize=32) - embedded_query = embedded_queries[0] - - # Calculate retrieval scores for the query against all documents - scores = self.calculate_similarity_scores( - embedded_query.unsqueeze(0), embedded_docs - ) - - return scores - try: + from open_webui.apps.retrieval.models.colbert import ColBERT + app.state.sentence_transformer_rf = ColBERT( - 
get_model_path(reranking_model, auto_update) + get_model_path(reranking_model, auto_update), + env="docker" if DOCKER else None, ) except Exception as e: log.error(f"ColBERT: {e}") @@ -332,10 +246,10 @@ app.add_middleware( class CollectionNameForm(BaseModel): - collection_name: Optional[str] = "test" + collection_name: Optional[str] = None -class UrlForm(CollectionNameForm): +class ProcessUrlForm(CollectionNameForm): url: str @@ -707,107 +621,317 @@ async def update_query_settings( } -class QueryDocForm(BaseModel): - collection_name: str - query: str - k: Optional[int] = None - r: Optional[float] = None - hybrid: Optional[bool] = None +#################################### +# +# Document process and retrieval +# +#################################### -@app.post("/query/doc") -def query_doc_handler( - form_data: QueryDocForm, +def save_docs_to_vector_db( + docs, + collection_name, + metadata: Optional[dict] = None, + overwrite: bool = False, + split: bool = True, + add: bool = False, +) -> bool: + log.info(f"save_docs_to_vector_db {docs} {collection_name}") + + # Check if entries with the same hash (metadata.hash) already exist + if metadata and "hash" in metadata: + result = VECTOR_DB_CLIENT.query( + collection_name=collection_name, + filter={"hash": metadata["hash"]}, + ) + + if result: + existing_doc_ids = result.ids[0] + if existing_doc_ids: + log.info(f"Document with hash {metadata['hash']} already exists") + raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT) + + if split: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=app.state.config.CHUNK_SIZE, + chunk_overlap=app.state.config.CHUNK_OVERLAP, + add_start_index=True, + ) + docs = text_splitter.split_documents(docs) + + if len(docs) == 0: + raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) + + texts = [doc.page_content for doc in docs] + metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs] + + # ChromaDB does not like datetime formats + # for meta-data so convert them to 
string. + for metadata in metadatas: + for key, value in metadata.items(): + if isinstance(value, datetime): + metadata[key] = str(value) + + try: + if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): + log.info(f"collection {collection_name} already exists") + + if overwrite: + VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name) + log.info(f"deleting existing collection {collection_name}") + + if add is False: + return True + + log.info(f"adding to collection {collection_name}") + embedding_function = get_embedding_function( + app.state.config.RAG_EMBEDDING_ENGINE, + app.state.config.RAG_EMBEDDING_MODEL, + app.state.sentence_transformer_ef, + app.state.config.OPENAI_API_KEY, + app.state.config.OPENAI_API_BASE_URL, + app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, + ) + + embeddings = embedding_function( + list(map(lambda x: x.replace("\n", " "), texts)) + ) + + items = [ + { + "id": str(uuid.uuid4()), + "text": text, + "vector": embeddings[idx], + "metadata": metadatas[idx], + } + for idx, text in enumerate(texts) + ] + + VECTOR_DB_CLIENT.insert( + collection_name=collection_name, + items=items, + ) + + return True + except Exception as e: + log.exception(e) + return False + + +class ProcessFileForm(BaseModel): + file_id: str + content: Optional[str] = None + collection_name: Optional[str] = None + + +@app.post("/process/file") +def process_file( + form_data: ProcessFileForm, user=Depends(get_verified_user), ): try: - if app.state.config.ENABLE_RAG_HYBRID_SEARCH: - return query_doc_with_hybrid_search( - collection_name=form_data.collection_name, - query=form_data.query, - embedding_function=app.state.EMBEDDING_FUNCTION, - k=form_data.k if form_data.k else app.state.config.TOP_K, - reranking_function=app.state.sentence_transformer_rf, - r=( - form_data.r if form_data.r else app.state.config.RELEVANCE_THRESHOLD - ), + file = Files.get_file_by_id(form_data.file_id) + + collection_name = form_data.collection_name + + if 
collection_name is None: + collection_name = f"file-{file.id}" + + if form_data.content: + # Update the content in the file + # Usage: /files/{file_id}/data/content/update + + VECTOR_DB_CLIENT.delete( + collection_name=f"file-{file.id}", + filter={"file_id": file.id}, ) + + docs = [ + Document( + page_content=form_data.content, + metadata={ + "name": file.meta.get("name", file.filename), + "created_by": file.user_id, + "file_id": file.id, + **file.meta, + }, + ) + ] + + text_content = form_data.content + elif form_data.collection_name: + # Check if the file has already been processed and save the content + # Usage: /knowledge/{id}/file/add, /knowledge/{id}/file/update + + result = VECTOR_DB_CLIENT.query( + collection_name=f"file-{file.id}", filter={"file_id": file.id} + ) + + if len(result.ids[0]) > 0: + docs = [ + Document( + page_content=result.documents[0][idx], + metadata=result.metadatas[0][idx], + ) + for idx, id in enumerate(result.ids[0]) + ] + else: + docs = [ + Document( + page_content=file.data.get("content", ""), + metadata={ + "name": file.meta.get("name", file.filename), + "created_by": file.user_id, + "file_id": file.id, + **file.meta, + }, + ) + ] + + text_content = file.data.get("content", "") else: - return query_doc( - collection_name=form_data.collection_name, - query=form_data.query, - embedding_function=app.state.EMBEDDING_FUNCTION, - k=form_data.k if form_data.k else app.state.config.TOP_K, - ) - except Exception as e: - log.exception(e) - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), + # Process the file and save the content + # Usage: /files/ + + file_path = file.meta.get("path", None) + if file_path: + loader = Loader( + engine=app.state.config.CONTENT_EXTRACTION_ENGINE, + TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, + PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, + ) + + docs = loader.load( + file.filename, file.meta.get("content_type"), file_path + ) + else: + docs = [ 
+ Document( + page_content=file.data.get("content", ""), + metadata={ + "name": file.filename, + "created_by": file.user_id, + "file_id": file.id, + **file.meta, + }, + ) + ] + + text_content = " ".join([doc.page_content for doc in docs]) + + log.debug(f"text_content: {text_content}") + Files.update_file_data_by_id( + file.id, + {"content": text_content}, ) + hash = calculate_sha256_string(text_content) + Files.update_file_hash_by_id(file.id, hash) -class QueryCollectionsForm(BaseModel): - collection_names: list[str] - query: str - k: Optional[int] = None - r: Optional[float] = None - hybrid: Optional[bool] = None + try: + result = save_docs_to_vector_db( + docs=docs, + collection_name=collection_name, + metadata={ + "file_id": file.id, + "name": file.meta.get("name", file.filename), + "hash": hash, + }, + add=(True if form_data.collection_name else False), + ) + + if result: + Files.update_file_metadata_by_id( + file.id, + { + "collection_name": collection_name, + }, + ) + + return { + "status": True, + "collection_name": collection_name, + "filename": file.meta.get("name", file.filename), + "content": text_content, + } + except Exception as e: + raise e + except Exception as e: + log.exception(e) + if "No pandoc was found" in str(e): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) -@app.post("/query/collection") -def query_collection_handler( - form_data: QueryCollectionsForm, +class ProcessTextForm(BaseModel): + name: str + content: str + collection_name: Optional[str] = None + + +@app.post("/process/text") +def process_text( + form_data: ProcessTextForm, user=Depends(get_verified_user), ): - try: - if app.state.config.ENABLE_RAG_HYBRID_SEARCH: - return query_collection_with_hybrid_search( - collection_names=form_data.collection_names, - query=form_data.query, - 
embedding_function=app.state.EMBEDDING_FUNCTION, - k=form_data.k if form_data.k else app.state.config.TOP_K, - reranking_function=app.state.sentence_transformer_rf, - r=( - form_data.r if form_data.r else app.state.config.RELEVANCE_THRESHOLD - ), - ) - else: - return query_collection( - collection_names=form_data.collection_names, - query=form_data.query, - embedding_function=app.state.EMBEDDING_FUNCTION, - k=form_data.k if form_data.k else app.state.config.TOP_K, - ) + collection_name = form_data.collection_name + if collection_name is None: + collection_name = calculate_sha256_string(form_data.content) - except Exception as e: - log.exception(e) + docs = [ + Document( + page_content=form_data.content, + metadata={"name": form_data.name, "created_by": user.id}, + ) + ] + text_content = form_data.content + log.debug(f"text_content: {text_content}") + + result = save_docs_to_vector_db(docs, collection_name) + + if result: + return { + "status": True, + "collection_name": collection_name, + "content": text_content, + } + else: raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=ERROR_MESSAGES.DEFAULT(), ) -@app.post("/youtube") -def store_youtube_video(form_data: UrlForm, user=Depends(get_verified_user)): +@app.post("/process/youtube") +def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: + collection_name = form_data.collection_name + if not collection_name: + collection_name = calculate_sha256_string(form_data.url)[:63] + loader = YoutubeLoader.from_youtube_url( form_data.url, add_video_info=True, language=app.state.config.YOUTUBE_LOADER_LANGUAGE, translation=app.state.YOUTUBE_LOADER_TRANSLATION, ) - data = loader.load() + docs = loader.load() + text_content = " ".join([doc.page_content for doc in docs]) + log.debug(f"text_content: {text_content}") + save_docs_to_vector_db(docs, collection_name, overwrite=True) - 
collection_name = form_data.collection_name - if collection_name == "": - collection_name = calculate_sha256_string(form_data.url)[:63] - - store_data_in_vector_db(data, collection_name, overwrite=True) return { "status": True, "collection_name": collection_name, "filename": form_data.url, + "content": text_content, } except Exception as e: log.exception(e) @@ -817,25 +941,28 @@ def store_youtube_video(form_data: UrlForm, user=Depends(get_verified_user)): ) -@app.post("/web") -def store_web(form_data: UrlForm, user=Depends(get_verified_user)): - # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" +@app.post("/process/web") +def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: + collection_name = form_data.collection_name + if not collection_name: + collection_name = calculate_sha256_string(form_data.url)[:63] + loader = get_web_loader( form_data.url, verify_ssl=app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, ) - data = loader.load() + docs = loader.load() + text_content = " ".join([doc.page_content for doc in docs]) + log.debug(f"text_content: {text_content}") + save_docs_to_vector_db(docs, collection_name, overwrite=True) - collection_name = form_data.collection_name - if collection_name == "": - collection_name = calculate_sha256_string(form_data.url)[:63] - - store_data_in_vector_db(data, collection_name, overwrite=True) return { "status": True, "collection_name": collection_name, "filename": form_data.url, + "content": text_content, } except Exception as e: log.exception(e) @@ -845,53 +972,6 @@ def store_web(form_data: UrlForm, user=Depends(get_verified_user)): ) -def get_web_loader(url: Union[str, Sequence[str]], verify_ssl: bool = True): - # Check if the URL is valid - if not validate_url(url): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - return SafeWebBaseLoader( - url, - verify_ssl=verify_ssl, - 
requests_per_second=RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - continue_on_failure=True, - ) - - -def validate_url(url: Union[str, Sequence[str]]): - if isinstance(url, str): - if isinstance(validators.url(url), validators.ValidationError): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - if not ENABLE_RAG_LOCAL_WEB_FETCH: - # Local web fetch is disabled, filter out any URLs that resolve to private IP addresses - parsed_url = urllib.parse.urlparse(url) - # Get IPv4 and IPv6 addresses - ipv4_addresses, ipv6_addresses = resolve_hostname(parsed_url.hostname) - # Check if any of the resolved addresses are private - # This is technically still vulnerable to DNS rebinding attacks, as we don't control WebBaseLoader - for ip in ipv4_addresses: - if validators.ipv4(ip, private=True): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - for ip in ipv6_addresses: - if validators.ipv6(ip, private=True): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - return True - elif isinstance(url, Sequence): - return all(validate_url(u) for u in url) - else: - return False - - -def resolve_hostname(hostname): - # Get address information - addr_info = socket.getaddrinfo(hostname, None) - - # Extract IP addresses from address information - ipv4_addresses = [info[4][0] for info in addr_info if info[0] == socket.AF_INET] - ipv6_addresses = [info[4][0] for info in addr_info if info[0] == socket.AF_INET6] - - return ipv4_addresses, ipv6_addresses - - def search_web(engine: str, query: str) -> list[SearchResult]: """Search the web using a search engine and return the results as a list of SearchResult objects. 
Will look for a search engine API key in environment variables in the following order: @@ -1007,8 +1087,8 @@ def search_web(engine: str, query: str) -> list[SearchResult]: raise Exception("No search engine API key found in environment variables") -@app.post("/web/search") -def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)): +@app.post("/process/web/search") +def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)): try: logging.info( f"trying to web search with {app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" @@ -1026,15 +1106,17 @@ def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)): ) try: - urls = [result.link for result in web_results] - loader = get_web_loader(urls) - data = loader.load() - collection_name = form_data.collection_name if collection_name == "": collection_name = calculate_sha256_string(form_data.query)[:63] - store_data_in_vector_db(data, collection_name, overwrite=True) + urls = [result.link for result in web_results] + + loader = get_web_loader(urls) + docs = loader.load() + + save_docs_to_vector_db(docs, collection_name, overwrite=True) + return { "status": True, "collection_name": collection_name, @@ -1048,445 +1130,116 @@ def store_web_search(form_data: SearchForm, user=Depends(get_verified_user)): ) -def store_data_in_vector_db( - data, collection_name, metadata: Optional[dict] = None, overwrite: bool = False -) -> bool: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=app.state.config.CHUNK_SIZE, - chunk_overlap=app.state.config.CHUNK_OVERLAP, - add_start_index=True, - ) - - docs = text_splitter.split_documents(data) - - if len(docs) > 0: - log.info(f"store_data_in_vector_db {docs}") - return store_docs_in_vector_db(docs, collection_name, metadata, overwrite), None - else: - raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) +class QueryDocForm(BaseModel): + collection_name: str + query: str + k: Optional[int] = None + r: Optional[float] = None + 
hybrid: Optional[bool] = None -def store_text_in_vector_db( - text, metadata, collection_name, overwrite: bool = False -) -> bool: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=app.state.config.CHUNK_SIZE, - chunk_overlap=app.state.config.CHUNK_OVERLAP, - add_start_index=True, - ) - docs = text_splitter.create_documents([text], metadatas=[metadata]) - return store_docs_in_vector_db(docs, collection_name, overwrite=overwrite) - - -def store_docs_in_vector_db( - docs, collection_name, metadata: Optional[dict] = None, overwrite: bool = False -) -> bool: - log.info(f"store_docs_in_vector_db {docs} {collection_name}") - - texts = [doc.page_content for doc in docs] - metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs] - - # ChromaDB does not like datetime formats - # for meta-data so convert them to string. - for metadata in metadatas: - for key, value in metadata.items(): - if isinstance(value, datetime): - metadata[key] = str(value) - - try: - if overwrite: - if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): - log.info(f"deleting existing collection {collection_name}") - VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name) - - if VECTOR_DB_CLIENT.has_collection(collection_name=collection_name): - log.info(f"collection {collection_name} already exists") - return True - else: - embedding_function = get_embedding_function( - app.state.config.RAG_EMBEDDING_ENGINE, - app.state.config.RAG_EMBEDDING_MODEL, - app.state.sentence_transformer_ef, - app.state.config.OPENAI_API_KEY, - app.state.config.OPENAI_API_BASE_URL, - app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, - ) - - VECTOR_DB_CLIENT.insert( - collection_name=collection_name, - items=[ - { - "id": str(uuid.uuid4()), - "text": text, - "vector": embedding_function(text.replace("\n", " ")), - "metadata": metadatas[idx], - } - for idx, text in enumerate(texts) - ], - ) - - return True - except Exception as e: - log.exception(e) - return False 
- - -class TikaLoader: - def __init__(self, file_path, mime_type=None): - self.file_path = file_path - self.mime_type = mime_type - - def load(self) -> list[Document]: - with open(self.file_path, "rb") as f: - data = f.read() - - if self.mime_type is not None: - headers = {"Content-Type": self.mime_type} - else: - headers = {} - - endpoint = app.state.config.TIKA_SERVER_URL - if not endpoint.endswith("/"): - endpoint += "/" - endpoint += "tika/text" - - r = requests.put(endpoint, data=data, headers=headers) - - if r.ok: - raw_metadata = r.json() - text = raw_metadata.get("X-TIKA:content", "") - - if "Content-Type" in raw_metadata: - headers["Content-Type"] = raw_metadata["Content-Type"] - - log.info("Tika extracted text: %s", text) - - return [Document(page_content=text, metadata=headers)] - else: - raise Exception(f"Error calling Tika: {r.reason}") - - -def get_loader(filename: str, file_content_type: str, file_path: str): - file_ext = filename.split(".")[-1].lower() - known_type = True - - known_source_ext = [ - "go", - "py", - "java", - "sh", - "bat", - "ps1", - "cmd", - "js", - "ts", - "css", - "cpp", - "hpp", - "h", - "c", - "cs", - "sql", - "log", - "ini", - "pl", - "pm", - "r", - "dart", - "dockerfile", - "env", - "php", - "hs", - "hsc", - "lua", - "nginxconf", - "conf", - "m", - "mm", - "plsql", - "perl", - "rb", - "rs", - "db2", - "scala", - "bash", - "swift", - "vue", - "svelte", - "msg", - "ex", - "exs", - "erl", - "tsx", - "jsx", - "hs", - "lhs", - ] - - if ( - app.state.config.CONTENT_EXTRACTION_ENGINE == "tika" - and app.state.config.TIKA_SERVER_URL - ): - if file_ext in known_source_ext or ( - file_content_type and file_content_type.find("text/") >= 0 - ): - loader = TextLoader(file_path, autodetect_encoding=True) - else: - loader = TikaLoader(file_path, file_content_type) - else: - if file_ext == "pdf": - loader = PyPDFLoader( - file_path, extract_images=app.state.config.PDF_EXTRACT_IMAGES - ) - elif file_ext == "csv": - loader = 
CSVLoader(file_path) - elif file_ext == "rst": - loader = UnstructuredRSTLoader(file_path, mode="elements") - elif file_ext == "xml": - loader = UnstructuredXMLLoader(file_path) - elif file_ext in ["htm", "html"]: - loader = BSHTMLLoader(file_path, open_encoding="unicode_escape") - elif file_ext == "md": - loader = UnstructuredMarkdownLoader(file_path) - elif file_content_type == "application/epub+zip": - loader = UnstructuredEPubLoader(file_path) - elif ( - file_content_type - == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - or file_ext == "docx" - ): - loader = Docx2txtLoader(file_path) - elif file_content_type in [ - "application/vnd.ms-excel", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ] or file_ext in ["xls", "xlsx"]: - loader = UnstructuredExcelLoader(file_path) - elif file_content_type in [ - "application/vnd.ms-powerpoint", - "application/vnd.openxmlformats-officedocument.presentationml.presentation", - ] or file_ext in ["ppt", "pptx"]: - loader = UnstructuredPowerPointLoader(file_path) - elif file_ext == "msg": - loader = OutlookMessageLoader(file_path) - elif file_ext in known_source_ext or ( - file_content_type and file_content_type.find("text/") >= 0 - ): - loader = TextLoader(file_path, autodetect_encoding=True) - else: - loader = TextLoader(file_path, autodetect_encoding=True) - known_type = False - - return loader, known_type - - -@app.post("/doc") -def store_doc( - collection_name: Optional[str] = Form(None), - file: UploadFile = File(...), - user=Depends(get_verified_user), -): - # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" - - log.info(f"file.content_type: {file.content_type}") - try: - unsanitized_filename = file.filename - filename = os.path.basename(unsanitized_filename) - - file_path = f"{UPLOAD_DIR}/{filename}" - - contents = file.file.read() - with open(file_path, "wb") as f: - f.write(contents) - f.close() - - f = open(file_path, "rb") - if collection_name is None: - 
collection_name = calculate_sha256(f)[:63] - f.close() - - loader, known_type = get_loader(filename, file.content_type, file_path) - data = loader.load() - - try: - result = store_data_in_vector_db(data, collection_name) - - if result: - return { - "status": True, - "collection_name": collection_name, - "filename": filename, - "known_type": known_type, - } - except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=e, - ) - except Exception as e: - log.exception(e) - if "No pandoc was found" in str(e): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, - ) - else: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), - ) - - -class ProcessDocForm(BaseModel): - file_id: str - collection_name: Optional[str] = None - - -@app.post("/process/doc") -def process_doc( - form_data: ProcessDocForm, +@app.post("/query/doc") +def query_doc_handler( + form_data: QueryDocForm, user=Depends(get_verified_user), ): try: - file = Files.get_file_by_id(form_data.file_id) - file_path = file.meta.get("path", f"{UPLOAD_DIR}/{file.filename}") - - f = open(file_path, "rb") - - collection_name = form_data.collection_name - if collection_name is None: - collection_name = calculate_sha256(f)[:63] - f.close() - - loader, known_type = get_loader( - file.filename, file.meta.get("content_type"), file_path - ) - data = loader.load() - - try: - result = store_data_in_vector_db( - data, - collection_name, - { - "file_id": form_data.file_id, - "name": file.meta.get("name", file.filename), - }, + if app.state.config.ENABLE_RAG_HYBRID_SEARCH: + return query_doc_with_hybrid_search( + collection_name=form_data.collection_name, + query=form_data.query, + embedding_function=app.state.EMBEDDING_FUNCTION, + k=form_data.k if form_data.k else app.state.config.TOP_K, + reranking_function=app.state.sentence_transformer_rf, + r=( + form_data.r if 
form_data.r else app.state.config.RELEVANCE_THRESHOLD + ), ) - - if result: - return { - "status": True, - "collection_name": collection_name, - "known_type": known_type, - "filename": file.meta.get("name", file.filename), - } - except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=e, + else: + return query_doc( + collection_name=form_data.collection_name, + query=form_data.query, + embedding_function=app.state.EMBEDDING_FUNCTION, + k=form_data.k if form_data.k else app.state.config.TOP_K, ) except Exception as e: log.exception(e) - if "No pandoc was found" in str(e): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, - ) - else: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), - ) - - -class TextRAGForm(BaseModel): - name: str - content: str - collection_name: Optional[str] = None - - -@app.post("/text") -def store_text( - form_data: TextRAGForm, - user=Depends(get_verified_user), -): - collection_name = form_data.collection_name - if collection_name is None: - collection_name = calculate_sha256_string(form_data.content) - - result = store_text_in_vector_db( - form_data.content, - metadata={"name": form_data.name, "created_by": user.id}, - collection_name=collection_name, - ) - - if result: - return {"status": True, "collection_name": collection_name} - else: raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=ERROR_MESSAGES.DEFAULT(), + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), ) -@app.get("/scan") -def scan_docs_dir(user=Depends(get_admin_user)): - for path in Path(DOCS_DIR).rglob("./**/*"): - try: - if path.is_file() and not path.name.startswith("."): - tags = extract_folders_after_data_docs(path) - filename = path.name - file_content_type = mimetypes.guess_type(path) +class QueryCollectionsForm(BaseModel): + collection_names: 
list[str] + query: str + k: Optional[int] = None + r: Optional[float] = None + hybrid: Optional[bool] = None - f = open(path, "rb") - collection_name = calculate_sha256(f)[:63] - f.close() - loader, known_type = get_loader( - filename, file_content_type[0], str(path) - ) - data = loader.load() +@app.post("/query/collection") +def query_collection_handler( + form_data: QueryCollectionsForm, + user=Depends(get_verified_user), +): + try: + if app.state.config.ENABLE_RAG_HYBRID_SEARCH: + return query_collection_with_hybrid_search( + collection_names=form_data.collection_names, + query=form_data.query, + embedding_function=app.state.EMBEDDING_FUNCTION, + k=form_data.k if form_data.k else app.state.config.TOP_K, + reranking_function=app.state.sentence_transformer_rf, + r=( + form_data.r if form_data.r else app.state.config.RELEVANCE_THRESHOLD + ), + ) + else: + return query_collection( + collection_names=form_data.collection_names, + query=form_data.query, + embedding_function=app.state.EMBEDDING_FUNCTION, + k=form_data.k if form_data.k else app.state.config.TOP_K, + ) - try: - result = store_data_in_vector_db(data, collection_name) + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), + ) - if result: - sanitized_filename = sanitize_filename(filename) - doc = Documents.get_doc_by_name(sanitized_filename) - if doc is None: - doc = Documents.insert_new_doc( - user.id, - DocumentForm( - **{ - "name": sanitized_filename, - "title": filename, - "collection_name": collection_name, - "filename": filename, - "content": ( - json.dumps( - { - "tags": list( - map( - lambda name: {"name": name}, - tags, - ) - ) - } - ) - if len(tags) - else "{}" - ), - } - ), - ) - except Exception as e: - log.exception(e) - pass +#################################### +# +# Vector DB operations +# +#################################### - except Exception as e: - log.exception(e) - return True +class 
DeleteForm(BaseModel): + collection_name: str + file_id: str + + +@app.post("/delete") +def delete_entries_from_collection(form_data: DeleteForm, user=Depends(get_admin_user)): + try: + if VECTOR_DB_CLIENT.has_collection(collection_name=form_data.collection_name): + file = Files.get_file_by_id(form_data.file_id) + hash = file.hash + + VECTOR_DB_CLIENT.delete( + collection_name=form_data.collection_name, + metadata={"hash": hash}, + ) + return {"status": True} + else: + return {"status": False} + except Exception as e: + log.exception(e) + return {"status": False} @app.post("/reset/db") @@ -1539,33 +1292,6 @@ def reset(user=Depends(get_admin_user)) -> bool: return True -class SafeWebBaseLoader(WebBaseLoader): - """WebBaseLoader with enhanced error handling for URLs.""" - - def lazy_load(self) -> Iterator[Document]: - """Lazy load text from the url(s) in web_path with error handling.""" - for path in self.web_paths: - try: - soup = self._scrape(path, bs_kwargs=self.bs_kwargs) - text = soup.get_text(**self.bs_get_text_kwargs) - - # Build metadata - metadata = {"source": path} - if title := soup.find("title"): - metadata["title"] = title.get_text() - if description := soup.find("meta", attrs={"name": "description"}): - metadata["description"] = description.get( - "content", "No description found." 
- ) - if html := soup.find("html"): - metadata["language"] = html.get("lang", "No language found.") - - yield Document(page_content=text, metadata=metadata) - except Exception as e: - # Log the error and continue with the next URL - log.error(f"Error loading {path}: {e}") - - if ENV == "dev": @app.get("/ef") diff --git a/backend/open_webui/apps/retrieval/models/colbert.py b/backend/open_webui/apps/retrieval/models/colbert.py new file mode 100644 index 0000000000..ea3204cb8b --- /dev/null +++ b/backend/open_webui/apps/retrieval/models/colbert.py @@ -0,0 +1,81 @@ +import os +import torch +import numpy as np +from colbert.infra import ColBERTConfig +from colbert.modeling.checkpoint import Checkpoint + + +class ColBERT: + def __init__(self, name, **kwargs) -> None: + print("ColBERT: Loading model", name) + self.device = "cuda" if torch.cuda.is_available() else "cpu" + + DOCKER = kwargs.get("env") == "docker" + if DOCKER: + # This is a workaround for the issue with the docker container + # where the torch extension is not loaded properly + # and the following error is thrown: + # /root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/segmented_maxsim_cpp.so: cannot open shared object file: No such file or directory + + lock_file = ( + "/root/.cache/torch_extensions/py311_cpu/segmented_maxsim_cpp/lock" + ) + if os.path.exists(lock_file): + os.remove(lock_file) + + self.ckpt = Checkpoint( + name, + colbert_config=ColBERTConfig(model_name=name), + ).to(self.device) + pass + + def calculate_similarity_scores(self, query_embeddings, document_embeddings): + + query_embeddings = query_embeddings.to(self.device) + document_embeddings = document_embeddings.to(self.device) + + # Validate dimensions to ensure compatibility + if query_embeddings.dim() != 3: + raise ValueError( + f"Expected query embeddings to have 3 dimensions, but got {query_embeddings.dim()}." 
+ ) + if document_embeddings.dim() != 3: + raise ValueError( + f"Expected document embeddings to have 3 dimensions, but got {document_embeddings.dim()}." + ) + if query_embeddings.size(0) not in [1, document_embeddings.size(0)]: + raise ValueError( + "There should be either one query or queries equal to the number of documents." + ) + + # Transpose the query embeddings to align for matrix multiplication + transposed_query_embeddings = query_embeddings.permute(0, 2, 1) + # Compute similarity scores using batch matrix multiplication + computed_scores = torch.matmul(document_embeddings, transposed_query_embeddings) + # Apply max pooling to extract the highest semantic similarity across each document's sequence + maximum_scores = torch.max(computed_scores, dim=1).values + + # Sum up the maximum scores across features to get the overall document relevance scores + final_scores = maximum_scores.sum(dim=1) + + normalized_scores = torch.softmax(final_scores, dim=0) + + return normalized_scores.detach().cpu().numpy().astype(np.float32) + + def predict(self, sentences): + + query = sentences[0][0] + docs = [i[1] for i in sentences] + + # Embedding the documents + embedded_docs = self.ckpt.docFromText(docs, bsize=32)[0] + # Embedding the queries + embedded_queries = self.ckpt.queryFromText([query], bsize=32) + embedded_query = embedded_queries[0] + + # Calculate retrieval scores for the query against all documents + scores = self.calculate_similarity_scores( + embedded_query.unsqueeze(0), embedded_docs + ) + + return scores diff --git a/backend/open_webui/apps/rag/utils.py b/backend/open_webui/apps/retrieval/utils.py similarity index 84% rename from backend/open_webui/apps/rag/utils.py rename to backend/open_webui/apps/retrieval/utils.py index 73ccfad387..0fe206c966 100644 --- a/backend/open_webui/apps/rag/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -15,7 +15,7 @@ from open_webui.apps.ollama.main import ( GenerateEmbeddingsForm, generate_ollama_embeddings, ) 
-from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.utils.misc import get_last_user_message from open_webui.env import SRC_LOG_LEVELS @@ -65,19 +65,16 @@ class VectorSearchRetriever(BaseRetriever): def query_doc( collection_name: str, - query: str, - embedding_function, + query_embedding: list[float], k: int, ): try: result = VECTOR_DB_CLIENT.search( collection_name=collection_name, - vectors=[embedding_function(query)], + vectors=[query_embedding], limit=k, ) - print("result", result) - log.info(f"query_doc:result {result}") return result except Exception as e: @@ -184,15 +181,17 @@ def query_collection( embedding_function, k: int, ) -> dict: + results = [] + query_embedding = embedding_function(query) + for collection_name in collection_names: if collection_name: try: result = query_doc( collection_name=collection_name, - query=query, k=k, - embedding_function=embedding_function, + query_embedding=query_embedding, ) results.append(result.model_dump()) except Exception as e: @@ -319,58 +318,71 @@ def get_rag_context( relevant_contexts = [] for file in files: - context = None - - collection_names = ( - file["collection_names"] - if file["type"] == "collection" - else [file["collection_name"]] if file["collection_name"] else [] - ) - - collection_names = set(collection_names).difference(extracted_collections) - if not collection_names: - log.debug(f"skipping {file} as it has already been extracted") - continue - - try: + if file.get("context") == "full": + context = { + "documents": [[file.get("file").get("data", {}).get("content")]], + "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], + } + else: context = None - if file["type"] == "text": - context = file["content"] - else: - if hybrid_search: - try: - context = query_collection_with_hybrid_search( + + collection_names = [] + if file.get("type") == "collection": + if file.get("legacy"): + 
collection_names = file.get("collection_names", []) + else: + collection_names.append(file["id"]) + elif file.get("collection_name"): + collection_names.append(file["collection_name"]) + elif file.get("id"): + if file.get("legacy"): + collection_names.append(f"{file['id']}") + else: + collection_names.append(f"file-{file['id']}") + + collection_names = set(collection_names).difference(extracted_collections) + if not collection_names: + log.debug(f"skipping {file} as it has already been extracted") + continue + + try: + context = None + if file.get("type") == "text": + context = file["content"] + else: + if hybrid_search: + try: + context = query_collection_with_hybrid_search( + collection_names=collection_names, + query=query, + embedding_function=embedding_function, + k=k, + reranking_function=reranking_function, + r=r, + ) + except Exception as e: + log.debug( + "Error when using hybrid search, using" + " non hybrid search as fallback." + ) + + if (not hybrid_search) or (context is None): + context = query_collection( collection_names=collection_names, query=query, embedding_function=embedding_function, k=k, - reranking_function=reranking_function, - r=r, - ) - except Exception as e: - log.debug( - "Error when using hybrid search, using" - " non hybrid search as fallback." 
) + except Exception as e: + log.exception(e) - if (not hybrid_search) or (context is None): - context = query_collection( - collection_names=collection_names, - query=query, - embedding_function=embedding_function, - k=k, - ) - except Exception as e: - log.exception(e) + extracted_collections.extend(collection_names) if context: - relevant_contexts.append({**context, "source": file}) - - extracted_collections.extend(collection_names) + relevant_contexts.append({**context, "file": file}) contexts = [] citations = [] - for context in relevant_contexts: try: if "documents" in context: @@ -383,7 +395,7 @@ def get_rag_context( if "metadatas" in context: citations.append( { - "source": context["source"], + "source": context["file"], "document": context["documents"][0], "metadata": context["metadatas"][0], } diff --git a/backend/open_webui/apps/rag/vector/connector.py b/backend/open_webui/apps/retrieval/vector/connector.py similarity index 50% rename from backend/open_webui/apps/rag/vector/connector.py rename to backend/open_webui/apps/retrieval/vector/connector.py index 073becdbeb..1f33b17219 100644 --- a/backend/open_webui/apps/rag/vector/connector.py +++ b/backend/open_webui/apps/retrieval/vector/connector.py @@ -1,10 +1,10 @@ -from open_webui.apps.rag.vector.dbs.chroma import ChromaClient -from open_webui.apps.rag.vector.dbs.milvus import MilvusClient - - from open_webui.config import VECTOR_DB if VECTOR_DB == "milvus": + from open_webui.apps.retrieval.vector.dbs.milvus import MilvusClient + VECTOR_DB_CLIENT = MilvusClient() else: + from open_webui.apps.retrieval.vector.dbs.chroma import ChromaClient + VECTOR_DB_CLIENT = ChromaClient() diff --git a/backend/open_webui/apps/rag/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py similarity index 67% rename from backend/open_webui/apps/rag/vector/dbs/chroma.py rename to backend/open_webui/apps/retrieval/vector/dbs/chroma.py index 5f94201087..84f80b2531 100644 --- 
a/backend/open_webui/apps/rag/vector/dbs/chroma.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py @@ -4,7 +4,7 @@ from chromadb.utils.batch_utils import create_batches from typing import Optional -from open_webui.apps.rag.vector.main import VectorItem, SearchResult, GetResult +from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult from open_webui.config import ( CHROMA_DATA_PATH, CHROMA_HTTP_HOST, @@ -49,22 +49,49 @@ class ChromaClient: self, collection_name: str, vectors: list[list[float | int]], limit: int ) -> Optional[SearchResult]: # Search for the nearest neighbor items based on the vectors and return 'limit' number of results. - collection = self.client.get_collection(name=collection_name) - if collection: - result = collection.query( - query_embeddings=vectors, - n_results=limit, - ) + try: + collection = self.client.get_collection(name=collection_name) + if collection: + result = collection.query( + query_embeddings=vectors, + n_results=limit, + ) - return SearchResult( - **{ - "ids": result["ids"], - "distances": result["distances"], - "documents": result["documents"], - "metadatas": result["metadatas"], - } - ) - return None + return SearchResult( + **{ + "ids": result["ids"], + "distances": result["distances"], + "documents": result["documents"], + "metadatas": result["metadatas"], + } + ) + return None + except Exception as e: + return None + + def query( + self, collection_name: str, filter: dict, limit: Optional[int] = None + ) -> Optional[GetResult]: + # Query the items from the collection based on the filter. 
+ try: + collection = self.client.get_collection(name=collection_name) + if collection: + result = collection.get( + where=filter, + limit=limit, + ) + + return GetResult( + **{ + "ids": [result["ids"]], + "documents": [result["documents"]], + "metadatas": [result["metadatas"]], + } + ) + return None + except Exception as e: + print(e) + return None def get(self, collection_name: str) -> Optional[GetResult]: # Get all the items in the collection. @@ -111,11 +138,19 @@ class ChromaClient: ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas ) - def delete(self, collection_name: str, ids: list[str]): + def delete( + self, + collection_name: str, + ids: Optional[list[str]] = None, + filter: Optional[dict] = None, + ): # Delete the items from the collection based on the ids. collection = self.client.get_collection(name=collection_name) if collection: - collection.delete(ids=ids) + if ids: + collection.delete(ids=ids) + elif filter: + collection.delete(where=filter) def reset(self): # Resets the database. This will delete all collections and item entries. 
diff --git a/backend/open_webui/apps/rag/vector/dbs/milvus.py b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py similarity index 64% rename from backend/open_webui/apps/rag/vector/dbs/milvus.py rename to backend/open_webui/apps/retrieval/vector/dbs/milvus.py index f205b95215..5351f860e0 100644 --- a/backend/open_webui/apps/rag/vector/dbs/milvus.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py @@ -4,7 +4,7 @@ import json from typing import Optional -from open_webui.apps.rag.vector.main import VectorItem, SearchResult, GetResult +from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult from open_webui.config import ( MILVUS_URI, ) @@ -16,8 +16,6 @@ class MilvusClient: self.client = Client(uri=MILVUS_URI) def _result_to_get_result(self, result) -> GetResult: - print(result) - ids = [] documents = [] metadatas = [] @@ -26,7 +24,6 @@ class MilvusClient: _ids = [] _documents = [] _metadatas = [] - for item in match: _ids.append(item.get("id")) _documents.append(item.get("data", {}).get("text")) @@ -45,8 +42,6 @@ class MilvusClient: ) def _result_to_search_result(self, result) -> SearchResult: - print(result) - ids = [] distances = [] documents = [] @@ -102,7 +97,10 @@ class MilvusClient: index_params = self.client.prepare_index_params() index_params.add_index( - field_name="vector", index_type="HNSW", metric_type="COSINE", params={} + field_name="vector", + index_type="HNSW", + metric_type="COSINE", + params={"M": 16, "efConstruction": 100}, ) self.client.create_collection( @@ -113,12 +111,14 @@ class MilvusClient: def has_collection(self, collection_name: str) -> bool: # Check if the collection exists based on the collection name. + collection_name = collection_name.replace("-", "_") return self.client.has_collection( collection_name=f"{self.collection_prefix}_{collection_name}" ) def delete_collection(self, collection_name: str): # Delete the collection based on the collection name. 
+ collection_name = collection_name.replace("-", "_") return self.client.drop_collection( collection_name=f"{self.collection_prefix}_{collection_name}" ) @@ -127,6 +127,7 @@ class MilvusClient: self, collection_name: str, vectors: list[list[float | int]], limit: int ) -> Optional[SearchResult]: # Search for the nearest neighbor items based on the vectors and return 'limit' number of results. + collection_name = collection_name.replace("-", "_") result = self.client.search( collection_name=f"{self.collection_prefix}_{collection_name}", data=vectors, @@ -136,8 +137,68 @@ class MilvusClient: return self._result_to_search_result(result) + def query(self, collection_name: str, filter: dict, limit: Optional[int] = None): + # Construct the filter string for querying + collection_name = collection_name.replace("-", "_") + if not self.has_collection(collection_name): + return None + + filter_string = " && ".join( + [ + f'metadata["{key}"] == {json.dumps(value)}' + for key, value in filter.items() + ] + ) + + max_limit = 16383 # The maximum number of records per request + all_results = [] + + if limit is None: + limit = float("inf") # Use infinity as a placeholder for no limit + + # Initialize offset and remaining to handle pagination + offset = 0 + remaining = limit + + try: + # Loop until there are no more items to fetch or the desired limit is reached + while remaining > 0: + print("remaining", remaining) + current_fetch = min( + max_limit, remaining + ) # Determine how many items to fetch in this iteration + + results = self.client.query( + collection_name=f"{self.collection_prefix}_{collection_name}", + filter=filter_string, + output_fields=["*"], + limit=current_fetch, + offset=offset, + ) + + if not results: + break + + all_results.extend(results) + results_count = len(results) + remaining -= ( + results_count # Decrease remaining by the number of items fetched + ) + offset += results_count + + # Break the loop if the results returned are less than the requested fetch 
count + if results_count < current_fetch: + break + + print(all_results) + return self._result_to_get_result([all_results]) + except Exception as e: + print(e) + return None + def get(self, collection_name: str) -> Optional[GetResult]: # Get all the items in the collection. + collection_name = collection_name.replace("-", "_") result = self.client.query( collection_name=f"{self.collection_prefix}_{collection_name}", filter='id != ""', @@ -146,6 +207,7 @@ class MilvusClient: def insert(self, collection_name: str, items: list[VectorItem]): # Insert the items into the collection, if the collection does not exist, it will be created. + collection_name = collection_name.replace("-", "_") if not self.client.has_collection( collection_name=f"{self.collection_prefix}_{collection_name}" ): @@ -168,6 +230,7 @@ class MilvusClient: def upsert(self, collection_name: str, items: list[VectorItem]): # Update the items in the collection, if the items are not present, insert them. If the collection does not exist, it will be created. + collection_name = collection_name.replace("-", "_") if not self.client.has_collection( collection_name=f"{self.collection_prefix}_{collection_name}" ): @@ -188,17 +251,35 @@ class MilvusClient: ], ) - def delete(self, collection_name: str, ids: list[str]): + def delete( + self, + collection_name: str, + ids: Optional[list[str]] = None, + filter: Optional[dict] = None, + ): # Delete the items from the collection based on the ids. + collection_name = collection_name.replace("-", "_") + if ids: + return self.client.delete( + collection_name=f"{self.collection_prefix}_{collection_name}", + ids=ids, + ) + elif filter: + # Convert the filter dictionary to a string using JSON_CONTAINS. 
+ filter_string = " && ".join( + [ + f'metadata["{key}"] == {json.dumps(value)}' + for key, value in filter.items() + ] + ) - return self.client.delete( - collection_name=f"{self.collection_prefix}_{collection_name}", - ids=ids, - ) + return self.client.delete( + collection_name=f"{self.collection_prefix}_{collection_name}", + filter=filter_string, + ) def reset(self): # Resets the database. This will delete all collections and item entries. - collection_names = self.client.list_collections() for collection_name in collection_names: if collection_name.startswith(self.collection_prefix): diff --git a/backend/open_webui/apps/rag/vector/main.py b/backend/open_webui/apps/retrieval/vector/main.py similarity index 100% rename from backend/open_webui/apps/rag/vector/main.py rename to backend/open_webui/apps/retrieval/vector/main.py diff --git a/backend/open_webui/apps/rag/search/brave.py b/backend/open_webui/apps/retrieval/web/brave.py similarity index 93% rename from backend/open_webui/apps/rag/search/brave.py rename to backend/open_webui/apps/retrieval/web/brave.py index 2eb256b4bc..f988b3b08e 100644 --- a/backend/open_webui/apps/rag/search/brave.py +++ b/backend/open_webui/apps/retrieval/web/brave.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/duckduckgo.py b/backend/open_webui/apps/retrieval/web/duckduckgo.py similarity index 95% rename from backend/open_webui/apps/rag/search/duckduckgo.py rename to backend/open_webui/apps/retrieval/web/duckduckgo.py index a8a580acad..11e5122964 100644 --- a/backend/open_webui/apps/rag/search/duckduckgo.py +++ b/backend/open_webui/apps/retrieval/web/duckduckgo.py @@ -1,7 +1,7 @@ import logging from typing import Optional 
-from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from duckduckgo_search import DDGS from open_webui.env import SRC_LOG_LEVELS diff --git a/backend/open_webui/apps/rag/search/google_pse.py b/backend/open_webui/apps/retrieval/web/google_pse.py similarity index 94% rename from backend/open_webui/apps/rag/search/google_pse.py rename to backend/open_webui/apps/retrieval/web/google_pse.py index a7f75a6c6d..61b919583c 100644 --- a/backend/open_webui/apps/rag/search/google_pse.py +++ b/backend/open_webui/apps/retrieval/web/google_pse.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/jina_search.py b/backend/open_webui/apps/retrieval/web/jina_search.py similarity index 94% rename from backend/open_webui/apps/rag/search/jina_search.py rename to backend/open_webui/apps/retrieval/web/jina_search.py index 41cde679d1..487bbc9483 100644 --- a/backend/open_webui/apps/rag/search/jina_search.py +++ b/backend/open_webui/apps/retrieval/web/jina_search.py @@ -1,7 +1,7 @@ import logging import requests -from open_webui.apps.rag.search.main import SearchResult +from open_webui.apps.retrieval.web.main import SearchResult from open_webui.env import SRC_LOG_LEVELS from yarl import URL diff --git a/backend/open_webui/apps/rag/search/main.py b/backend/open_webui/apps/retrieval/web/main.py similarity index 100% rename from backend/open_webui/apps/rag/search/main.py rename to backend/open_webui/apps/retrieval/web/main.py diff --git a/backend/open_webui/apps/rag/search/searchapi.py b/backend/open_webui/apps/retrieval/web/searchapi.py similarity index 93% rename from 
backend/open_webui/apps/rag/search/searchapi.py rename to backend/open_webui/apps/retrieval/web/searchapi.py index 9ec9a07476..412dc6b695 100644 --- a/backend/open_webui/apps/rag/search/searchapi.py +++ b/backend/open_webui/apps/retrieval/web/searchapi.py @@ -3,7 +3,7 @@ from typing import Optional from urllib.parse import urlencode import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/searxng.py b/backend/open_webui/apps/retrieval/web/searxng.py similarity index 97% rename from backend/open_webui/apps/rag/search/searxng.py rename to backend/open_webui/apps/retrieval/web/searxng.py index 26c534aa3c..cb1eaf91d0 100644 --- a/backend/open_webui/apps/rag/search/searxng.py +++ b/backend/open_webui/apps/retrieval/web/searxng.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/serper.py b/backend/open_webui/apps/retrieval/web/serper.py similarity index 93% rename from backend/open_webui/apps/rag/search/serper.py rename to backend/open_webui/apps/retrieval/web/serper.py index ed7cc2c5fb..436fa167e9 100644 --- a/backend/open_webui/apps/rag/search/serper.py +++ b/backend/open_webui/apps/retrieval/web/serper.py @@ -3,7 +3,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff 
--git a/backend/open_webui/apps/rag/search/serply.py b/backend/open_webui/apps/retrieval/web/serply.py similarity index 95% rename from backend/open_webui/apps/rag/search/serply.py rename to backend/open_webui/apps/retrieval/web/serply.py index 260e9b30e2..1c2521c47a 100644 --- a/backend/open_webui/apps/rag/search/serply.py +++ b/backend/open_webui/apps/retrieval/web/serply.py @@ -3,7 +3,7 @@ from typing import Optional from urllib.parse import urlencode import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/serpstack.py b/backend/open_webui/apps/retrieval/web/serpstack.py similarity index 94% rename from backend/open_webui/apps/rag/search/serpstack.py rename to backend/open_webui/apps/retrieval/web/serpstack.py index 962c1a5b30..b655934de5 100644 --- a/backend/open_webui/apps/rag/search/serpstack.py +++ b/backend/open_webui/apps/retrieval/web/serpstack.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/tavily.py b/backend/open_webui/apps/retrieval/web/tavily.py similarity index 94% rename from backend/open_webui/apps/rag/search/tavily.py rename to backend/open_webui/apps/retrieval/web/tavily.py index a619d29edb..03b0be75ac 100644 --- a/backend/open_webui/apps/rag/search/tavily.py +++ b/backend/open_webui/apps/retrieval/web/tavily.py @@ -1,7 +1,7 @@ import logging import requests -from open_webui.apps.rag.search.main import SearchResult +from open_webui.apps.retrieval.web.main import SearchResult from 
open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/rag/search/testdata/brave.json b/backend/open_webui/apps/retrieval/web/testdata/brave.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/brave.json rename to backend/open_webui/apps/retrieval/web/testdata/brave.json diff --git a/backend/open_webui/apps/rag/search/testdata/google_pse.json b/backend/open_webui/apps/retrieval/web/testdata/google_pse.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/google_pse.json rename to backend/open_webui/apps/retrieval/web/testdata/google_pse.json diff --git a/backend/open_webui/apps/rag/search/testdata/searchapi.json b/backend/open_webui/apps/retrieval/web/testdata/searchapi.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/searchapi.json rename to backend/open_webui/apps/retrieval/web/testdata/searchapi.json diff --git a/backend/open_webui/apps/rag/search/testdata/searxng.json b/backend/open_webui/apps/retrieval/web/testdata/searxng.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/searxng.json rename to backend/open_webui/apps/retrieval/web/testdata/searxng.json diff --git a/backend/open_webui/apps/rag/search/testdata/serper.json b/backend/open_webui/apps/retrieval/web/testdata/serper.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/serper.json rename to backend/open_webui/apps/retrieval/web/testdata/serper.json diff --git a/backend/open_webui/apps/rag/search/testdata/serply.json b/backend/open_webui/apps/retrieval/web/testdata/serply.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/serply.json rename to backend/open_webui/apps/retrieval/web/testdata/serply.json diff --git a/backend/open_webui/apps/rag/search/testdata/serpstack.json b/backend/open_webui/apps/retrieval/web/testdata/serpstack.json similarity index 100% rename 
from backend/open_webui/apps/rag/search/testdata/serpstack.json rename to backend/open_webui/apps/retrieval/web/testdata/serpstack.json diff --git a/backend/open_webui/apps/retrieval/web/utils.py b/backend/open_webui/apps/retrieval/web/utils.py new file mode 100644 index 0000000000..2df98b33c8 --- /dev/null +++ b/backend/open_webui/apps/retrieval/web/utils.py @@ -0,0 +1,97 @@ +import socket +import urllib.parse +import validators +from typing import Union, Sequence, Iterator + +from langchain_community.document_loaders import ( + WebBaseLoader, +) +from langchain_core.documents import Document + + +from open_webui.constants import ERROR_MESSAGES +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH +from open_webui.env import SRC_LOG_LEVELS + +import logging + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["RAG"]) + + +def validate_url(url: Union[str, Sequence[str]]): + if isinstance(url, str): + if isinstance(validators.url(url), validators.ValidationError): + raise ValueError(ERROR_MESSAGES.INVALID_URL) + if not ENABLE_RAG_LOCAL_WEB_FETCH: + # Local web fetch is disabled, filter out any URLs that resolve to private IP addresses + parsed_url = urllib.parse.urlparse(url) + # Get IPv4 and IPv6 addresses + ipv4_addresses, ipv6_addresses = resolve_hostname(parsed_url.hostname) + # Check if any of the resolved addresses are private + # This is technically still vulnerable to DNS rebinding attacks, as we don't control WebBaseLoader + for ip in ipv4_addresses: + if validators.ipv4(ip, private=True): + raise ValueError(ERROR_MESSAGES.INVALID_URL) + for ip in ipv6_addresses: + if validators.ipv6(ip, private=True): + raise ValueError(ERROR_MESSAGES.INVALID_URL) + return True + elif isinstance(url, Sequence): + return all(validate_url(u) for u in url) + else: + return False + + +def resolve_hostname(hostname): + # Get address information + addr_info = socket.getaddrinfo(hostname, None) + + # Extract IP addresses from address information + ipv4_addresses 
= [info[4][0] for info in addr_info if info[0] == socket.AF_INET] + ipv6_addresses = [info[4][0] for info in addr_info if info[0] == socket.AF_INET6] + + return ipv4_addresses, ipv6_addresses + + +class SafeWebBaseLoader(WebBaseLoader): + """WebBaseLoader with enhanced error handling for URLs.""" + + def lazy_load(self) -> Iterator[Document]: + """Lazy load text from the url(s) in web_path with error handling.""" + for path in self.web_paths: + try: + soup = self._scrape(path, bs_kwargs=self.bs_kwargs) + text = soup.get_text(**self.bs_get_text_kwargs) + + # Build metadata + metadata = {"source": path} + if title := soup.find("title"): + metadata["title"] = title.get_text() + if description := soup.find("meta", attrs={"name": "description"}): + metadata["description"] = description.get( + "content", "No description found." + ) + if html := soup.find("html"): + metadata["language"] = html.get("lang", "No language found.") + + yield Document(page_content=text, metadata=metadata) + except Exception as e: + # Log the error and continue with the next URL + log.error(f"Error loading {path}: {e}") + + +def get_web_loader( + url: Union[str, Sequence[str]], + verify_ssl: bool = True, + requests_per_second: int = 2, +): + # Check if the URL is valid + if not validate_url(url): + raise ValueError(ERROR_MESSAGES.INVALID_URL) + return SafeWebBaseLoader( + url, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) diff --git a/backend/open_webui/apps/webui/internal/db.py b/backend/open_webui/apps/webui/internal/db.py index 82dba50318..bcf913e6fd 100644 --- a/backend/open_webui/apps/webui/internal/db.py +++ b/backend/open_webui/apps/webui/internal/db.py @@ -4,11 +4,20 @@ from contextlib import contextmanager from typing import Any, Optional from open_webui.apps.webui.internal.wrappers import register_connection -from open_webui.env import OPEN_WEBUI_DIR, DATABASE_URL, SRC_LOG_LEVELS +from open_webui.env import ( + OPEN_WEBUI_DIR, + 
DATABASE_URL, + SRC_LOG_LEVELS, + DATABASE_POOL_MAX_OVERFLOW, + DATABASE_POOL_RECYCLE, + DATABASE_POOL_SIZE, + DATABASE_POOL_TIMEOUT, +) from peewee_migrate import Router from sqlalchemy import Dialect, create_engine, types from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import scoped_session, sessionmaker +from sqlalchemy.pool import QueuePool, NullPool from sqlalchemy.sql.type_api import _T from typing_extensions import Self @@ -71,7 +80,20 @@ if "sqlite" in SQLALCHEMY_DATABASE_URL: SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False} ) else: - engine = create_engine(SQLALCHEMY_DATABASE_URL, pool_pre_ping=True) + if DATABASE_POOL_SIZE > 0: + engine = create_engine( + SQLALCHEMY_DATABASE_URL, + pool_size=DATABASE_POOL_SIZE, + max_overflow=DATABASE_POOL_MAX_OVERFLOW, + pool_timeout=DATABASE_POOL_TIMEOUT, + pool_recycle=DATABASE_POOL_RECYCLE, + pool_pre_ping=True, + poolclass=QueuePool, + ) + else: + engine = create_engine( + SQLALCHEMY_DATABASE_URL, pool_pre_ping=True, poolclass=NullPool + ) SessionLocal = sessionmaker( diff --git a/backend/open_webui/apps/webui/main.py b/backend/open_webui/apps/webui/main.py index 6c6f197ddb..1d12d708eb 100644 --- a/backend/open_webui/apps/webui/main.py +++ b/backend/open_webui/apps/webui/main.py @@ -10,11 +10,11 @@ from open_webui.apps.webui.routers import ( auths, chats, configs, - documents, files, functions, memories, models, + knowledge, prompts, tools, users, @@ -111,15 +111,15 @@ app.include_router(auths.router, prefix="/auths", tags=["auths"]) app.include_router(users.router, prefix="/users", tags=["users"]) app.include_router(chats.router, prefix="/chats", tags=["chats"]) -app.include_router(documents.router, prefix="/documents", tags=["documents"]) app.include_router(models.router, prefix="/models", tags=["models"]) +app.include_router(knowledge.router, prefix="/knowledge", tags=["knowledge"]) app.include_router(prompts.router, prefix="/prompts", tags=["prompts"]) 
-app.include_router(memories.router, prefix="/memories", tags=["memories"]) app.include_router(files.router, prefix="/files", tags=["files"]) app.include_router(tools.router, prefix="/tools", tags=["tools"]) app.include_router(functions.router, prefix="/functions", tags=["functions"]) +app.include_router(memories.router, prefix="/memories", tags=["memories"]) app.include_router(utils.router, prefix="/utils", tags=["utils"]) @@ -287,17 +287,20 @@ async def generate_function_chat_completion(form_data, user): __event_emitter__ = None __event_call__ = None __task__ = None + __task_body__ = None if metadata: if all(k in metadata for k in ("session_id", "chat_id", "message_id")): __event_emitter__ = get_event_emitter(metadata) __event_call__ = get_event_call(metadata) __task__ = metadata.get("task", None) + __task_body__ = metadata.get("task_body", None) extra_params = { "__event_emitter__": __event_emitter__, "__event_call__": __event_call__, "__task__": __task__, + "__task_body__": __task_body__, "__files__": files, "__user__": { "id": user.id, diff --git a/backend/open_webui/apps/webui/models/files.py b/backend/open_webui/apps/webui/models/files.py index 7fba74479d..f8d4cf8e8e 100644 --- a/backend/open_webui/apps/webui/models/files.py +++ b/backend/open_webui/apps/webui/models/files.py @@ -5,7 +5,7 @@ from typing import Optional from open_webui.apps.webui.internal.db import Base, JSONField, get_db from open_webui.env import SRC_LOG_LEVELS from pydantic import BaseModel, ConfigDict -from sqlalchemy import BigInteger, Column, String, Text +from sqlalchemy import BigInteger, Column, String, Text, JSON log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MODELS"]) @@ -20,19 +20,29 @@ class File(Base): id = Column(String, primary_key=True) user_id = Column(String) + hash = Column(Text, nullable=True) + filename = Column(Text) + data = Column(JSON, nullable=True) meta = Column(JSONField) + created_at = Column(BigInteger) + updated_at = Column(BigInteger) class 
FileModel(BaseModel): + model_config = ConfigDict(from_attributes=True) + id: str user_id: str - filename: str - meta: dict - created_at: int # timestamp in epoch + hash: Optional[str] = None - model_config = ConfigDict(from_attributes=True) + filename: str + data: Optional[dict] = None + meta: dict + + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch #################### @@ -43,14 +53,21 @@ class FileModel(BaseModel): class FileModelResponse(BaseModel): id: str user_id: str + hash: Optional[str] = None + filename: str + data: Optional[dict] = None meta: dict + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch class FileForm(BaseModel): id: str + hash: Optional[str] = None filename: str + data: dict = {} meta: dict = {} @@ -62,6 +79,7 @@ class FilesTable: **form_data.model_dump(), "user_id": user_id, "created_at": int(time.time()), + "updated_at": int(time.time()), } ) @@ -90,6 +108,16 @@ class FilesTable: with get_db() as db: return [FileModel.model_validate(file) for file in db.query(File).all()] + def get_files_by_ids(self, ids: list[str]) -> list[FileModel]: + with get_db() as db: + return [ + FileModel.model_validate(file) + for file in db.query(File) + .filter(File.id.in_(ids)) + .order_by(File.updated_at.desc()) + .all() + ] + def get_files_by_user_id(self, user_id: str) -> list[FileModel]: with get_db() as db: return [ @@ -97,6 +125,38 @@ class FilesTable: for file in db.query(File).filter_by(user_id=user_id).all() ] + def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.hash = hash + db.commit() + + return FileModel.model_validate(file) + except Exception: + return None + + def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.data = {**(file.data if file.data else {}), **data} + 
db.commit() + return FileModel.model_validate(file) + except Exception as e: + + return None + + def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.meta = {**(file.meta if file.meta else {}), **meta} + db.commit() + return FileModel.model_validate(file) + except Exception: + return None + def delete_file_by_id(self, id: str) -> bool: with get_db() as db: try: diff --git a/backend/open_webui/apps/webui/models/knowledge.py b/backend/open_webui/apps/webui/models/knowledge.py new file mode 100644 index 0000000000..698cccda0d --- /dev/null +++ b/backend/open_webui/apps/webui/models/knowledge.py @@ -0,0 +1,152 @@ +import json +import logging +import time +from typing import Optional +import uuid + +from open_webui.apps.webui.internal.db import Base, get_db +from open_webui.env import SRC_LOG_LEVELS +from pydantic import BaseModel, ConfigDict +from sqlalchemy import BigInteger, Column, String, Text, JSON + + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["MODELS"]) + +#################### +# Knowledge DB Schema +#################### + + +class Knowledge(Base): + __tablename__ = "knowledge" + + id = Column(Text, unique=True, primary_key=True) + user_id = Column(Text) + + name = Column(Text) + description = Column(Text) + + data = Column(JSON, nullable=True) + meta = Column(JSON, nullable=True) + + created_at = Column(BigInteger) + updated_at = Column(BigInteger) + + +class KnowledgeModel(BaseModel): + model_config = ConfigDict(from_attributes=True) + + id: str + user_id: str + + name: str + description: str + + data: Optional[dict] = None + meta: Optional[dict] = None + + created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch + + +#################### +# Forms +#################### + + +class KnowledgeResponse(BaseModel): + id: str + name: str + description: str + data: Optional[dict] = None + meta: Optional[dict] = None 
+ created_at: int # timestamp in epoch + updated_at: int # timestamp in epoch + + +class KnowledgeForm(BaseModel): + name: str + description: str + data: Optional[dict] = None + + +class KnowledgeUpdateForm(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + data: Optional[dict] = None + + +class KnowledgeTable: + def insert_new_knowledge( + self, user_id: str, form_data: KnowledgeForm + ) -> Optional[KnowledgeModel]: + with get_db() as db: + knowledge = KnowledgeModel( + **{ + **form_data.model_dump(), + "id": str(uuid.uuid4()), + "user_id": user_id, + "created_at": int(time.time()), + "updated_at": int(time.time()), + } + ) + + try: + result = Knowledge(**knowledge.model_dump()) + db.add(result) + db.commit() + db.refresh(result) + if result: + return KnowledgeModel.model_validate(result) + else: + return None + except Exception: + return None + + def get_knowledge_items(self) -> list[KnowledgeModel]: + with get_db() as db: + return [ + KnowledgeModel.model_validate(knowledge) + for knowledge in db.query(Knowledge) + .order_by(Knowledge.updated_at.desc()) + .all() + ] + + def get_knowledge_by_id(self, id: str) -> Optional[KnowledgeModel]: + try: + with get_db() as db: + knowledge = db.query(Knowledge).filter_by(id=id).first() + return KnowledgeModel.model_validate(knowledge) if knowledge else None + except Exception: + return None + + def update_knowledge_by_id( + self, id: str, form_data: KnowledgeUpdateForm, overwrite: bool = False + ) -> Optional[KnowledgeModel]: + try: + with get_db() as db: + knowledge = self.get_knowledge_by_id(id=id) + db.query(Knowledge).filter_by(id=id).update( + { + **form_data.model_dump(exclude_none=True), + "updated_at": int(time.time()), + } + ) + db.commit() + return self.get_knowledge_by_id(id=id) + except Exception as e: + log.exception(e) + return None + + def delete_knowledge_by_id(self, id: str) -> bool: + try: + with get_db() as db: + db.query(Knowledge).filter_by(id=id).delete() + db.commit() + 
return True + except Exception: + return False + + +Knowledges = KnowledgeTable() diff --git a/backend/open_webui/apps/webui/routers/chats.py b/backend/open_webui/apps/webui/routers/chats.py index 21f95d9fe8..ca7e95baf4 100644 --- a/backend/open_webui/apps/webui/routers/chats.py +++ b/backend/open_webui/apps/webui/routers/chats.py @@ -52,10 +52,9 @@ async def get_session_user_chat_list( @router.delete("/", response_model=bool) async def delete_all_user_chats(request: Request, user=Depends(get_verified_user)): - if ( - user.role == "user" - and not request.app.state.config.USER_PERMISSIONS["chat"]["deletion"] - ): + if user.role == "user" and not request.app.state.config.USER_PERMISSIONS.get( + "chat", {} + ).get("deletion", {}): raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.ACCESS_PROHIBITED, @@ -292,7 +291,9 @@ async def delete_chat_by_id(request: Request, id: str, user=Depends(get_verified result = Chats.delete_chat_by_id(id) return result else: - if not request.app.state.config.USER_PERMISSIONS["chat"]["deletion"]: + if not request.app.state.config.USER_PERMISSIONS.get("chat", {}).get( + "deletion", {} + ): raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.ACCESS_PROHIBITED, diff --git a/backend/open_webui/apps/webui/routers/files.py b/backend/open_webui/apps/webui/routers/files.py index 1a326bcd8c..0679ae062d 100644 --- a/backend/open_webui/apps/webui/routers/files.py +++ b/backend/open_webui/apps/webui/routers/files.py @@ -4,13 +4,22 @@ import shutil import uuid from pathlib import Path from typing import Optional +from pydantic import BaseModel +import mimetypes + from open_webui.apps.webui.models.files import FileForm, FileModel, Files +from open_webui.apps.retrieval.main import process_file, ProcessFileForm + from open_webui.config import UPLOAD_DIR -from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS +from open_webui.constants import 
ERROR_MESSAGES + + from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status -from fastapi.responses import FileResponse +from fastapi.responses import FileResponse, StreamingResponse + + from open_webui.utils.utils import get_admin_user, get_verified_user log = logging.getLogger(__name__) @@ -58,6 +67,13 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)): ), ) + try: + process_file(ProcessFileForm(file_id=id)) + file = Files.get_file_by_id(id=id) + except Exception as e: + log.exception(e) + log.error(f"Error processing file: {file.id}") + if file: return file else: @@ -143,6 +159,55 @@ async def get_file_by_id(id: str, user=Depends(get_verified_user)): ) +############################ +# Get File Data Content By Id +############################ + + +@router.get("/{id}/data/content") +async def get_file_data_content_by_id(id: str, user=Depends(get_verified_user)): + file = Files.get_file_by_id(id) + + if file and (file.user_id == user.id or user.role == "admin"): + return {"content": file.data.get("content", "")} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# Update File Data Content By Id +############################ + + +class ContentForm(BaseModel): + content: str + + +@router.post("/{id}/data/content/update") +async def update_file_data_content_by_id( + id: str, form_data: ContentForm, user=Depends(get_verified_user) +): + file = Files.get_file_by_id(id) + + if file and (file.user_id == user.id or user.role == "admin"): + try: + process_file(ProcessFileForm(file_id=id, content=form_data.content)) + file = Files.get_file_by_id(id=id) + except Exception as e: + log.exception(e) + log.error(f"Error processing file: {file.id}") + + return {"content": file.data.get("content", "")} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + 
############################ # Get File Content By Id ############################ @@ -176,16 +241,32 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)): file = Files.get_file_by_id(id) if file and (file.user_id == user.id or user.role == "admin"): - file_path = Path(file.meta["path"]) + file_path = file.meta.get("path") + if file_path: + file_path = Path(file_path) - # Check if the file already exists in the cache - if file_path.is_file(): - print(f"file_path: {file_path}") - return FileResponse(file_path) + # Check if the file already exists in the cache + if file_path.is_file(): + print(f"file_path: {file_path}") + return FileResponse(file_path) + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) else: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=ERROR_MESSAGES.NOT_FOUND, + # File path doesn’t exist, return the content as .txt if possible + file_content = file.content.get("content", "") + file_name = file.filename + + # Create a generator that encodes the file content + def generator(): + yield file_content.encode("utf-8") + + return StreamingResponse( + generator(), + media_type="text/plain", + headers={"Content-Disposition": f"attachment; filename={file_name}"}, ) else: raise HTTPException( diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py new file mode 100644 index 0000000000..a792c24fa3 --- /dev/null +++ b/backend/open_webui/apps/webui/routers/knowledge.py @@ -0,0 +1,348 @@ +import json +from typing import Optional, Union +from pydantic import BaseModel +from fastapi import APIRouter, Depends, HTTPException, status +import logging + +from open_webui.apps.webui.models.knowledge import ( + Knowledges, + KnowledgeUpdateForm, + KnowledgeForm, + KnowledgeResponse, +) +from open_webui.apps.webui.models.files import Files, FileModel +from open_webui.apps.retrieval.vector.connector import 
VECTOR_DB_CLIENT +from open_webui.apps.retrieval.main import process_file, ProcessFileForm + + +from open_webui.constants import ERROR_MESSAGES +from open_webui.utils.utils import get_admin_user, get_verified_user +from open_webui.env import SRC_LOG_LEVELS + + +log = logging.getLogger(__name__) +log.setLevel(SRC_LOG_LEVELS["MODELS"]) + +router = APIRouter() + +############################ +# GetKnowledgeItems +############################ + + +@router.get( + "/", response_model=Optional[Union[list[KnowledgeResponse], KnowledgeResponse]] +) +async def get_knowledge_items( + id: Optional[str] = None, user=Depends(get_verified_user) +): + if id: + knowledge = Knowledges.get_knowledge_by_id(id=id) + + if knowledge: + return knowledge + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + else: + return [ + KnowledgeResponse(**knowledge.model_dump()) + for knowledge in Knowledges.get_knowledge_items() + ] + + +############################ +# CreateNewKnowledge +############################ + + +@router.post("/create", response_model=Optional[KnowledgeResponse]) +async def create_new_knowledge(form_data: KnowledgeForm, user=Depends(get_admin_user)): + knowledge = Knowledges.insert_new_knowledge(user.id, form_data) + + if knowledge: + return knowledge + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_EXISTS, + ) + + +############################ +# GetKnowledgeById +############################ + + +class KnowledgeFilesResponse(KnowledgeResponse): + files: list[FileModel] + + +@router.get("/{id}", response_model=Optional[KnowledgeFilesResponse]) +async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)): + knowledge = Knowledges.get_knowledge_by_id(id=id) + + if knowledge: + file_ids = knowledge.data.get("file_ids", []) if knowledge.data else [] + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + 
**knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# UpdateKnowledgeById +############################ + + +@router.post("/{id}/update", response_model=Optional[KnowledgeFilesResponse]) +async def update_knowledge_by_id( + id: str, + form_data: KnowledgeUpdateForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.update_knowledge_by_id(id=id, form_data=form_data) + + if knowledge: + file_ids = knowledge.data.get("file_ids", []) if knowledge.data else [] + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.ID_TAKEN, + ) + + +############################ +# AddFileToKnowledge +############################ + + +class KnowledgeFileIdForm(BaseModel): + file_id: str + + +@router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse]) +def add_file_to_knowledge_by_id( + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + if not file.data: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_NOT_PROCESSED, + ) + + # Add content to the vector database + try: + process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + except Exception as e: + log.debug(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + if form_data.file_id not in file_ids: + file_ids.append(form_data.file_id) + data["file_ids"] = file_ids + 
+ knowledge = Knowledges.update_knowledge_by_id( + id=id, form_data=KnowledgeUpdateForm(data=data) + ) + + if knowledge: + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("knowledge"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("file_id"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +@router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) +def update_file_from_knowledge_by_id( + id: str, + form_data: KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + # Remove content from the vector database + VECTOR_DB_CLIENT.delete( + collection_name=knowledge.id, filter={"file_id": form_data.file_id} + ) + + # Add content to the vector database + try: + process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id)) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# RemoveFileFromKnowledge +############################ + + +@router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse]) +def remove_file_from_knowledge_by_id( + id: str, + form_data: 
KnowledgeFileIdForm, + user=Depends(get_admin_user), +): + knowledge = Knowledges.get_knowledge_by_id(id=id) + file = Files.get_file_by_id(form_data.file_id) + if not file: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + # Remove content from the vector database + VECTOR_DB_CLIENT.delete( + collection_name=knowledge.id, filter={"file_id": form_data.file_id} + ) + + result = VECTOR_DB_CLIENT.query( + collection_name=knowledge.id, + filter={"file_id": form_data.file_id}, + ) + + Files.delete_file_by_id(form_data.file_id) + + if knowledge: + data = knowledge.data or {} + file_ids = data.get("file_ids", []) + + if form_data.file_id in file_ids: + file_ids.remove(form_data.file_id) + data["file_ids"] = file_ids + + knowledge = Knowledges.update_knowledge_by_id( + id=id, form_data=KnowledgeUpdateForm(data=data) + ) + + if knowledge: + files = Files.get_files_by_ids(file_ids) + + return KnowledgeFilesResponse( + **knowledge.model_dump(), + files=files, + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("knowledge"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("file_id"), + ) + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + +############################ +# ResetKnowledgeById +############################ + + +@router.post("/{id}/reset", response_model=Optional[KnowledgeResponse]) +async def reset_knowledge_by_id(id: str, user=Depends(get_admin_user)): + try: + VECTOR_DB_CLIENT.delete_collection(collection_name=id) + except Exception as e: + log.debug(e) + pass + + knowledge = Knowledges.update_knowledge_by_id( + id=id, form_data=KnowledgeUpdateForm(data={"file_ids": []}) + ) + return knowledge + + +############################ +# DeleteKnowledgeById +############################ + + +@router.delete("/{id}/delete", 
response_model=bool) +async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)): + try: + VECTOR_DB_CLIENT.delete_collection(collection_name=id) + except Exception as e: + log.debug(e) + pass + result = Knowledges.delete_knowledge_by_id(id=id) + return result diff --git a/backend/open_webui/apps/webui/routers/memories.py b/backend/open_webui/apps/webui/routers/memories.py index d659833bc2..ccf84a9d4c 100644 --- a/backend/open_webui/apps/webui/routers/memories.py +++ b/backend/open_webui/apps/webui/routers/memories.py @@ -4,7 +4,7 @@ import logging from typing import Optional from open_webui.apps.webui.models.memories import Memories, MemoryModel -from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.utils.utils import get_verified_user from open_webui.env import SRC_LOG_LEVELS diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index f531a8728d..bfc9a4ded5 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -521,15 +521,6 @@ Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True) CACHE_DIR = f"{DATA_DIR}/cache" Path(CACHE_DIR).mkdir(parents=True, exist_ok=True) - -#################################### -# Docs DIR -#################################### - -DOCS_DIR = os.getenv("DOCS_DIR", f"{DATA_DIR}/docs") -Path(DOCS_DIR).mkdir(parents=True, exist_ok=True) - - #################################### # Tools DIR #################################### @@ -561,16 +552,6 @@ OLLAMA_API_BASE_URL = os.environ.get( ) OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") -AIOHTTP_CLIENT_TIMEOUT = os.environ.get("AIOHTTP_CLIENT_TIMEOUT", "") - -if AIOHTTP_CLIENT_TIMEOUT == "": - AIOHTTP_CLIENT_TIMEOUT = None -else: - try: - AIOHTTP_CLIENT_TIMEOUT = int(AIOHTTP_CLIENT_TIMEOUT) - except Exception: - AIOHTTP_CLIENT_TIMEOUT = 300 - K8S_FLAG = os.environ.get("K8S_FLAG", "") USE_OLLAMA_DOCKER = 
os.environ.get("USE_OLLAMA_DOCKER", "false") @@ -921,7 +902,7 @@ CHROMA_HTTP_SSL = os.environ.get("CHROMA_HTTP_SSL", "false").lower() == "true" MILVUS_URI = os.environ.get("MILVUS_URI", f"{DATA_DIR}/vector_db/milvus.db") #################################### -# RAG +# Information Retrieval (RAG) #################################### # RAG Content Extraction diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py index d55216bb5d..37461402b6 100644 --- a/backend/open_webui/constants.py +++ b/backend/open_webui/constants.py @@ -34,8 +34,8 @@ class ERROR_MESSAGES(str, Enum): ID_TAKEN = "Uh-oh! This id is already registered. Please choose another id string." MODEL_ID_TAKEN = "Uh-oh! This model id is already registered. Please choose another model id string." - NAME_TAG_TAKEN = "Uh-oh! This name tag is already registered. Please choose another name tag string." + INVALID_TOKEN = ( "Your session has expired or the token is invalid. Please sign in again." ) @@ -90,6 +90,15 @@ class ERROR_MESSAGES(str, Enum): "The Ollama API is disabled. Please enable it to use this feature." ) + FILE_TOO_LARGE = ( + lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}." + ) + + DUPLICATE_CONTENT = ( + "Duplicate content detected. Please provide unique content to proceed." + ) + FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding." 
+ class TASKS(str, Enum): def __str__(self) -> str: diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py index 4f1403e977..fbf22d84d2 100644 --- a/backend/open_webui/env.py +++ b/backend/open_webui/env.py @@ -258,6 +258,45 @@ DATABASE_URL = os.environ.get("DATABASE_URL", f"sqlite:///{DATA_DIR}/webui.db") if "postgres://" in DATABASE_URL: DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://") +DATABASE_POOL_SIZE = os.environ.get("DATABASE_POOL_SIZE", 0) + +if DATABASE_POOL_SIZE == "": + DATABASE_POOL_SIZE = 0 +else: + try: + DATABASE_POOL_SIZE = int(DATABASE_POOL_SIZE) + except Exception: + DATABASE_POOL_SIZE = 0 + +DATABASE_POOL_MAX_OVERFLOW = os.environ.get("DATABASE_POOL_MAX_OVERFLOW", 0) + +if DATABASE_POOL_MAX_OVERFLOW == "": + DATABASE_POOL_MAX_OVERFLOW = 0 +else: + try: + DATABASE_POOL_MAX_OVERFLOW = int(DATABASE_POOL_MAX_OVERFLOW) + except Exception: + DATABASE_POOL_MAX_OVERFLOW = 0 + +DATABASE_POOL_TIMEOUT = os.environ.get("DATABASE_POOL_TIMEOUT", 30) + +if DATABASE_POOL_TIMEOUT == "": + DATABASE_POOL_TIMEOUT = 30 +else: + try: + DATABASE_POOL_TIMEOUT = int(DATABASE_POOL_TIMEOUT) + except Exception: + DATABASE_POOL_TIMEOUT = 30 + +DATABASE_POOL_RECYCLE = os.environ.get("DATABASE_POOL_RECYCLE", 3600) + +if DATABASE_POOL_RECYCLE == "": + DATABASE_POOL_RECYCLE = 3600 +else: + try: + DATABASE_POOL_RECYCLE = int(DATABASE_POOL_RECYCLE) + except Exception: + DATABASE_POOL_RECYCLE = 3600 RESET_CONFIG_ON_START = ( os.environ.get("RESET_CONFIG_ON_START", "False").lower() == "true" @@ -305,3 +344,14 @@ ENABLE_WEBSOCKET_SUPPORT = ( WEBSOCKET_MANAGER = os.environ.get("WEBSOCKET_MANAGER", "") WEBSOCKET_REDIS_URL = os.environ.get("WEBSOCKET_REDIS_URL", "redis://localhost:6379/0") + + +AIOHTTP_CLIENT_TIMEOUT = os.environ.get("AIOHTTP_CLIENT_TIMEOUT", "") + +if AIOHTTP_CLIENT_TIMEOUT == "": + AIOHTTP_CLIENT_TIMEOUT = None +else: + try: + AIOHTTP_CLIENT_TIMEOUT = int(AIOHTTP_CLIENT_TIMEOUT) + except Exception: + AIOHTTP_CLIENT_TIMEOUT = 300 diff 
--git a/backend/open_webui/main.py b/backend/open_webui/main.py index 4af48906b1..7086a3cc9a 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -16,37 +16,45 @@ from typing import Optional import aiohttp import requests - -from open_webui.apps.audio.main import app as audio_app -from open_webui.apps.images.main import app as images_app -from open_webui.apps.ollama.main import app as ollama_app from open_webui.apps.ollama.main import ( - GenerateChatCompletionForm, + app as ollama_app, + get_all_models as get_ollama_models, generate_chat_completion as generate_ollama_chat_completion, generate_openai_chat_completion as generate_ollama_openai_chat_completion, + GenerateChatCompletionForm, ) -from open_webui.apps.ollama.main import get_all_models as get_ollama_models -from open_webui.apps.openai.main import app as openai_app from open_webui.apps.openai.main import ( + app as openai_app, generate_chat_completion as generate_openai_chat_completion, + get_all_models as get_openai_models, ) -from open_webui.apps.openai.main import get_all_models as get_openai_models -from open_webui.apps.rag.main import app as rag_app -from open_webui.apps.rag.utils import get_rag_context, rag_template -from open_webui.apps.socket.main import app as socket_app, periodic_usage_pool_cleanup -from open_webui.apps.socket.main import get_event_call, get_event_emitter -from open_webui.apps.webui.internal.db import Session -from open_webui.apps.webui.main import app as webui_app + +from open_webui.apps.retrieval.main import app as retrieval_app +from open_webui.apps.retrieval.utils import get_rag_context, rag_template + +from open_webui.apps.socket.main import ( + app as socket_app, + periodic_usage_pool_cleanup, + get_event_call, + get_event_emitter, +) + from open_webui.apps.webui.main import ( + app as webui_app, generate_function_chat_completion, get_pipe_models, ) +from open_webui.apps.webui.internal.db import Session + from open_webui.apps.webui.models.auths import 
Auths from open_webui.apps.webui.models.functions import Functions from open_webui.apps.webui.models.models import Models from open_webui.apps.webui.models.users import UserModel, Users + from open_webui.apps.webui.utils import load_function_module_by_id +from open_webui.apps.audio.main import app as audio_app +from open_webui.apps.images.main import app as images_app from authlib.integrations.starlette_client import OAuth from authlib.oidc.core import UserInfo @@ -187,8 +195,6 @@ https://github.com/open-webui/open-webui @asynccontextmanager async def lifespan(app: FastAPI): - run_migrations() - if RESET_CONFIG_ON_START: reset_config() @@ -440,37 +446,44 @@ async def chat_completion_tools_handler( if not content: return body, {} - result = json.loads(content) - - tool_function_name = result.get("name", None) - if tool_function_name not in tools: - return body, {} - - tool_function_params = result.get("parameters", {}) - try: - tool_output = await tools[tool_function_name]["callable"]( - **tool_function_params - ) + content = content[content.find("{") : content.rfind("}") + 1] + if not content: + raise Exception("No JSON object found in the response") + + result = json.loads(content) + + tool_function_name = result.get("name", None) + if tool_function_name not in tools: + return body, {} + + tool_function_params = result.get("parameters", {}) + + try: + tool_output = await tools[tool_function_name]["callable"]( + **tool_function_params + ) + except Exception as e: + tool_output = str(e) + + if tools[tool_function_name]["citation"]: + citations.append( + { + "source": { + "name": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}" + }, + "document": [tool_output], + "metadata": [{"source": tool_function_name}], + } + ) + if tools[tool_function_name]["file_handler"]: + skip_files = True + + if isinstance(tool_output, str): + contexts.append(tool_output) except Exception as e: - tool_output = str(e) - - if tools[tool_function_name]["citation"]: - 
citations.append( - { - "source": { - "name": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}" - }, - "document": [tool_output], - "metadata": [{"source": tool_function_name}], - } - ) - if tools[tool_function_name]["file_handler"]: - skip_files = True - - if isinstance(tool_output, str): - contexts.append(tool_output) - + log.exception(f"Error: {e}") + content = None except Exception as e: log.exception(f"Error: {e}") content = None @@ -491,11 +504,11 @@ async def chat_completion_files_handler(body) -> tuple[dict, dict[str, list]]: contexts, citations = get_rag_context( files=files, messages=body["messages"], - embedding_function=rag_app.state.EMBEDDING_FUNCTION, - k=rag_app.state.config.TOP_K, - reranking_function=rag_app.state.sentence_transformer_rf, - r=rag_app.state.config.RELEVANCE_THRESHOLD, - hybrid_search=rag_app.state.config.ENABLE_RAG_HYBRID_SEARCH, + embedding_function=retrieval_app.state.EMBEDDING_FUNCTION, + k=retrieval_app.state.config.TOP_K, + reranking_function=retrieval_app.state.sentence_transformer_rf, + r=retrieval_app.state.config.RELEVANCE_THRESHOLD, + hybrid_search=retrieval_app.state.config.ENABLE_RAG_HYBRID_SEARCH, ) log.debug(f"rag_contexts: {contexts}, citations: {citations}") @@ -608,7 +621,7 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware): if prompt is None: raise Exception("No user message found") if ( - rag_app.state.config.RELEVANCE_THRESHOLD == 0 + retrieval_app.state.config.RELEVANCE_THRESHOLD == 0 and context_string.strip() == "" ): log.debug( @@ -620,14 +633,14 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware): if model["owned_by"] == "ollama": body["messages"] = prepend_to_first_user_message_content( rag_template( - rag_app.state.config.RAG_TEMPLATE, context_string, prompt + retrieval_app.state.config.RAG_TEMPLATE, context_string, prompt ), body["messages"], ) else: body["messages"] = add_or_update_system_message( rag_template( - rag_app.state.config.RAG_TEMPLATE, context_string, prompt + 
retrieval_app.state.config.RAG_TEMPLATE, context_string, prompt ), body["messages"], ) @@ -761,10 +774,22 @@ class PipelineMiddleware(BaseHTTPMiddleware): # Parse string to JSON data = json.loads(body_str) if body_str else {} - user = get_current_user( - request, - get_http_authorization_cred(request.headers["Authorization"]), - ) + try: + user = get_current_user( + request, + get_http_authorization_cred(request.headers["Authorization"]), + ) + except KeyError as e: + if len(e.args) > 1: + return JSONResponse( + status_code=e.args[0], + content={"detail": e.args[1]}, + ) + else: + return JSONResponse( + status_code=status.HTTP_401_UNAUTHORIZED, + content={"detail": "Not authenticated"}, + ) try: data = filter_pipeline(data, user) @@ -837,7 +862,7 @@ async def check_url(request: Request, call_next): async def update_embedding_function(request: Request, call_next): response = await call_next(request) if "/embedding/update" in request.url.path: - webui_app.state.EMBEDDING_FUNCTION = rag_app.state.EMBEDDING_FUNCTION + webui_app.state.EMBEDDING_FUNCTION = retrieval_app.state.EMBEDDING_FUNCTION return response @@ -865,11 +890,12 @@ app.mount("/openai", openai_app) app.mount("/images/api/v1", images_app) app.mount("/audio/api/v1", audio_app) -app.mount("/rag/api/v1", rag_app) +app.mount("/retrieval/api/v1", retrieval_app) app.mount("/api/v1", webui_app) -webui_app.state.EMBEDDING_FUNCTION = rag_app.state.EMBEDDING_FUNCTION + +webui_app.state.EMBEDDING_FUNCTION = retrieval_app.state.EMBEDDING_FUNCTION async def get_all_models(): @@ -1466,7 +1492,7 @@ Prompt: {{prompt:middletruncate:8000}}""" } ), "chat_id": form_data.get("chat_id", None), - "metadata": {"task": str(TASKS.TITLE_GENERATION)}, + "metadata": {"task": str(TASKS.TITLE_GENERATION), "task_body": form_data}, } log.debug(payload) @@ -1543,7 +1569,7 @@ Search Query:""" "max_completion_tokens": 30, } ), - "metadata": {"task": str(TASKS.QUERY_GENERATION)}, + "metadata": {"task": str(TASKS.QUERY_GENERATION), 
"task_body": form_data}, } log.debug(payload) @@ -1611,7 +1637,7 @@ Message: """{{prompt}}""" } ), "chat_id": form_data.get("chat_id", None), - "metadata": {"task": str(TASKS.EMOJI_GENERATION)}, + "metadata": {"task": str(TASKS.EMOJI_GENERATION), "task_body": form_data}, } log.debug(payload) @@ -1670,7 +1696,10 @@ Responses from models: {{responses}}""" "messages": [{"role": "user", "content": content}], "stream": form_data.get("stream", False), "chat_id": form_data.get("chat_id", None), - "metadata": {"task": str(TASKS.MOA_RESPONSE_GENERATION)}, + "metadata": { + "task": str(TASKS.MOA_RESPONSE_GENERATION), + "task_body": form_data, + }, } log.debug(payload) @@ -2054,7 +2083,7 @@ async def get_app_config(request: Request): "enable_login_form": webui_app.state.config.ENABLE_LOGIN_FORM, **( { - "enable_web_search": rag_app.state.config.ENABLE_RAG_WEB_SEARCH, + "enable_web_search": retrieval_app.state.config.ENABLE_RAG_WEB_SEARCH, "enable_image_generation": images_app.state.config.ENABLED, "enable_community_sharing": webui_app.state.config.ENABLE_COMMUNITY_SHARING, "enable_message_rating": webui_app.state.config.ENABLE_MESSAGE_RATING, @@ -2080,8 +2109,8 @@ async def get_app_config(request: Request): }, }, "file": { - "max_size": rag_app.state.config.FILE_MAX_SIZE, - "max_count": rag_app.state.config.FILE_MAX_COUNT, + "max_size": retrieval_app.state.config.FILE_MAX_SIZE, + "max_count": retrieval_app.state.config.FILE_MAX_COUNT, }, "permissions": {**webui_app.state.config.USER_PERMISSIONS}, } @@ -2153,7 +2182,8 @@ async def get_app_changelog(): @app.get("/api/version/updates") async def get_app_latest_release_version(): try: - async with aiohttp.ClientSession(trust_env=True) as session: + timeout = aiohttp.ClientTimeout(total=1) + async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get( "https://api.github.com/repos/open-webui/open-webui/releases/latest" ) as response: @@ -2162,11 +2192,9 @@ async def 
get_app_latest_release_version(): latest_version = data["tag_name"] return {"current": VERSION, "latest": latest_version[1:]} - except aiohttp.ClientError: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=ERROR_MESSAGES.RATE_LIMIT_EXCEEDED, - ) + except Exception as e: + log.debug(e) + return {"current": VERSION, "latest": VERSION} ############################ diff --git a/backend/open_webui/migrations/scripts/revision.py b/backend/open_webui/migrations/scripts/revision.py deleted file mode 100644 index 32ebc9e35c..0000000000 --- a/backend/open_webui/migrations/scripts/revision.py +++ /dev/null @@ -1,19 +0,0 @@ -from alembic import command -from alembic.config import Config - -from open_webui.env import OPEN_WEBUI_DIR - -alembic_cfg = Config(OPEN_WEBUI_DIR / "alembic.ini") - -# Set the script location dynamically -migrations_path = OPEN_WEBUI_DIR / "migrations" -alembic_cfg.set_main_option("script_location", str(migrations_path)) - - -def revision(message: str) -> None: - command.revision(alembic_cfg, message=message, autogenerate=False) - - -if __name__ == "__main__": - input_message = input("Enter the revision message: ") - revision(input_message) diff --git a/backend/open_webui/migrations/util.py b/backend/open_webui/migrations/util.py index 401bb94d03..955066602a 100644 --- a/backend/open_webui/migrations/util.py +++ b/backend/open_webui/migrations/util.py @@ -7,3 +7,9 @@ def get_existing_tables(): inspector = Inspector.from_engine(con) tables = set(inspector.get_table_names()) return tables + + +def get_revision_id(): + import uuid + + return str(uuid.uuid4()).replace("-", "")[:12] diff --git a/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py new file mode 100644 index 0000000000..881e6ae641 --- /dev/null +++ b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_knowledge_table.py @@ -0,0 +1,80 @@ +"""Add knowledge table 
+ +Revision ID: 6a39f3d8e55c +Revises: c0fbf31ca0db +Create Date: 2024-10-01 14:02:35.241684 + +""" + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.sql import table, column, select +import json + + +revision = "6a39f3d8e55c" +down_revision = "c0fbf31ca0db" +branch_labels = None +depends_on = None + + +def upgrade(): + # Creating the 'knowledge' table + print("Creating knowledge table") + knowledge_table = op.create_table( + "knowledge", + sa.Column("id", sa.Text(), primary_key=True), + sa.Column("user_id", sa.Text(), nullable=False), + sa.Column("name", sa.Text(), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("data", sa.JSON(), nullable=True), + sa.Column("meta", sa.JSON(), nullable=True), + sa.Column("created_at", sa.BigInteger(), nullable=False), + sa.Column("updated_at", sa.BigInteger(), nullable=True), + ) + + print("Migrating data from document table to knowledge table") + # Representation of the existing 'document' table + document_table = table( + "document", + column("collection_name", sa.String()), + column("user_id", sa.String()), + column("name", sa.String()), + column("title", sa.Text()), + column("content", sa.Text()), + column("timestamp", sa.BigInteger()), + ) + + # Select all from existing document table + documents = op.get_bind().execute( + select( + document_table.c.collection_name, + document_table.c.user_id, + document_table.c.name, + document_table.c.title, + document_table.c.content, + document_table.c.timestamp, + ) + ) + + # Insert data into knowledge table from document table + for doc in documents: + op.get_bind().execute( + knowledge_table.insert().values( + id=doc.collection_name, + user_id=doc.user_id, + description=doc.name, + meta={ + "legacy": True, + "document": True, + "tags": json.loads(doc.content or "{}").get("tags", []), + }, + name=doc.title, + created_at=doc.timestamp, + updated_at=doc.timestamp, # using created_at for both created_at and updated_at in project + ) + ) 
+ + +def downgrade(): + op.drop_table("knowledge") diff --git a/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py new file mode 100644 index 0000000000..5f7f2abf70 --- /dev/null +++ b/backend/open_webui/migrations/versions/c0fbf31ca0db_update_file_table.py @@ -0,0 +1,32 @@ +"""Update file table + +Revision ID: c0fbf31ca0db +Revises: ca81bd47c050 +Create Date: 2024-09-20 15:26:35.241684 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "c0fbf31ca0db" +down_revision: Union[str, None] = "ca81bd47c050" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("file", sa.Column("hash", sa.Text(), nullable=True)) + op.add_column("file", sa.Column("data", sa.JSON(), nullable=True)) + op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True)) + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("file", "updated_at") + op.drop_column("file", "data") + op.drop_column("file", "hash") diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py index bdce74b057..a5af492ba3 100644 --- a/backend/open_webui/utils/misc.py +++ b/backend/open_webui/utils/misc.py @@ -122,7 +122,7 @@ def openai_chat_completion_message_template( ) -> dict: template = openai_chat_message_template(model) template["object"] = "chat.completion" - if message: + if message is not None: template["choices"][0]["message"] = {"content": message, "role": "assistant"} template["choices"][0]["finish_reason"] = "stop" return template diff --git a/backend/open_webui/utils/schemas.py b/backend/open_webui/utils/schemas.py index 958e57318d..4d1d448cd7 100644 --- a/backend/open_webui/utils/schemas.py +++ b/backend/open_webui/utils/schemas.py @@ -104,5 +104,9 @@ def json_schema_to_pydantic_type(json_schema: dict[str, Any]) -> Any: return Optional[Any] # Use Optional[Any] for nullable fields elif type_ == "literal": return Literal[literal_eval(json_schema.get("enum"))] + elif type_ == "optional": + inner_schema = json_schema.get("items", {"type": "string"}) + inner_type = json_schema_to_pydantic_type(inner_schema) + return Optional[inner_type] else: raise ValueError(f"Unsupported JSON schema type: {type_}") diff --git a/backend/requirements.txt b/backend/requirements.txt index 2554bb5f88..80b4d541f7 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,6 +1,6 @@ fastapi==0.111.0 uvicorn[standard]==0.30.6 -pydantic==2.8.2 +pydantic==2.9.2 python-multipart==0.0.9 Flask==3.0.3 @@ -11,7 +11,7 @@ python-jose==3.3.0 passlib[bcrypt]==1.7.4 requests==2.32.3 -aiohttp==3.10.5 +aiohttp==3.10.8 sqlalchemy==2.0.32 alembic==1.13.2 @@ -36,16 +36,18 @@ tiktoken langchain==0.2.15 langchain-community==0.2.12 -langchain-chroma==0.1.2 +langchain-chroma==0.1.4 fake-useragent==1.5.1 -chromadb==0.5.5 -pymilvus==2.4.6 +chromadb==0.5.9 +pymilvus==2.4.7 
sentence-transformers==3.0.1 colbert-ai==0.2.21 einops==0.8.0 + +ftfy==6.2.3 pypdf==4.3.1 docx2txt==0.8 python-pptx==1.0.0 @@ -53,7 +55,7 @@ unstructured==0.15.9 nltk==3.9.1 Markdown==3.7 pypandoc==1.13 -pandas==2.2.2 +pandas==2.2.3 openpyxl==3.1.5 pyxlsb==1.0.10 xlrd==2.0.1 @@ -78,7 +80,7 @@ pytube==15.0.0 extract_msg pydub -duckduckgo-search~=6.2.11 +duckduckgo-search~=6.2.13 ## Tests docker~=7.1.0 diff --git a/bun.lockb b/bun.lockb deleted file mode 100755 index e0a038da06..0000000000 Binary files a/bun.lockb and /dev/null differ diff --git a/package-lock.json b/package-lock.json index 5ab728c9c3..5bedd09d87 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "open-webui", - "version": "0.3.30", + "version": "0.3.31", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.3.30", + "version": "0.3.31", "dependencies": { "@codemirror/lang-javascript": "^6.2.2", "@codemirror/lang-python": "^6.1.6", + "@codemirror/language-data": "^6.5.1", "@codemirror/theme-one-dark": "^6.1.2", "@pyscript/core": "^0.4.32", "@sveltejs/adapter-node": "^2.0.0", @@ -33,6 +34,7 @@ "marked": "^9.1.0", "mermaid": "^10.9.1", "paneforge": "^0.0.6", + "panzoom": "^9.4.3", "pyodide": "^0.26.1", "socket.io-client": "^4.2.0", "sortablejs": "^1.15.2", @@ -50,7 +52,7 @@ "@typescript-eslint/eslint-plugin": "^6.17.0", "@typescript-eslint/parser": "^6.17.0", "autoprefixer": "^10.4.16", - "cypress": "^13.8.1", + "cypress": "^13.15.0", "eslint": "^8.56.0", "eslint-config-prettier": "^9.1.0", "eslint-plugin-cypress": "^3.4.0", @@ -69,7 +71,7 @@ "vitest": "^1.6.0" }, "engines": { - "node": ">=18.13.0 <=21.x.x", + "node": ">=18.13.0 <=22.x.x", "npm": ">=6.0.0" } }, @@ -150,6 +152,77 @@ "@lezer/common": "^1.1.0" } }, + "node_modules/@codemirror/lang-angular": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@codemirror/lang-angular/-/lang-angular-0.1.3.tgz", + "integrity": 
"sha512-xgeWGJQQl1LyStvndWtruUvb4SnBZDAu/gvFH/ZU+c0W25tQR8e5hq7WTwiIY2dNxnf+49mRiGI/9yxIwB6f5w==", + "dependencies": { + "@codemirror/lang-html": "^6.0.0", + "@codemirror/lang-javascript": "^6.1.2", + "@codemirror/language": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.3.3" + } + }, + "node_modules/@codemirror/lang-cpp": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-cpp/-/lang-cpp-6.0.2.tgz", + "integrity": "sha512-6oYEYUKHvrnacXxWxYa6t4puTlbN3dgV662BDfSH8+MfjQjVmP697/KYTDOqpxgerkvoNm7q5wlFMBeX8ZMocg==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/cpp": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-css": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/@codemirror/lang-css/-/lang-css-6.3.0.tgz", + "integrity": "sha512-CyR4rUNG9OYcXDZwMPvJdtb6PHbBDKUc/6Na2BIwZ6dKab1JQqKa4di+RNRY9Myn7JB81vayKwJeQ7jEdmNVDA==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.0.2", + "@lezer/css": "^1.1.7" + } + }, + "node_modules/@codemirror/lang-go": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-go/-/lang-go-6.0.1.tgz", + "integrity": "sha512-7fNvbyNylvqCphW9HD6WFnRpcDjr+KXX/FgqXy5H5ZS0eC5edDljukm/yNgYkwTsgp2busdod50AOTIy6Jikfg==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.6.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.0.0", + "@lezer/go": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-html": { + "version": "6.4.9", + "resolved": "https://registry.npmjs.org/@codemirror/lang-html/-/lang-html-6.4.9.tgz", + "integrity": "sha512-aQv37pIMSlueybId/2PVSP6NPnmurFDVmZwzc7jszd2KAF8qd4VBbvNYPXWQq90WIARjsdVkPbw29pszmHws3Q==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/lang-css": "^6.0.0", + "@codemirror/lang-javascript": "^6.0.0", + 
"@codemirror/language": "^6.4.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.17.0", + "@lezer/common": "^1.0.0", + "@lezer/css": "^1.1.0", + "@lezer/html": "^1.3.0" + } + }, + "node_modules/@codemirror/lang-java": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-java/-/lang-java-6.0.1.tgz", + "integrity": "sha512-OOnmhH67h97jHzCuFaIEspbmsT98fNdhVhmA3zCxW0cn7l8rChDhZtwiwJ/JOKXgfm4J+ELxQihxaI7bj7mJRg==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/java": "^1.0.0" + } + }, "node_modules/@codemirror/lang-javascript": { "version": "6.2.2", "resolved": "https://registry.npmjs.org/@codemirror/lang-javascript/-/lang-javascript-6.2.2.tgz", @@ -164,6 +237,68 @@ "@lezer/javascript": "^1.0.0" } }, + "node_modules/@codemirror/lang-json": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-json/-/lang-json-6.0.1.tgz", + "integrity": "sha512-+T1flHdgpqDDlJZ2Lkil/rLiRy684WMLc74xUnjJH48GQdfJo/pudlTRreZmKwzP8/tGdKf83wlbAdOCzlJOGQ==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/json": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-less": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-less/-/lang-less-6.0.2.tgz", + "integrity": "sha512-EYdQTG22V+KUUk8Qq582g7FMnCZeEHsyuOJisHRft/mQ+ZSZ2w51NupvDUHiqtsOy7It5cHLPGfHQLpMh9bqpQ==", + "dependencies": { + "@codemirror/lang-css": "^6.2.0", + "@codemirror/language": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-liquid": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-liquid/-/lang-liquid-6.2.1.tgz", + "integrity": "sha512-J1Mratcm6JLNEiX+U2OlCDTysGuwbHD76XwuL5o5bo9soJtSbz2g6RU3vGHFyS5DC8rgVmFSzi7i6oBftm7tnA==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/lang-html": "^6.0.0", + "@codemirror/language": "^6.0.0", + 
"@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0", + "@lezer/common": "^1.0.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.3.1" + } + }, + "node_modules/@codemirror/lang-markdown": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/@codemirror/lang-markdown/-/lang-markdown-6.3.0.tgz", + "integrity": "sha512-lYrI8SdL/vhd0w0aHIEvIRLRecLF7MiiRfzXFZY94dFwHqC9HtgxgagJ8fyYNBldijGatf9wkms60d8SrAj6Nw==", + "dependencies": { + "@codemirror/autocomplete": "^6.7.1", + "@codemirror/lang-html": "^6.0.0", + "@codemirror/language": "^6.3.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0", + "@lezer/common": "^1.2.1", + "@lezer/markdown": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-php": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-php/-/lang-php-6.0.1.tgz", + "integrity": "sha512-ublojMdw/PNWa7qdN5TMsjmqkNuTBD3k6ndZ4Z0S25SBAiweFGyY68AS3xNcIOlb6DDFDvKlinLQ40vSLqf8xA==", + "dependencies": { + "@codemirror/lang-html": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.0.0", + "@lezer/php": "^1.0.0" + } + }, "node_modules/@codemirror/lang-python": { "version": "6.1.6", "resolved": "https://registry.npmjs.org/@codemirror/lang-python/-/lang-python-6.1.6.tgz", @@ -176,6 +311,90 @@ "@lezer/python": "^1.1.4" } }, + "node_modules/@codemirror/lang-rust": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-rust/-/lang-rust-6.0.1.tgz", + "integrity": "sha512-344EMWFBzWArHWdZn/NcgkwMvZIWUR1GEBdwG8FEp++6o6vT6KL9V7vGs2ONsKxxFUPXKI0SPcWhyYyl2zPYxQ==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/rust": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-sass": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-sass/-/lang-sass-6.0.2.tgz", + "integrity": "sha512-l/bdzIABvnTo1nzdY6U+kPAC51czYQcOErfzQ9zSm9D8GmNPD0WTW8st/CJwBTPLO8jlrbyvlSEcN20dc4iL0Q==", + "dependencies": { 
+ "@codemirror/lang-css": "^6.2.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.0.2", + "@lezer/sass": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-sql": { + "version": "6.8.0", + "resolved": "https://registry.npmjs.org/@codemirror/lang-sql/-/lang-sql-6.8.0.tgz", + "integrity": "sha512-aGLmY4OwGqN3TdSx3h6QeA1NrvaYtF7kkoWR/+W7/JzB0gQtJ+VJxewlnE3+VImhA4WVlhmkJr109PefOOhjLg==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-vue": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@codemirror/lang-vue/-/lang-vue-0.1.3.tgz", + "integrity": "sha512-QSKdtYTDRhEHCfo5zOShzxCmqKJvgGrZwDQSdbvCRJ5pRLWBS7pD/8e/tH44aVQT6FKm0t6RVNoSUWHOI5vNug==", + "dependencies": { + "@codemirror/lang-html": "^6.0.0", + "@codemirror/lang-javascript": "^6.1.2", + "@codemirror/language": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.3.1" + } + }, + "node_modules/@codemirror/lang-wast": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@codemirror/lang-wast/-/lang-wast-6.0.2.tgz", + "integrity": "sha512-Imi2KTpVGm7TKuUkqyJ5NRmeFWF7aMpNiwHnLQe0x9kmrxElndyH0K6H/gXtWwY6UshMRAhpENsgfpSwsgmC6Q==", + "dependencies": { + "@codemirror/language": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-xml": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/@codemirror/lang-xml/-/lang-xml-6.1.0.tgz", + "integrity": "sha512-3z0blhicHLfwi2UgkZYRPioSgVTo9PV5GP5ducFH6FaHy0IAJRg+ixj5gTR1gnT/glAIC8xv4w2VL1LoZfs+Jg==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.4.0", + "@codemirror/state": "^6.0.0", + "@codemirror/view": "^6.0.0", + 
"@lezer/common": "^1.0.0", + "@lezer/xml": "^1.0.0" + } + }, + "node_modules/@codemirror/lang-yaml": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/@codemirror/lang-yaml/-/lang-yaml-6.1.1.tgz", + "integrity": "sha512-HV2NzbK9bbVnjWxwObuZh5FuPCowx51mEfoFT9y3y+M37fA3+pbxx4I7uePuygFzDsAmCTwQSc/kXh/flab4uw==", + "dependencies": { + "@codemirror/autocomplete": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/state": "^6.0.0", + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.2.0", + "@lezer/yaml": "^1.0.0" + } + }, "node_modules/@codemirror/language": { "version": "6.10.2", "resolved": "https://registry.npmjs.org/@codemirror/language/-/language-6.10.2.tgz", @@ -189,6 +408,43 @@ "style-mod": "^4.0.0" } }, + "node_modules/@codemirror/language-data": { + "version": "6.5.1", + "resolved": "https://registry.npmjs.org/@codemirror/language-data/-/language-data-6.5.1.tgz", + "integrity": "sha512-0sWxeUSNlBr6OmkqybUTImADFUP0M3P0IiSde4nc24bz/6jIYzqYSgkOSLS+CBIoW1vU8Q9KUWXscBXeoMVC9w==", + "dependencies": { + "@codemirror/lang-angular": "^0.1.0", + "@codemirror/lang-cpp": "^6.0.0", + "@codemirror/lang-css": "^6.0.0", + "@codemirror/lang-go": "^6.0.0", + "@codemirror/lang-html": "^6.0.0", + "@codemirror/lang-java": "^6.0.0", + "@codemirror/lang-javascript": "^6.0.0", + "@codemirror/lang-json": "^6.0.0", + "@codemirror/lang-less": "^6.0.0", + "@codemirror/lang-liquid": "^6.0.0", + "@codemirror/lang-markdown": "^6.0.0", + "@codemirror/lang-php": "^6.0.0", + "@codemirror/lang-python": "^6.0.0", + "@codemirror/lang-rust": "^6.0.0", + "@codemirror/lang-sass": "^6.0.0", + "@codemirror/lang-sql": "^6.0.0", + "@codemirror/lang-vue": "^0.1.1", + "@codemirror/lang-wast": "^6.0.0", + "@codemirror/lang-xml": "^6.0.0", + "@codemirror/lang-yaml": "^6.0.0", + "@codemirror/language": "^6.0.0", + "@codemirror/legacy-modes": "^6.4.0" + } + }, + "node_modules/@codemirror/legacy-modes": { + "version": "6.4.1", + "resolved": 
"https://registry.npmjs.org/@codemirror/legacy-modes/-/legacy-modes-6.4.1.tgz", + "integrity": "sha512-vdg3XY7OAs5uLDx2Iw+cGfnwtd7kM+Et/eMsqAGTfT/JKiVBQZXosTzjEbWAi/FrY6DcQIz8mQjBozFHZEUWQA==", + "dependencies": { + "@codemirror/language": "^6.0.0" + } + }, "node_modules/@codemirror/lint": { "version": "6.8.0", "resolved": "https://registry.npmjs.org/@codemirror/lint/-/lint-6.8.0.tgz", @@ -246,9 +502,9 @@ } }, "node_modules/@cypress/request": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/@cypress/request/-/request-3.0.1.tgz", - "integrity": "sha512-TWivJlJi8ZDx2wGOw1dbLuHJKUYX7bWySw377nlnGOW3hP9/MUKIsEdXT/YngWxVdgNCHRBmFlBipE+5/2ZZlQ==", + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@cypress/request/-/request-3.0.5.tgz", + "integrity": "sha512-v+XHd9XmWbufxF1/bTaVm2yhbxY+TB4YtWRqF2zaXBlDNMkls34KiATz0AVDLavL3iB6bQk9/7n3oY1EoLSWGA==", "dev": true, "dependencies": { "aws-sign2": "~0.7.0", @@ -257,14 +513,14 @@ "combined-stream": "~1.0.6", "extend": "~3.0.2", "forever-agent": "~0.6.1", - "form-data": "~2.3.2", - "http-signature": "~1.3.6", + "form-data": "~4.0.0", + "http-signature": "~1.4.0", "is-typedarray": "~1.0.0", "isstream": "~0.1.2", "json-stringify-safe": "~5.0.1", "mime-types": "~2.1.19", "performance-now": "^2.1.0", - "qs": "6.10.4", + "qs": "6.13.0", "safe-buffer": "^5.1.2", "tough-cookie": "^4.1.3", "tunnel-agent": "^0.6.0", @@ -949,6 +1205,36 @@ "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz", "integrity": "sha512-yemX0ZD2xS/73llMZIK6KplkjIjf2EvAHcinDi/TfJ9hS25G0388+ClHt6/3but0oOxinTcQHJLDXh6w1crzFQ==" }, + "node_modules/@lezer/cpp": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@lezer/cpp/-/cpp-1.1.2.tgz", + "integrity": "sha512-macwKtyeUO0EW86r3xWQCzOV9/CF8imJLpJlPv3sDY57cPGeUZ8gXWOWNlJr52TVByMV3PayFQCA5SHEERDmVQ==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/css": { + 
"version": "1.1.9", + "resolved": "https://registry.npmjs.org/@lezer/css/-/css-1.1.9.tgz", + "integrity": "sha512-TYwgljcDv+YrV0MZFFvYFQHCfGgbPMR6nuqLabBdmZoFH3EP1gvw8t0vae326Ne3PszQkbXfVBjCnf3ZVCr0bA==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/go": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@lezer/go/-/go-1.0.0.tgz", + "integrity": "sha512-co9JfT3QqX1YkrMmourYw2Z8meGC50Ko4d54QEcQbEYpvdUvN4yb0NBZdn/9ertgvjsySxHsKzH3lbm3vqJ4Jw==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, "node_modules/@lezer/highlight": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@lezer/highlight/-/highlight-1.2.0.tgz", @@ -957,6 +1243,26 @@ "@lezer/common": "^1.0.0" } }, + "node_modules/@lezer/html": { + "version": "1.3.10", + "resolved": "https://registry.npmjs.org/@lezer/html/-/html-1.3.10.tgz", + "integrity": "sha512-dqpT8nISx/p9Do3AchvYGV3qYc4/rKr3IBZxlHmpIKam56P47RSHkSF5f13Vu9hebS1jM0HmtJIwLbWz1VIY6w==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/java": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@lezer/java/-/java-1.1.2.tgz", + "integrity": "sha512-3j8X70JvYf0BZt8iSRLXLkt0Ry1hVUgH6wT32yBxH/Xi55nW2VMhc1Az4SKwu4YGSmxCm1fsqDDcHTuFjC8pmg==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, "node_modules/@lezer/javascript": { "version": "1.4.16", "resolved": "https://registry.npmjs.org/@lezer/javascript/-/javascript-1.4.16.tgz", @@ -967,6 +1273,16 @@ "@lezer/lr": "^1.3.0" } }, + "node_modules/@lezer/json": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@lezer/json/-/json-1.0.2.tgz", + "integrity": "sha512-xHT2P4S5eeCYECyKNPhr4cbEL9tc8w83SPwRC373o9uEdrvGKTZoJVAGxpOsZckMlEh9W23Pc72ew918RWQOBQ==", 
+ "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, "node_modules/@lezer/lr": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/@lezer/lr/-/lr-1.4.1.tgz", @@ -975,6 +1291,25 @@ "@lezer/common": "^1.0.0" } }, + "node_modules/@lezer/markdown": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@lezer/markdown/-/markdown-1.3.1.tgz", + "integrity": "sha512-DGlzU/i8DC8k0uz1F+jeePrkATl0jWakauTzftMQOcbaMkHbNSRki/4E2tOzJWsVpoKYhe7iTJ03aepdwVUXUA==", + "dependencies": { + "@lezer/common": "^1.0.0", + "@lezer/highlight": "^1.0.0" + } + }, + "node_modules/@lezer/php": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@lezer/php/-/php-1.0.2.tgz", + "integrity": "sha512-GN7BnqtGRpFyeoKSEqxvGvhJQiI4zkgmYnDk/JIyc7H7Ifc1tkPnUn/R2R8meH3h/aBf5rzjvU8ZQoyiNDtDrA==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.1.0" + } + }, "node_modules/@lezer/python": { "version": "1.1.14", "resolved": "https://registry.npmjs.org/@lezer/python/-/python-1.1.14.tgz", @@ -985,6 +1320,46 @@ "@lezer/lr": "^1.0.0" } }, + "node_modules/@lezer/rust": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@lezer/rust/-/rust-1.0.2.tgz", + "integrity": "sha512-Lz5sIPBdF2FUXcWeCu1//ojFAZqzTQNRga0aYv6dYXqJqPfMdCAI0NzajWUd4Xijj1IKJLtjoXRPMvTKWBcqKg==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/sass": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@lezer/sass/-/sass-1.0.7.tgz", + "integrity": "sha512-8HLlOkuX/SMHOggI2DAsXUw38TuURe+3eQ5hiuk9QmYOUyC55B1dYEIMkav5A4IELVaW4e1T4P9WRiI5ka4mdw==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/xml": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@lezer/xml/-/xml-1.0.5.tgz", + 
"integrity": "sha512-VFouqOzmUWfIg+tfmpcdV33ewtK+NSwd4ngSe1aG7HFb4BN0ExyY1b8msp+ndFrnlG4V4iC8yXacjFtrwERnaw==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.0.0" + } + }, + "node_modules/@lezer/yaml": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@lezer/yaml/-/yaml-1.0.3.tgz", + "integrity": "sha512-GuBLekbw9jDBDhGur82nuwkxKQ+a3W5H0GfaAthDXcAu+XdpS43VlnxA9E9hllkpSP5ellRDKjLLj7Lu9Wr6xA==", + "dependencies": { + "@lezer/common": "^1.2.0", + "@lezer/highlight": "^1.0.0", + "@lezer/lr": "^1.4.0" + } + }, "node_modules/@melt-ui/svelte": { "version": "0.76.0", "resolved": "https://registry.npmjs.org/@melt-ui/svelte/-/svelte-0.76.0.tgz", @@ -1413,14 +1788,14 @@ } }, "node_modules/@sveltejs/kit": { - "version": "2.5.20", - "resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.5.20.tgz", - "integrity": "sha512-47rJ5BoYwURE/Rp7FNMLp3NzdbWC9DQ/PmKd0mebxT2D/PrPxZxcLImcD3zsWdX2iS6oJk8ITJbO/N2lWnnUqA==", + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.6.2.tgz", + "integrity": "sha512-ruogrSPXjckn5poUiZU8VYNCSPHq66SFR1AATvOikQxtP6LNI4niAZVX/AWZRe/EPDG3oY2DNJ9c5z7u0t2NAQ==", "hasInstallScript": true, "dependencies": { "@types/cookie": "^0.6.0", - "cookie": "^0.6.0", - "devalue": "^5.0.0", + "cookie": "^0.7.0", + "devalue": "^5.1.0", "esm-env": "^1.0.0", "import-meta-resolve": "^4.1.0", "kleur": "^4.1.5", @@ -1438,7 +1813,7 @@ "node": ">=18.13" }, "peerDependencies": { - "@sveltejs/vite-plugin-svelte": "^3.0.0", + "@sveltejs/vite-plugin-svelte": "^3.0.0 || ^4.0.0-next.1", "svelte": "^4.0.0 || ^5.0.0-next.0", "vite": "^5.0.3" } @@ -2061,6 +2436,14 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/amator": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/amator/-/amator-1.1.0.tgz", + "integrity": "sha512-V5+aH8pe+Z3u/UG3L3pG3BaFQGXAyXHVQDroRwjPHdh08bcUEchAVsU1MCuJSCaU5o60wTK6KaE6te5memzgYw==", + "dependencies": { + 
"bezier-easing": "^2.0.3" + } + }, "node_modules/ansi-colors": { "version": "4.1.3", "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", @@ -2292,9 +2675,9 @@ } }, "node_modules/aws4": { - "version": "1.12.0", - "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.12.0.tgz", - "integrity": "sha512-NmWvPnx0F1SfrQbYwOi7OeaNGokp9XhzNioJ/CSBs8Qa4vxug81mhJEAVZwxXuBmYB5KDRfMq/F3RR0BIU7sWg==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.13.2.tgz", + "integrity": "sha512-lHe62zvbTB5eEABUVi/AwVh0ZKY9rMMDhmm+eeyuuUQbQ3+J+fONVQOZyj+DdrvD4BY33uYniyRJ4UJIaSKAfw==", "dev": true }, "node_modules/axobject-query": { @@ -2351,6 +2734,11 @@ "tweetnacl": "^0.14.3" } }, + "node_modules/bezier-easing": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/bezier-easing/-/bezier-easing-2.1.0.tgz", + "integrity": "sha512-gbIqZ/eslnUFC1tjEvtz0sgx+xTK20wDnYMIA27VA04R7w6xxXQPZDbibjA9DTWZRA2CXtwHykkVzlCaAJAZig==" + }, "node_modules/binary-extensions": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", @@ -3083,9 +3471,9 @@ "dev": true }, "node_modules/cookie": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", - "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==", + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz", + "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==", "engines": { "node": ">= 0.6" } @@ -3187,13 +3575,13 @@ } }, "node_modules/cypress": { - "version": "13.8.1", - "resolved": "https://registry.npmjs.org/cypress/-/cypress-13.8.1.tgz", - "integrity": "sha512-Uk6ovhRbTg6FmXjeZW/TkbRM07KPtvM5gah1BIMp4Y2s+i/NMxgaLw0+PbYTOdw1+egE0FP3mWRiGcRkjjmhzA==", + "version": "13.15.0", + "resolved": "https://registry.npmjs.org/cypress/-/cypress-13.15.0.tgz", + 
"integrity": "sha512-53aO7PwOfi604qzOkCSzNlWquCynLlKE/rmmpSPcziRH6LNfaDUAklQT6WJIsD8ywxlIy+uVZsnTMCCQVd2kTw==", "dev": true, "hasInstallScript": true, "dependencies": { - "@cypress/request": "^3.0.0", + "@cypress/request": "^3.0.4", "@cypress/xvfb": "^1.2.4", "@types/sinonjs__fake-timers": "8.1.1", "@types/sizzle": "^2.3.2", @@ -3232,7 +3620,7 @@ "request-progress": "^3.0.0", "semver": "^7.5.3", "supports-color": "^8.1.1", - "tmp": "~0.2.1", + "tmp": "~0.2.3", "untildify": "^4.0.0", "yauzl": "^2.10.0" }, @@ -3870,9 +4258,9 @@ } }, "node_modules/devalue": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/devalue/-/devalue-5.0.0.tgz", - "integrity": "sha512-gO+/OMXF7488D+u3ue+G7Y4AA3ZmUnB3eHJXmBTgNHvr4ZNzl36A0ZtG+XCRNYCkYx/bFmw4qtkoFLa+wSrwAA==" + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/devalue/-/devalue-5.1.1.tgz", + "integrity": "sha512-maua5KUiapvEwiEAe+XnlZ3Rh0GD+qI1J/nb9vrJc3muPXvcF/8gXYTWF76+5DAqHyDUtOIImEuo0YKE9mshVw==" }, "node_modules/didyoumean": { "version": "1.2.2", @@ -4715,17 +5103,17 @@ } }, "node_modules/form-data": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", - "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", "dev": true, "dependencies": { "asynckit": "^0.4.0", - "combined-stream": "^1.0.6", + "combined-stream": "^1.0.8", "mime-types": "^2.1.12" }, "engines": { - "node": ">= 0.12" + "node": ">= 6" } }, "node_modules/fraction.js": { @@ -5211,14 +5599,14 @@ } }, "node_modules/http-signature": { - "version": "1.3.6", - "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz", - "integrity": 
"sha512-3adrsD6zqo4GsTqtO7FyrejHNv+NgiIfAfv68+jVlFmSr9OGy7zrxONceFRLKvnnZA5jbxQBX1u9PpB6Wi32Gw==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.4.0.tgz", + "integrity": "sha512-G5akfn7eKbpDN+8nPS/cb57YeA1jLTVxjpCj7tmm3QKPdyDy7T+qSC40e9ptydSWvkwjSXw1VbkpyEm39ukeAg==", "dev": true, "dependencies": { "assert-plus": "^1.0.0", "jsprim": "^2.0.2", - "sshpk": "^1.14.1" + "sshpk": "^1.18.0" }, "engines": { "node": ">=0.10" @@ -6804,6 +7192,11 @@ "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node_modules/ngraph.events": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/ngraph.events/-/ngraph.events-1.2.2.tgz", + "integrity": "sha512-JsUbEOzANskax+WSYiAPETemLWYXmixuPAlmZmhIbIj6FH/WDgEGCGnRwUQBK0GjOnVm8Ui+e5IJ+5VZ4e32eQ==" + }, "node_modules/node-releases": { "version": "2.0.14", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", @@ -6888,10 +7281,13 @@ } }, "node_modules/object-inspect": { - "version": "1.13.1", - "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz", - "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz", + "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==", "dev": true, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -6998,6 +7394,16 @@ "svelte": "^4.0.0 || ^5.0.0-next.1" } }, + "node_modules/panzoom": { + "version": "9.4.3", + "resolved": "https://registry.npmjs.org/panzoom/-/panzoom-9.4.3.tgz", + "integrity": "sha512-xaxCpElcRbQsUtIdwlrZA90P90+BHip4Vda2BC8MEb4tkI05PmR6cKECdqUCZ85ZvBHjpI9htJrZBxV5Gp/q/w==", + "dependencies": { + "amator": "^1.1.0", + "ngraph.events": 
"^1.2.2", + "wheel": "^1.0.0" + } + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -7583,12 +7989,12 @@ } }, "node_modules/qs": { - "version": "6.10.4", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.10.4.tgz", - "integrity": "sha512-OQiU+C+Ds5qiH91qh/mg0w+8nwQuLjM4F4M/PbmhDOoYehPh+Fb0bDjtR1sOvy7YKxvj28Y/M0PhP5uVX0kB+g==", + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", "dev": true, "dependencies": { - "side-channel": "^1.0.4" + "side-channel": "^1.0.6" }, "engines": { "node": ">=0.6" @@ -9025,9 +9431,9 @@ } }, "node_modules/tough-cookie": { - "version": "4.1.3", - "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.3.tgz", - "integrity": "sha512-aX/y5pVRkfRnfmuX+OdbSdXvPe6ieKX/G2s7e98f4poJHnqH3281gDPm/metm6E/WRamfx7WC4HUqkWHfQHprw==", + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-4.1.4.tgz", + "integrity": "sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==", "dev": true, "dependencies": { "psl": "^1.1.33", @@ -10104,6 +10510,11 @@ "resolved": "https://registry.npmjs.org/web-worker/-/web-worker-1.3.0.tgz", "integrity": "sha512-BSR9wyRsy/KOValMgd5kMyr3JzpdeoR9KVId8u5GVlTTAtNChlsE4yTxeY7zMdNSyOmoKBv8NH2qeRY9Tg+IaA==" }, + "node_modules/wheel": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wheel/-/wheel-1.0.0.tgz", + "integrity": "sha512-XiCMHibOiqalCQ+BaNSwRoZ9FDTAvOsXxGHXChBugewDj7HC8VBIER71dEOiRH1fSdLbRCQzngKTSiZ06ZQzeA==" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 77bc183fd5..3acfcd7754 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - 
"version": "0.3.30", + "version": "0.3.31", "private": true, "scripts": { "dev": "npm run pyodide:fetch && vite dev --host", @@ -28,7 +28,7 @@ "@typescript-eslint/eslint-plugin": "^6.17.0", "@typescript-eslint/parser": "^6.17.0", "autoprefixer": "^10.4.16", - "cypress": "^13.8.1", + "cypress": "^13.15.0", "eslint": "^8.56.0", "eslint-config-prettier": "^9.1.0", "eslint-plugin-cypress": "^3.4.0", @@ -50,6 +50,7 @@ "dependencies": { "@codemirror/lang-javascript": "^6.2.2", "@codemirror/lang-python": "^6.1.6", + "@codemirror/language-data": "^6.5.1", "@codemirror/theme-one-dark": "^6.1.2", "@pyscript/core": "^0.4.32", "@sveltejs/adapter-node": "^2.0.0", @@ -73,6 +74,7 @@ "marked": "^9.1.0", "mermaid": "^10.9.1", "paneforge": "^0.0.6", + "panzoom": "^9.4.3", "pyodide": "^0.26.1", "socket.io-client": "^4.2.0", "sortablejs": "^1.15.2", @@ -82,7 +84,7 @@ "uuid": "^9.0.1" }, "engines": { - "node": ">=18.13.0 <=21.x.x", + "node": ">=18.13.0 <=22.x.x", "npm": ">=6.0.0" } } diff --git a/pyproject.toml b/pyproject.toml index b2558e4d1a..1df284f802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,13 +46,15 @@ dependencies = [ "langchain-chroma==0.1.2", "fake-useragent==1.5.1", - "chromadb==0.5.5", - "pymilvus==2.4.6", + "chromadb==0.5.9", + "pymilvus==2.4.7", "sentence-transformers==3.0.1", "colbert-ai==0.2.21", "einops==0.8.0", + + "ftfy==6.2.3", "pypdf==4.3.1", "docx2txt==0.8", "python-pptx==1.0.0", diff --git a/src/app.css b/src/app.css index 65103b55ae..7a8bf59b01 100644 --- a/src/app.css +++ b/src/app.css @@ -62,6 +62,12 @@ li p { border-width: 1px; } +/* Dark theme scrollbar styles */ +.dark ::-webkit-scrollbar-thumb { + background-color: rgba(69, 69, 74, 0.8); /* Darker color for dark theme */ + border-color: rgba(0, 0, 0, var(--tw-border-opacity)); +} + ::-webkit-scrollbar { height: 0.4rem; width: 0.4rem; diff --git a/src/app.html b/src/app.html index d7f4513e78..f6e46c9cfb 100644 --- a/src/app.html +++ b/src/app.html @@ -3,6 +3,7 @@ + { return res; }; +export 
const uploadDir = async (token: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/files/upload/dir`, { + method: 'POST', + headers: { + Accept: 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const getFiles = async (token: string = '') => { let error = null; @@ -92,6 +118,40 @@ export const getFileById = async (token: string, id: string) => { return res; }; +export const updateFileDataContentById = async (token: string, id: string, content: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/files/${id}/data/content/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + content: content + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const getFileContentById = async (id: string) => { let error = null; diff --git a/src/lib/apis/knowledge/index.ts b/src/lib/apis/knowledge/index.ts new file mode 100644 index 0000000000..8428668996 --- /dev/null +++ b/src/lib/apis/knowledge/index.ts @@ -0,0 +1,308 @@ +import { WEBUI_API_BASE_URL } from '$lib/constants'; + +export const createNewKnowledge = async (token: string, name: string, description: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/create`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + name: name, + 
description: description + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getKnowledgeItems = async (token: string = '') => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/`, { + method: 'GET', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getKnowledgeById = async (token: string, id: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}`, { + method: 'GET', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +type KnowledgeUpdateForm = { + name?: string; + description?: string; + data?: object; +}; + +export const updateKnowledgeById = async (token: string, id: string, form: KnowledgeUpdateForm) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + name: form?.name ? form.name : undefined, + description: form?.description ? form.description : undefined, + data: form?.data ? 
form.data : undefined + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const addFileToKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/add`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const updateFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/update`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const removeFileFromKnowledgeById = async (token: string, id: string, fileId: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/file/remove`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + 
file_id: fileId + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const resetKnowledgeById = async (token: string, id: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/reset`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const deleteKnowledgeById = async (token: string, id: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/knowledge/${id}/delete`, { + method: 'DELETE', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .then((json) => { + return json; + }) + .catch((err) => { + error = err.detail; + + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/retrieval/index.ts similarity index 78% rename from src/lib/apis/rag/index.ts rename to src/lib/apis/retrieval/index.ts index 3c0dba4b55..9f49e9c0fb 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -1,9 +1,9 @@ -import { RAG_API_BASE_URL } from '$lib/constants'; +import { RETRIEVAL_API_BASE_URL } from '$lib/constants'; export const getRAGConfig = async (token: string) => { let error = null; - const res = await 
fetch(`${RAG_API_BASE_URL}/config`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/config`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -53,7 +53,7 @@ type RAGConfigForm = { export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/config/update`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/config/update`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -83,7 +83,7 @@ export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => export const getRAGTemplate = async (token: string) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/template`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/template`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -110,7 +110,7 @@ export const getRAGTemplate = async (token: string) => { export const getQuerySettings = async (token: string) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/query/settings`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/query/settings`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -143,7 +143,7 @@ type QuerySettings = { export const updateQuerySettings = async (token: string, settings: QuerySettings) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/query/settings/update`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/query/settings/update`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -170,288 +170,10 @@ export const updateQuerySettings = async (token: string, settings: QuerySettings return res; }; -export const processDocToVectorDB = async (token: string, file_id: string) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/process/doc`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer 
${token}` - }, - body: JSON.stringify({ - file_id: file_id - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - console.log(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const uploadDocToVectorDB = async (token: string, collection_name: string, file: File) => { - const data = new FormData(); - data.append('file', file); - data.append('collection_name', collection_name); - - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/doc`, { - method: 'POST', - headers: { - Accept: 'application/json', - authorization: `Bearer ${token}` - }, - body: data - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - console.log(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const uploadWebToVectorDB = async (token: string, collection_name: string, url: string) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/web`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - url: url, - collection_name: collection_name - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - console.log(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const uploadYoutubeTranscriptionToVectorDB = async (token: string, url: string) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/youtube`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - url: url - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - 
return res.json(); - }) - .catch((err) => { - error = err.detail; - console.log(err); - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const queryDoc = async ( - token: string, - collection_name: string, - query: string, - k: number | null = null -) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/query/doc`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - collection_name: collection_name, - query: query, - k: k - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const queryCollection = async ( - token: string, - collection_names: string, - query: string, - k: number | null = null -) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/query/collection`, { - method: 'POST', - headers: { - Accept: 'application/json', - 'Content-Type': 'application/json', - authorization: `Bearer ${token}` - }, - body: JSON.stringify({ - collection_names: collection_names, - query: query, - k: k - }) - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const scanDocs = async (token: string) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/scan`, { - method: 'GET', - headers: { - Accept: 'application/json', - authorization: `Bearer ${token}` - } - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const resetUploadDir = async (token: string) => { - 
let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/reset/uploads`, { - method: 'POST', - headers: { - Accept: 'application/json', - authorization: `Bearer ${token}` - } - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - -export const resetVectorDB = async (token: string) => { - let error = null; - - const res = await fetch(`${RAG_API_BASE_URL}/reset/db`, { - method: 'POST', - headers: { - Accept: 'application/json', - authorization: `Bearer ${token}` - } - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res; -}; - export const getEmbeddingConfig = async (token: string) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/embedding`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/embedding`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -490,7 +212,7 @@ type EmbeddingModelUpdateForm = { export const updateEmbeddingConfig = async (token: string, payload: EmbeddingModelUpdateForm) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/embedding/update`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/embedding/update`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -520,7 +242,7 @@ export const updateEmbeddingConfig = async (token: string, payload: EmbeddingMod export const getRerankingConfig = async (token: string) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/reranking`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/reranking`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -551,7 +273,7 @@ type RerankingModelUpdateForm = { export const updateRerankingConfig = async (token: string, payload: 
RerankingModelUpdateForm) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/reranking/update`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/reranking/update`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -578,14 +300,119 @@ export const updateRerankingConfig = async (token: string, payload: RerankingMod return res; }; -export const runWebSearch = async ( +export interface SearchDocument { + status: boolean; + collection_name: string; + filenames: string[]; +} + +export const processFile = async ( + token: string, + file_id: string, + collection_name: string | null = null +) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/file`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + file_id: file_id, + collection_name: collection_name ? collection_name : undefined + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const processYoutubeVideo = async (token: string, url: string) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/youtube`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + url: url + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const processWeb = async (token: string, collection_name: string, url: string) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web`, { + 
method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + url: url, + collection_name: collection_name + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const processWebSearch = async ( token: string, query: string, collection_name?: string ): Promise => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/web/search`, { + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/process/web/search`, { method: 'POST', headers: { 'Content-Type': 'application/json', @@ -613,8 +440,128 @@ export const runWebSearch = async ( return res; }; -export interface SearchDocument { - status: boolean; - collection_name: string; - filenames: string[]; -} +export const queryDoc = async ( + token: string, + collection_name: string, + query: string, + k: number | null = null +) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/query/doc`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + collection_name: collection_name, + query: query, + k: k + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const queryCollection = async ( + token: string, + collection_names: string, + query: string, + k: number | null = null +) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/query/collection`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + 
body: JSON.stringify({ + collection_names: collection_names, + query: query, + k: k + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const resetUploadDir = async (token: string) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/reset/uploads`, { + method: 'POST', + headers: { + Accept: 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const resetVectorDB = async (token: string) => { + let error = null; + + const res = await fetch(`${RETRIEVAL_API_BASE_URL}/reset/db`, { + method: 'POST', + headers: { + Accept: 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts index f91edad83c..722cca9a15 100644 --- a/src/lib/apis/streaming/index.ts +++ b/src/lib/apis/streaming/index.ts @@ -18,6 +18,8 @@ type ResponseUsage = { completion_tokens: number; /** Sum of the above two fields */ total_tokens: number; + /** Any other fields that aren't part of the base OpenAI spec */ + [other: string]: unknown; }; // createOpenAITextStream takes a responseBody with a SSE response, diff --git a/src/lib/components/AddFilesPlaceholder.svelte b/src/lib/components/AddFilesPlaceholder.svelte index 3bdbe9281a..a3057c560e 100644 --- a/src/lib/components/AddFilesPlaceholder.svelte +++ b/src/lib/components/AddFilesPlaceholder.svelte @@ -1,10 +1,18 @@
📄
-
{$i18n.t('Add Files')}
+
+ {#if title} + {title} + {:else} + {$i18n.t('Add Files')} + {/if} +
diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte index 040bc5e1aa..5e6e1dca1a 100644 --- a/src/lib/components/admin/Settings/Audio.svelte +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -66,6 +66,7 @@ // do your loop if (voices.length > 0) { clearInterval(getVoicesLoop); + voices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage)); } }, 100); } else { @@ -76,6 +77,7 @@ if (res) { console.log(res); voices = res.voices; + voices.sort((a, b) => a.name.localeCompare(b.name, $i18n.resolvedLanguage)); } } }; @@ -295,7 +297,8 @@ {voice.name.replace('+', ', ')} {/each} diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index e06edce9dd..d6f7dc9873 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -1,13 +1,12 @@ + +
+
+ {#if overlay} +
+ {/if} + +
+ +
+ +
+
+ {#if contents.length > 0} +
+ {#if contents[selectedContentIdx].type === 'iframe'} + + {:else if contents[selectedContentIdx].type === 'svg'} + + {/if} +
+ {:else} +
+ {$i18n.t('No HTML, CSS, or JavaScript content found.')} +
+ {/if} +
+
+
+ + {#if contents.length > 0} +
+
+
+ + +
+ {$i18n.t('Version {{selectedVersion}} of {{totalVersions}}', { + selectedVersion: selectedContentIdx + 1, + totalVersions: contents.length + })} +
+ + +
+
+ +
+ + + {#if contents[selectedContentIdx].type === 'iframe'} + + + + {/if} +
+
+ {/if} +
diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index 9db03ef532..e432575b24 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -32,7 +32,8 @@ temporaryChatEnabled, mobile, showOverview, - chatTitle + chatTitle, + showArtifacts } from '$lib/stores'; import { convertMessagesToHistory, @@ -52,7 +53,7 @@ updateChatById } from '$lib/apis/chats'; import { generateOpenAIChatCompletion } from '$lib/apis/openai'; - import { runWebSearch } from '$lib/apis/rag'; + import { processWebSearch } from '$lib/apis/retrieval'; import { createOpenAITextStream } from '$lib/apis/streaming'; import { queryMemory } from '$lib/apis/memories'; import { getAndUpdateUserLocation, getUserSettings } from '$lib/apis/users'; @@ -70,6 +71,7 @@ import Navbar from '$lib/components/layout/Navbar.svelte'; import ChatControls from './ChatControls.svelte'; import EventConfirmDialog from '../common/ConfirmDialog.svelte'; + import Placeholder from './Placeholder.svelte'; export let chatIdProp = ''; @@ -311,6 +313,11 @@ ////////////////////////// const initNewChat = async () => { + await showControls.set(false); + await showCallOverlay.set(false); + await showOverview.set(false); + await showArtifacts.set(false); + if ($page.url.pathname.includes('/c/')) { window.history.replaceState(history.state, '', `/`); } @@ -653,7 +660,7 @@ ); } else if ( files.length > 0 && - files.filter((file) => file.type !== 'image' && file.status !== 'processed').length > 0 + files.filter((file) => file.type !== 'image' && file.status === 'uploading').length > 0 ) { // Upload not done toast.error( @@ -689,7 +696,6 @@ ); files = []; - prompt = ''; // Create user message @@ -937,7 +943,26 @@ done: false } ]; - files.push(...model.info.meta.knowledge); + files.push( + ...model.info.meta.knowledge.map((item) => { + if (item?.collection_name) { + return { + id: item.collection_name, + name: item.name, + legacy: true + }; + } else if 
(item?.collection_names) { + return { + name: item.name, + type: 'collection', + collection_names: item.collection_names, + legacy: true + }; + } else { + return item; + } + }) + ); history.messages[responseMessageId] = responseMessage; } files.push( @@ -947,6 +972,12 @@ ...(responseMessage?.files ?? []).filter((item) => ['web_search_results'].includes(item.type)) ); + // Remove duplicates + files = files.filter( + (item, index, array) => + array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index + ); + scrollToBottom(); eventTarget.dispatchEvent( @@ -1237,7 +1268,26 @@ done: false } ]; - files.push(...model.info.meta.knowledge); + files.push( + ...model.info.meta.knowledge.map((item) => { + if (item?.collection_name) { + return { + id: item.collection_name, + name: item.name, + legacy: true + }; + } else if (item?.collection_names) { + return { + name: item.name, + type: 'collection', + collection_names: item.collection_names, + legacy: true + }; + } else { + return item; + } + }) + ); history.messages[responseMessageId] = responseMessage; } files.push( @@ -1246,6 +1296,11 @@ ), ...(responseMessage?.files ?? []).filter((item) => ['web_search_results'].includes(item.type)) ); + // Remove duplicates + files = files.filter( + (item, index, array) => + array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index + ); scrollToBottom(); @@ -1382,7 +1437,7 @@ } if (usage) { - responseMessage.info = { ...usage, openai: true }; + responseMessage.info = { ...usage, openai: true, usage }; } if (citations) { @@ -1737,7 +1792,7 @@ }); history.messages[responseMessageId] = responseMessage; - const results = await runWebSearch(localStorage.token, searchQuery).catch((error) => { + const results = await processWebSearch(localStorage.token, searchQuery).catch((error) => { console.log(error); toast.error(error); @@ -1880,7 +1935,7 @@ {#if $banners.length > 0 && !history.currentId && !$chatId && selectedModels.length <= 1} -
+
{#each $banners.filter( (b) => (b.dismissible ? !JSON.parse(localStorage.getItem('dismissedBannerIds') ?? '[]').includes(b.id) : true) ) as banner} - + {#if $settings?.landingPageMode === 'chat' || createMessagesList(history.currentId).length > 0} + + {/if}
diff --git a/src/lib/components/chat/ChatControls.svelte b/src/lib/components/chat/ChatControls.svelte index 9cc44ce38d..68c976b5c0 100644 --- a/src/lib/components/chat/ChatControls.svelte +++ b/src/lib/components/chat/ChatControls.svelte @@ -2,8 +2,8 @@ import { SvelteFlowProvider } from '@xyflow/svelte'; import { slide } from 'svelte/transition'; - import { onDestroy, onMount } from 'svelte'; - import { mobile, showControls, showCallOverlay, showOverview } from '$lib/stores'; + import { onDestroy, onMount, tick } from 'svelte'; + import { mobile, showControls, showCallOverlay, showOverview, showArtifacts } from '$lib/stores'; import Modal from '../common/Modal.svelte'; import Controls from './Controls/Controls.svelte'; @@ -12,12 +12,13 @@ import Overview from './Overview.svelte'; import { Pane, PaneResizer } from 'paneforge'; import EllipsisVertical from '../icons/EllipsisVertical.svelte'; - import { get } from 'svelte/store'; + import Artifacts from './Artifacts.svelte'; export let history; export let models = []; export let chatId = null; + export let chatFiles = []; export let params = {}; @@ -29,36 +30,67 @@ export let modelId; export let pane; + + let mediaQuery; let largeScreen = false; + let dragged = false; + + const handleMediaQuery = async (e) => { + if (e.matches) { + largeScreen = true; + + if ($showCallOverlay) { + showCallOverlay.set(false); + await tick(); + showCallOverlay.set(true); + } + } else { + largeScreen = false; + + if ($showCallOverlay) { + showCallOverlay.set(false); + await tick(); + showCallOverlay.set(true); + } + pane = null; + } + }; + + const onMouseDown = (event) => { + dragged = true; + }; + + const onMouseUp = (event) => { + dragged = false; + }; onMount(() => { // listen to resize 1024px - const mediaQuery = window.matchMedia('(min-width: 1024px)'); - - const handleMediaQuery = (e) => { - if (e.matches) { - largeScreen = true; - } else { - largeScreen = false; - pane = null; - } - }; + mediaQuery = 
window.matchMedia('(min-width: 1024px)'); mediaQuery.addEventListener('change', handleMediaQuery); - handleMediaQuery(mediaQuery); - return () => { - mediaQuery.removeEventListener('change', handleMediaQuery); - }; + document.addEventListener('mousedown', onMouseDown); + document.addEventListener('mouseup', onMouseUp); }); onDestroy(() => { showControls.set(false); + + mediaQuery.removeEventListener('change', handleMediaQuery); + document.removeEventListener('mousedown', onMouseDown); + document.removeEventListener('mouseup', onMouseUp); }); $: if (!chatId) { + showControls.set(false); showOverview.set(false); + showArtifacts.set(false); + + if ($showCallOverlay) { + showCallOverlay.set(false); + } } @@ -72,7 +104,9 @@ }} >
{#if $showCallOverlay}
+ {:else if $showArtifacts} + {:else if $showOverview} - -
- -
-
+ + {#if $showControls} + +
+ +
+
+ {/if} {#if $showControls}
{#if $showCallOverlay}
@@ -159,6 +199,8 @@ }} />
+ {:else if $showArtifacts} + {:else if $showOverview} { + submitPrompt(e.detail); + }} />
diff --git a/src/lib/components/chat/Controls/Controls.svelte b/src/lib/components/chat/Controls/Controls.svelte index 35184f3851..25924535a6 100644 --- a/src/lib/components/chat/Controls/Controls.svelte +++ b/src/lib/components/chat/Controls/Controls.svelte @@ -35,7 +35,9 @@ {#each chatFiles as file, fileIdx} { + console.log(file); + }} /> {/each}
diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index ea6b0aec8e..b0991914f9 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -12,20 +12,14 @@ config, showCallOverlay, tools, - user as _user + user as _user, + showControls } from '$lib/stores'; import { blobToFile, findWordIndices } from '$lib/utils'; - import { transcribeAudio } from '$lib/apis/audio'; - import { processDocToVectorDB } from '$lib/apis/rag'; import { uploadFile } from '$lib/apis/files'; - import { - SUPPORTED_FILE_TYPE, - SUPPORTED_FILE_EXTENSIONS, - WEBUI_BASE_URL, - WEBUI_API_BASE_URL - } from '$lib/constants'; + import { WEBUI_BASE_URL, WEBUI_API_BASE_URL } from '$lib/constants'; import Tooltip from '../common/Tooltip.svelte'; import InputMenu from './MessageInput/InputMenu.svelte'; @@ -40,7 +34,6 @@ export let transparentBackground = false; - export let submitPrompt: Function; export let createMessagePair: Function; export let stopResponse: Function; @@ -49,6 +42,14 @@ export let atSelectedModel: Model | undefined; export let selectedModels: ['']; + export let history; + + export let prompt = ''; + export let files = []; + export let availableToolIds = []; + export let selectedToolIds = []; + export let webSearchEnabled = false; + let recording = false; let chatTextAreaElement: HTMLTextAreaElement; @@ -60,15 +61,7 @@ let dragged = false; let user = null; - let chatInputPlaceholder = ''; - - export let history; - - export let prompt = ''; - export let files = []; - export let availableToolIds = []; - export let selectedToolIds = []; - export let webSearchEnabled = false; + export let placeholder = ''; let visionCapableModels = []; $: visionCapableModels = [...(atSelectedModel ? 
[atSelectedModel] : selectedModels)].filter( @@ -100,7 +93,7 @@ url: '', name: file.name, collection_name: '', - status: '', + status: 'uploading', size: file.size, error: '' }; @@ -124,29 +117,17 @@ } try { + // During the file upload, file content is automatically extracted. const uploadedFile = await uploadFile(localStorage.token, file); if (uploadedFile) { fileItem.status = 'uploaded'; fileItem.file = uploadedFile; fileItem.id = uploadedFile.id; + fileItem.collection_name = uploadedFile?.meta?.collection_name; fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`; - // TODO: Check if tools & functions have files support to skip this step to delegate file processing - // Default Upload to VectorDB - if ( - SUPPORTED_FILE_TYPE.includes(file['type']) || - SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) - ) { - processFileItem(fileItem); - } else { - toast.error( - $i18n.t(`Unknown file type '{{file_type}}'. Proceeding with the file upload anyway.`, { - file_type: file['type'] - }) - ); - processFileItem(fileItem); - } + files = files; } else { files = files.filter((item) => item.status !== null); } @@ -156,24 +137,6 @@ } }; - const processFileItem = async (fileItem) => { - try { - const res = await processDocToVectorDB(localStorage.token, fileItem.id); - - if (res) { - fileItem.status = 'processed'; - fileItem.collection_name = res.collection_name; - files = files; - } - } catch (e) { - // Remove the failed doc from the files array - // files = files.filter((f) => f.id !== fileItem.id); - toast.error(e); - fileItem.status = 'processed'; - files = files; - } - }; - const inputFilesHandler = async (inputFiles) => { inputFiles.forEach((file) => { console.log(file, file.name.split('.').at(-1)); @@ -270,7 +233,7 @@
-
+
{#if autoScroll === false && history?.currentId}
{#if atSelectedModel !== undefined}
model profile model.id === atSelectedModel.id)?.info?.meta ?.profile_image_url ?? ($i18n.language === 'dg-DG' @@ -352,8 +315,8 @@
-
-
+
+
@@ -400,7 +363,7 @@ class="w-full flex gap-1.5" on:submit|preventDefault={() => { // check if selectedModels support image input - submitPrompt(prompt); + dispatch('submit', prompt); }} >
{:else} { files.splice(fileIdx, 1); files = files; }} + on:click={() => { + console.log(file); + }} /> {/if} {/each} @@ -527,9 +495,7 @@ id="chat-textarea" bind:this={chatTextAreaElement} class="scrollbar-hidden bg-gray-50 dark:bg-gray-850 dark:text-gray-100 outline-none w-full py-3 px-1 rounded-xl resize-none h-[48px]" - placeholder={chatInputPlaceholder !== '' - ? chatInputPlaceholder - : $i18n.t('Send a Message')} + placeholder={placeholder ? placeholder : $i18n.t('Send a Message')} bind:value={prompt} on:keypress={(e) => { if ( @@ -547,7 +513,7 @@ // Submit the prompt when Enter key is pressed if (prompt !== '' && e.key === 'Enter' && !e.shiftKey) { - submitPrompt(prompt); + dispatch('submit', prompt); } } }} @@ -784,7 +750,7 @@ stream = null; showCallOverlay.set(true); - dispatch('call'); + showControls.set(true); } catch (err) { // If the user denies the permission or an error occurs, show an error message toast.error($i18n.t('Permission denied when accessing media devices')); @@ -849,22 +815,7 @@
{/if} - -
- {$i18n.t('LLMs can make mistakes. Verify important information.')} -
- - diff --git a/src/lib/components/chat/MessageInput/CallOverlay.svelte b/src/lib/components/chat/MessageInput/CallOverlay.svelte index 9e2d01a0b9..6f3b465a6c 100644 --- a/src/lib/components/chat/MessageInput/CallOverlay.svelte +++ b/src/lib/components/chat/MessageInput/CallOverlay.svelte @@ -1,9 +1,6 @@ diff --git a/src/lib/components/chat/MessageInput/Commands.svelte b/src/lib/components/chat/MessageInput/Commands.svelte index 88877592cd..d23c4c8d84 100644 --- a/src/lib/components/chat/MessageInput/Commands.svelte +++ b/src/lib/components/chat/MessageInput/Commands.svelte @@ -5,11 +5,11 @@ const dispatch = createEventDispatcher(); import Prompts from './Commands/Prompts.svelte'; - import Documents from './Commands/Documents.svelte'; + import Knowledge from './Commands/Knowledge.svelte'; import Models from './Commands/Models.svelte'; import { removeLastWordFromString } from '$lib/utils'; - import { uploadWebToVectorDB, uploadYoutubeTranscriptionToVectorDB } from '$lib/apis/rag'; + import { processWeb, processYoutubeVideo } from '$lib/apis/retrieval'; export let prompt = ''; export let files = []; @@ -30,7 +30,7 @@ const uploadWeb = async (url) => { console.log(url); - const doc = { + const fileItem = { type: 'doc', name: url, collection_name: '', @@ -40,25 +40,30 @@ }; try { - files = [...files, doc]; - const res = await uploadWebToVectorDB(localStorage.token, '', url); + files = [...files, fileItem]; + const res = await processWeb(localStorage.token, '', url); if (res) { - doc.status = 'processed'; - doc.collection_name = res.collection_name; + fileItem.status = 'processed'; + fileItem.collection_name = res.collection_name; + fileItem.file = { + content: res.content, + ...fileItem.file + }; + files = files; } } catch (e) { // Remove the failed doc from the files array files = files.filter((f) => f.name !== url); - toast.error(e); + toast.error(JSON.stringify(e)); } }; const uploadYoutubeTranscription = async (url) => { console.log(url); - const doc = { + const 
fileItem = { type: 'doc', name: url, collection_name: '', @@ -68,12 +73,16 @@ }; try { - files = [...files, doc]; - const res = await uploadYoutubeTranscriptionToVectorDB(localStorage.token, url); + files = [...files, fileItem]; + const res = await processYoutubeVideo(localStorage.token, url); if (res) { - doc.status = 'processed'; - doc.collection_name = res.collection_name; + fileItem.status = 'processed'; + fileItem.collection_name = res.collection_name; + fileItem.file = { + content: res.content, + ...fileItem.file + }; files = files; } } catch (e) { @@ -88,7 +97,7 @@ {#if command?.charAt(0) === '/'} {:else if command?.charAt(0) === '#'} - - import { createEventDispatcher } from 'svelte'; - - import { documents } from '$lib/stores'; - import { removeLastWordFromString, isValidHttpUrl } from '$lib/utils'; - import { tick, getContext } from 'svelte'; import { toast } from 'svelte-sonner'; + import Fuse from 'fuse.js'; + + import { createEventDispatcher, tick, getContext, onMount } from 'svelte'; + import { removeLastWordFromString, isValidHttpUrl } from '$lib/utils'; + import { knowledge } from '$lib/stores'; const i18n = getContext('i18n'); @@ -14,60 +14,22 @@ const dispatch = createEventDispatcher(); let selectedIdx = 0; + let items = []; + let fuse = null; + let filteredItems = []; - let filteredDocs = []; - - let collections = []; - - $: collections = [ - ...($documents.length > 0 - ? [ - { - name: 'All Documents', - type: 'collection', - title: $i18n.t('All Documents'), - collection_names: $documents.map((doc) => doc.collection_name) - } - ] - : []), - ...$documents - .reduce((a, e, i, arr) => { - return [...new Set([...a, ...(e?.content?.tags ?? []).map((tag) => tag.name)])]; - }, []) - .map((tag) => ({ - name: tag, - type: 'collection', - collection_names: $documents - .filter((doc) => (doc?.content?.tags ?? 
[]).map((tag) => tag.name).includes(tag)) - .map((doc) => doc.collection_name) - })) - ]; - - $: filteredCollections = collections - .filter((collection) => findByName(collection, command)) - .sort((a, b) => a.name.localeCompare(b.name)); - - $: filteredDocs = $documents - .filter((doc) => findByName(doc, command)) - .sort((a, b) => a.title.localeCompare(b.title)); - - $: filteredItems = [...filteredCollections, ...filteredDocs]; + $: if (fuse) { + filteredItems = command.slice(1) + ? fuse.search(command).map((e) => { + return e.item; + }) + : items; + } $: if (command) { selectedIdx = 0; - - console.log(filteredCollections); } - type ObjectWithName = { - name: string; - }; - - const findByName = (obj: ObjectWithName, command: string) => { - const name = obj.name.toLowerCase(); - return name.includes(command.toLowerCase().split(' ')?.at(0)?.substring(1) ?? ''); - }; - export const selectUp = () => { selectedIdx = Math.max(0, selectedIdx - 1); }; @@ -76,8 +38,8 @@ selectedIdx = Math.min(selectedIdx + 1, filteredItems.length - 1); }; - const confirmSelect = async (doc) => { - dispatch('select', doc); + const confirmSelect = async (item) => { + dispatch('select', item); prompt = removeLastWordFromString(prompt, command); const chatInputElement = document.getElementById('chat-textarea'); @@ -108,55 +70,108 @@ chatInputElement?.focus(); await tick(); }; + + onMount(() => { + let legacy_documents = $knowledge.filter((item) => item?.meta?.document); + let legacy_collections = + legacy_documents.length > 0 + ? [ + { + name: 'All Documents', + legacy: true, + type: 'collection', + description: 'Deprecated (legacy collection), please create a new knowledge base.', + title: $i18n.t('All Documents'), + collection_names: legacy_documents.map((item) => item.id) + }, + + ...legacy_documents + .reduce((a, item) => { + return [...new Set([...a, ...(item?.meta?.tags ?? 
[]).map((tag) => tag.name)])]; + }, []) + .map((tag) => ({ + name: tag, + legacy: true, + type: 'collection', + description: 'Deprecated (legacy collection), please create a new knowledge base.', + collection_names: legacy_documents + .filter((item) => (item?.meta?.tags ?? []).map((tag) => tag.name).includes(tag)) + .map((item) => item.id) + })) + ] + : []; + + items = [...$knowledge, ...legacy_collections].map((item) => { + return { + ...item, + ...(item?.legacy || item?.meta?.legacy || item?.meta?.document ? { legacy: true } : {}) + }; + }); + + fuse = new Fuse(items, { + keys: ['name', 'description'] + }); + }); {#if filteredItems.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')}
-
#
+
#
- {#each filteredItems as doc, docIdx} + {#each filteredItems as item, idx} {/each} diff --git a/src/lib/components/chat/MessageInput/Commands/Models.svelte b/src/lib/components/chat/MessageInput/Commands/Models.svelte index 4b660a62c4..768c514217 100644 --- a/src/lib/components/chat/MessageInput/Commands/Models.svelte +++ b/src/lib/components/chat/MessageInput/Commands/Models.svelte @@ -1,4 +1,6 @@ -{#if filteredModels.length > 0} +{#if filteredItems.length > 0}
-
@
+
@
- {#each filteredModels as model, modelIdx} + {#each filteredItems as model, modelIdx} -
- {/each} - - -
-
- - diff --git a/src/lib/components/chat/Messages.svelte b/src/lib/components/chat/Messages.svelte index 83ea3714f1..a835020757 100644 --- a/src/lib/components/chat/Messages.svelte +++ b/src/lib/components/chat/Messages.svelte @@ -1,17 +1,19 @@ -
+
{#if Object.keys(history?.messages ?? {}).length == 0} - { let text = p; @@ -382,8 +385,10 @@ {continueResponse} {mergeResponses} {readOnly} + on:submit={async (e) => { + dispatch('submit', e.detail); + }} on:action={async (e) => { - const message = history.messages[message.id]; if (typeof e.detail === 'string') { await chatActionHandler(chatId, e.detail, message.model, message.id); } else { diff --git a/src/lib/components/chat/Messages/CodeBlock.svelte b/src/lib/components/chat/Messages/CodeBlock.svelte index 15aea5e350..a714718f30 100644 --- a/src/lib/components/chat/Messages/CodeBlock.svelte +++ b/src/lib/components/chat/Messages/CodeBlock.svelte @@ -5,21 +5,34 @@ import { v4 as uuidv4 } from 'uuid'; - import { getContext, getAllContexts, onMount } from 'svelte'; + import { getContext, getAllContexts, onMount, tick, createEventDispatcher } from 'svelte'; import { copyToClipboard } from '$lib/utils'; import 'highlight.js/styles/github-dark.min.css'; import PyodideWorker from '$lib/workers/pyodide.worker?worker'; + import CodeEditor from '$lib/components/common/CodeEditor.svelte'; + import SvgPanZoom from '$lib/components/common/SVGPanZoom.svelte'; const i18n = getContext('i18n'); + const dispatch = createEventDispatcher(); export let id = ''; + export let save = false; export let token; export let lang = ''; export let code = ''; + let _code = ''; + $: if (code) { + updateCode(); + } + + const updateCode = () => { + _code = code; + }; + let _token = null; let mermaidHtml = null; @@ -32,6 +45,18 @@ let result = null; let copied = false; + let saved = false; + + const saveCode = () => { + saved = true; + + code = _code; + dispatch('save', code); + + setTimeout(() => { + saved = false; + }, 1000); + }; const copyCode = async () => { copied = true; @@ -233,22 +258,11 @@ __builtins__.input = input`); (async () => { await drawMermaidDiagram(); })(); - } else { - // Function to perform the code highlighting - const highlightCode = () => { - highlightedCode = 
hljs.highlightAuto(code, hljs.getLanguage(lang)?.aliases).value || code; - }; - - // Clear the previous timeout if it exists - clearTimeout(debounceTimeout); - // Set a new timeout to debounce the code highlighting - debounceTimeout = setTimeout(highlightCode, 10); } }; $: if (token) { if (JSON.stringify(token) !== JSON.stringify(_token)) { - console.log('hi'); _token = token; } } @@ -257,8 +271,14 @@ __builtins__.input = input`); render(); } + $: dispatch('code', { lang, code }); + onMount(async () => { console.log('codeblock', lang, code); + + if (lang) { + dispatch('code', { lang, code }); + } if (document.documentElement.classList.contains('dark')) { mermaid.initialize({ startOnLoad: true, @@ -275,64 +295,92 @@ __builtins__.input = input`); }); -
- {#if lang === 'mermaid'} - {#if mermaidHtml} - {@html `${mermaidHtml}`} +
+
+ {#if lang === 'mermaid'} + {#if mermaidHtml} + + {:else} +
{code}
+ {/if} {:else} -
{code}
- {/if} - {:else} -
-
{lang}
+
+ {lang} +
-
- {#if lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code))} - {#if executing} -
Running
- {:else} - +
+
+ {#if lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code))} + {#if executing} +
Running
+ {:else} + + {/if} {/if} - {/if} - -
-
-
{#if highlightedCode}{@html highlightedCode}{:else}{code}{/if}
+ {#if save} + + {/if} -
+ +
+
- {#if executing} -
-
STDOUT/STDERR
-
Running...
-
- {:else if stdout || stderr || result} -
-
STDOUT/STDERR
-
{stdout || stderr || result}
+
+
+ { + saveCode(); + }} + on:change={(e) => { + _code = e.detail.value; + }} + />
+ +
+ + {#if executing} +
+
STDOUT/STDERR
+
Running...
+
+ {:else if stdout || stderr || result} +
+
STDOUT/STDERR
+
{stdout || stderr || result}
+
+ {/if} {/if} - {/if} +
diff --git a/src/lib/components/chat/Messages/ContentRenderer.svelte b/src/lib/components/chat/Messages/ContentRenderer.svelte new file mode 100644 index 0000000000..d40bb4ad98 --- /dev/null +++ b/src/lib/components/chat/Messages/ContentRenderer.svelte @@ -0,0 +1,208 @@ + + +
+ { + dispatch('update', e.detail); + }} + on:code={(e) => { + const { lang, code } = e.detail; + + if ( + (['html', 'svg'].includes(lang) || (lang === 'xml' && code.includes('svg'))) && + !$mobile && + $chatId + ) { + showArtifacts.set(true); + showControls.set(true); + } + }} + /> +
+ +{#if floatingButtons} + +{/if} diff --git a/src/lib/components/chat/Messages/Error.svelte b/src/lib/components/chat/Messages/Error.svelte index a1fed2f421..3a6d7cc30d 100644 --- a/src/lib/components/chat/Messages/Error.svelte +++ b/src/lib/components/chat/Messages/Error.svelte @@ -1,26 +1,15 @@ -
- - - +
+
+ +
-
- {content} +
+ {typeof content === 'string' ? content : JSON.stringify(content)}
diff --git a/src/lib/components/chat/Messages/Markdown.svelte b/src/lib/components/chat/Messages/Markdown.svelte index 2c2f74d768..1c627919ae 100644 --- a/src/lib/components/chat/Messages/Markdown.svelte +++ b/src/lib/components/chat/Messages/Markdown.svelte @@ -1,14 +1,20 @@ {#key id} - + { + dispatch('update', e.detail); + }} + on:code={(e) => { + dispatch('code', e.detail); + }} + /> {/key} diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte index 8029c0a1b9..7fc61c3270 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte @@ -1,18 +1,24 @@ + +{#key mounted} +
+ {#if $temporaryChatEnabled} + +
+ Temporary Chat +
+
+ {/if} + +
+
+
+
+
+ {#each models as model, modelIdx} + tag.name.toUpperCase()) + .join(', ')} + placement="top" + > + + + {/each} +
+
+ +
+ {#if models[selectedModelIdx]?.info} + {models[selectedModelIdx]?.info?.name} + {:else} + {$i18n.t('Hello, {{name}}', { name: $user.name })} + {/if} +
+
+ +
+
+ {#if models[selectedModelIdx]?.info?.meta?.description ?? null} + +
+ {@html marked.parse( + sanitizeResponseContent(models[selectedModelIdx]?.info?.meta?.description) + )} +
+
+ + {#if models[selectedModelIdx]?.info?.meta?.user} +
+ By + {#if models[selectedModelIdx]?.info?.meta?.user.community} + {models[selectedModelIdx]?.info?.meta?.user.name + ? models[selectedModelIdx]?.info?.meta?.user.name + : `@${models[selectedModelIdx]?.info?.meta?.user.username}`} + {:else} + {models[selectedModelIdx]?.info?.meta?.user.name} + {/if} +
+ {/if} + {/if} +
+
+ +
+ { + dispatch('submit', e.detail); + }} + /> +
+
+
+
+
+ { + selectSuggestionPrompt(e.detail); + }} + /> +
+
+
+{/key} diff --git a/src/lib/components/chat/Settings/Interface.svelte b/src/lib/components/chat/Settings/Interface.svelte index 58de9be4c8..50cdc0559d 100644 --- a/src/lib/components/chat/Settings/Interface.svelte +++ b/src/lib/components/chat/Settings/Interface.svelte @@ -29,6 +29,7 @@ let defaultModelId = ''; let showUsername = false; + let landingPageMode = ''; let chatBubble = true; let chatDirection: 'LTR' | 'RTL' = 'LTR'; @@ -56,6 +57,11 @@ saveSettings({ chatBubble: chatBubble }); }; + const toggleLandingPageMode = async () => { + landingPageMode = landingPageMode === '' ? 'chat' : ''; + saveSettings({ landingPageMode: landingPageMode }); + }; + const toggleShowUsername = async () => { showUsername = !showUsername; saveSettings({ showUsername: showUsername }); @@ -150,6 +156,7 @@ showEmojiInCall = $settings.showEmojiInCall ?? false; voiceInterruption = $settings.voiceInterruption ?? false; + landingPageMode = $settings.landingPageMode ?? ''; chatBubble = $settings.chatBubble ?? true; widescreenMode = $settings.widescreenMode ?? false; splitLargeChunks = $settings.splitLargeChunks ?? false; @@ -229,6 +236,26 @@
{$i18n.t('UI')}
+
+
+
{$i18n.t('Landing Page Mode')}
+ + +
+
+
{$i18n.t('Chat Bubble UI')}
diff --git a/src/lib/components/chat/Suggestions.svelte b/src/lib/components/chat/Suggestions.svelte new file mode 100644 index 0000000000..a73d796171 --- /dev/null +++ b/src/lib/components/chat/Suggestions.svelte @@ -0,0 +1,53 @@ + + +{#if prompts.length > 0} +
+ + {$i18n.t('Suggested')} +
+{/if} + +
+ {#each prompts as prompt, promptIdx} + + {/each} +
diff --git a/src/lib/components/common/Badge.svelte b/src/lib/components/common/Badge.svelte new file mode 100644 index 0000000000..92e0c3bdf8 --- /dev/null +++ b/src/lib/components/common/Badge.svelte @@ -0,0 +1,18 @@ + + +
+ {content} +
diff --git a/src/lib/components/common/CodeEditor.svelte b/src/lib/components/common/CodeEditor.svelte index 9453c6ce32..e9769e0c63 100644 --- a/src/lib/components/common/CodeEditor.svelte +++ b/src/lib/components/common/CodeEditor.svelte @@ -7,10 +7,15 @@ import { indentWithTab } from '@codemirror/commands'; import { indentUnit } from '@codemirror/language'; - import { python } from '@codemirror/lang-python'; + import { languages } from '@codemirror/language-data'; + + // import { python } from '@codemirror/lang-python'; + // import { javascript } from '@codemirror/lang-javascript'; + import { oneDark } from '@codemirror/theme-one-dark'; - import { onMount, createEventDispatcher, getContext } from 'svelte'; + import { onMount, createEventDispatcher, getContext, tick } from 'svelte'; + import { formatPythonCode } from '$lib/apis/utils'; import { toast } from 'svelte-sonner'; @@ -19,15 +24,40 @@ export let boilerplate = ''; export let value = ''; + let _value = ''; + + $: if (value) { + updateValue(); + } + + const updateValue = () => { + if (_value !== value) { + _value = value; + if (codeEditor) { + codeEditor.dispatch({ + changes: [{ from: 0, to: codeEditor.state.doc.length, insert: _value }] + }); + } + } + }; + + export let id = ''; + export let lang = ''; let codeEditor; let isDarkMode = false; let editorTheme = new Compartment(); + let editorLanguage = new Compartment(); + + const getLang = async () => { + const language = languages.find((l) => l.alias.includes(lang)); + return await language?.load(); + }; export const formatPythonCodeHandler = async () => { if (codeEditor) { - const res = await formatPythonCode(value).catch((error) => { + const res = await formatPythonCode(_value).catch((error) => { toast.error(error); return null; }); @@ -38,6 +68,10 @@ changes: [{ from: 0, to: codeEditor.state.doc.length, insert: formattedCode }] }); + _value = formattedCode; + dispatch('change', { value: _value }); + await tick(); + toast.success($i18n.t('Code formatted 
successfully')); return true; } @@ -49,33 +83,49 @@ let extensions = [ basicSetup, keymap.of([{ key: 'Tab', run: acceptCompletion }, indentWithTab]), - python(), indentUnit.of(' '), placeholder('Enter your code here...'), EditorView.updateListener.of((e) => { if (e.docChanged) { - value = e.state.doc.toString(); + _value = e.state.doc.toString(); + dispatch('change', { value: _value }); } }), - editorTheme.of([]) + editorTheme.of([]), + editorLanguage.of([]) ]; + $: if (lang) { + setLanguage(); + } + + const setLanguage = async () => { + const language = await getLang(); + if (language) { + codeEditor.dispatch({ + effects: editorLanguage.reconfigure(language) + }); + } + }; + onMount(() => { console.log(value); if (value === '') { value = boilerplate; } + _value = value; + // Check if html class has dark mode isDarkMode = document.documentElement.classList.contains('dark'); // python code editor, highlight python code codeEditor = new EditorView({ state: EditorState.create({ - doc: value, + doc: _value, extensions: extensions }), - parent: document.getElementById('code-textarea') + parent: document.getElementById(`code-textarea-${id}`) }); if (isDarkMode) { @@ -133,4 +183,4 @@ }); -
+
diff --git a/src/lib/components/common/Collapsible.svelte b/src/lib/components/common/Collapsible.svelte index df032b7ef2..1f6faf48b2 100644 --- a/src/lib/components/common/Collapsible.svelte +++ b/src/lib/components/common/Collapsible.svelte @@ -12,8 +12,8 @@
{#if title !== null} - +
{#if dismissible}
+ +
{/if} -
+ diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte new file mode 100644 index 0000000000..6408ad05da --- /dev/null +++ b/src/lib/components/common/FileItemModal.svelte @@ -0,0 +1,108 @@ + + + +
+
+
+ + +
+ +
+
+ +
+
+
+ {#if item.size} +
{formatFileSize(item.size)}
+ • + {/if} + + {#if item?.file?.data?.content} +
+ {getLineCount(item?.file?.data?.content ?? '')} extracted lines +
+ +
+ + + Formatting may be inconsistent from source. +
+ {/if} +
+ + {#if edit} +
+ +
+ {#if enableFullContent} + Using Entire Document + {:else} + Using Focused Retrieval + {/if} + { + item.context = e.detail ? 'full' : undefined; + }} + /> +
+
+
+ {/if} +
+
+
+ +
+ {item?.file?.data?.content ?? 'No content'} +
+
+
diff --git a/src/lib/components/common/SVGPanZoom.svelte b/src/lib/components/common/SVGPanZoom.svelte new file mode 100644 index 0000000000..549fd25000 --- /dev/null +++ b/src/lib/components/common/SVGPanZoom.svelte @@ -0,0 +1,29 @@ + + +
+
+ {@html svg} +
+
diff --git a/src/lib/components/documents/AddDocModal.svelte b/src/lib/components/documents/AddDocModal.svelte index 10164be97d..8c4d478f7f 100644 --- a/src/lib/components/documents/AddDocModal.svelte +++ b/src/lib/components/documents/AddDocModal.svelte @@ -3,16 +3,13 @@ import dayjs from 'dayjs'; import { onMount, getContext } from 'svelte'; - import { createNewDoc, getDocs, tagDocByName, updateDocByName } from '$lib/apis/documents'; + import { getDocs } from '$lib/apis/documents'; import Modal from '../common/Modal.svelte'; import { documents } from '$lib/stores'; - import TagInput from '../common/Tags/TagInput.svelte'; - import Tags from '../common/Tags.svelte'; - import { addTagById } from '$lib/apis/chats'; - import { uploadDocToVectorDB } from '$lib/apis/rag'; - import { transformFileName } from '$lib/utils'; import { SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILE_TYPE } from '$lib/constants'; + import Tags from '../common/Tags.svelte'; + const i18n = getContext('i18n'); export let show = false; diff --git a/src/lib/components/icons/ArrowPath.svelte b/src/lib/components/icons/ArrowPath.svelte new file mode 100644 index 0000000000..5b31652d6d --- /dev/null +++ b/src/lib/components/icons/ArrowPath.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/ArrowsPointingOut.svelte b/src/lib/components/icons/ArrowsPointingOut.svelte new file mode 100644 index 0000000000..efc7f98b6b --- /dev/null +++ b/src/lib/components/icons/ArrowsPointingOut.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/BarsArrowUp.svelte b/src/lib/components/icons/BarsArrowUp.svelte new file mode 100644 index 0000000000..d34dbde676 --- /dev/null +++ b/src/lib/components/icons/BarsArrowUp.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/BookOpen.svelte b/src/lib/components/icons/BookOpen.svelte new file mode 100644 index 0000000000..5a77433d5c --- /dev/null +++ b/src/lib/components/icons/BookOpen.svelte @@ -0,0 +1,19 @@ + + + + + diff 
--git a/src/lib/components/icons/Cube.svelte b/src/lib/components/icons/Cube.svelte new file mode 100644 index 0000000000..4015b2d7cc --- /dev/null +++ b/src/lib/components/icons/Cube.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/FloppyDisk.svelte b/src/lib/components/icons/FloppyDisk.svelte new file mode 100644 index 0000000000..bcb481e826 --- /dev/null +++ b/src/lib/components/icons/FloppyDisk.svelte @@ -0,0 +1,20 @@ + + + diff --git a/src/lib/components/icons/FolderOpen.svelte b/src/lib/components/icons/FolderOpen.svelte new file mode 100644 index 0000000000..f6b3c64b30 --- /dev/null +++ b/src/lib/components/icons/FolderOpen.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/Info.svelte b/src/lib/components/icons/Info.svelte new file mode 100644 index 0000000000..2849ac532b --- /dev/null +++ b/src/lib/components/icons/Info.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/icons/LightBlub.svelte b/src/lib/components/icons/LightBlub.svelte new file mode 100644 index 0000000000..0778e6dad3 --- /dev/null +++ b/src/lib/components/icons/LightBlub.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/components/layout/Navbar.svelte b/src/lib/components/layout/Navbar.svelte index 7c6c113690..ccc486d8b8 100644 --- a/src/lib/components/layout/Navbar.svelte +++ b/src/lib/components/layout/Navbar.svelte @@ -39,8 +39,13 @@ -