From a4bc0b2829a6260d33e84a5768409e338d5fbb34 Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Thu, 26 Sep 2024 20:39:40 +0900 Subject: [PATCH 001/252] fix: Fix OpenAI batch embedding --- backend/open_webui/apps/rag/main.py | 4 +++- backend/open_webui/apps/rag/utils.py | 2 -- backend/open_webui/apps/rag/vector/dbs/milvus.py | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/apps/rag/main.py b/backend/open_webui/apps/rag/main.py index 74855b336a..4efa1d81c2 100644 --- a/backend/open_webui/apps/rag/main.py +++ b/backend/open_webui/apps/rag/main.py @@ -1112,13 +1112,15 @@ def store_docs_in_vector_db( app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, ) + embedding_texts = list(map(lambda x: x.replace("\n", " "), texts)) + VECTOR_DB_CLIENT.insert( collection_name=collection_name, items=[ { "id": str(uuid.uuid4()), "text": text, - "vector": embedding_function(text.replace("\n", " ")), + "vector": embedding_texts[idx], "metadata": metadatas[idx], } for idx, text in enumerate(texts) diff --git a/backend/open_webui/apps/rag/utils.py b/backend/open_webui/apps/rag/utils.py index 73ccfad387..f9443d3804 100644 --- a/backend/open_webui/apps/rag/utils.py +++ b/backend/open_webui/apps/rag/utils.py @@ -76,8 +76,6 @@ def query_doc( limit=k, ) - print("result", result) - log.info(f"query_doc:result {result}") return result except Exception as e: diff --git a/backend/open_webui/apps/rag/vector/dbs/milvus.py b/backend/open_webui/apps/rag/vector/dbs/milvus.py index f205b95215..33ec6035ac 100644 --- a/backend/open_webui/apps/rag/vector/dbs/milvus.py +++ b/backend/open_webui/apps/rag/vector/dbs/milvus.py @@ -16,8 +16,6 @@ class MilvusClient: self.client = Client(uri=MILVUS_URI) def _result_to_get_result(self, result) -> GetResult: - print(result) - ids = [] documents = [] metadatas = [] @@ -45,8 +43,6 @@ class MilvusClient: ) def _result_to_search_result(self, result) -> SearchResult: - print(result) - ids = [] distances = [] documents = [] From 4fe1f2487dde8458964d756a402c0a9278f87620 Mon Sep 17 00:00:00 2001 From: Hwang In Tak Date: Thu, 26 Sep 2024 20:48:14 +0900 Subject: [PATCH 002/252] fix: Fix OpenAI batch embedding --- backend/open_webui/apps/rag/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/apps/rag/main.py b/backend/open_webui/apps/rag/main.py index 4efa1d81c2..7b476c0563 100644 --- a/backend/open_webui/apps/rag/main.py +++ b/backend/open_webui/apps/rag/main.py @@ -1112,7 +1112,9 @@ def store_docs_in_vector_db( app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE, ) - embedding_texts = list(map(lambda x: x.replace("\n", " "), texts)) + embedding_texts = embedding_function( + list(map(lambda x: x.replace("\n", " "), texts)) + ) VECTOR_DB_CLIENT.insert( collection_name=collection_name, From 4c92a0f57110e9c7c27e970eb96ca4011f0df580 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Thu, 26 Sep 2024 21:33:37 +0200 Subject: [PATCH 003/252] chore: chromadb, pymilvus bump --- backend/requirements.txt | 4 ++-- pyproject.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index 2554bb5f88..764e41d3d9 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -39,8 +39,8 @@ langchain-community==0.2.12 langchain-chroma==0.1.2 fake-useragent==1.5.1 -chromadb==0.5.5 -pymilvus==2.4.6 +chromadb==0.5.9 +pymilvus==2.4.7 sentence-transformers==3.0.1 colbert-ai==0.2.21 diff --git a/pyproject.toml b/pyproject.toml index b2558e4d1a..d02281d521 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,8 +46,8 @@ dependencies = [ "langchain-chroma==0.1.2", "fake-useragent==1.5.1", - "chromadb==0.5.5", - "pymilvus==2.4.6", + "chromadb==0.5.9", + "pymilvus==2.4.7", "sentence-transformers==3.0.1", "colbert-ai==0.2.21", From 1715446b13ba8cc8dba536823ee5e0444d256dc0 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Thu, 26 Sep 2024 21:45:19 +0200 Subject: [PATCH 004/252] fix: call mode persisting after width change issue --- src/lib/components/chat/ChatControls.svelte | 16 ++++- .../chat/MessageInput/CallOverlay.svelte | 64 ++++++++++--------- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/lib/components/chat/ChatControls.svelte b/src/lib/components/chat/ChatControls.svelte index 9cc44ce38d..8a7c925ab3 100644 --- a/src/lib/components/chat/ChatControls.svelte +++ b/src/lib/components/chat/ChatControls.svelte @@ -2,7 +2,7 @@ import { SvelteFlowProvider } from '@xyflow/svelte'; import { slide } from 'svelte/transition'; - import { onDestroy, onMount } from 'svelte'; + import { onDestroy, onMount, tick } from 'svelte'; import { mobile, showControls, showCallOverlay, showOverview } from '$lib/stores'; import Modal from '../common/Modal.svelte'; @@ -35,11 +35,23 @@ // listen to resize 1024px const mediaQuery = window.matchMedia('(min-width: 1024px)'); - const handleMediaQuery = (e) => { + const handleMediaQuery = async (e) => { if (e.matches) { largeScreen = true; + + if ($showCallOverlay) { + showCallOverlay.set(false); + await tick(); + showCallOverlay.set(true); + } } else { largeScreen = false; + + if ($showCallOverlay) { + showCallOverlay.set(false); + await tick(); + showCallOverlay.set(true); + } pane = null; } }; diff --git a/src/lib/components/chat/MessageInput/CallOverlay.svelte b/src/lib/components/chat/MessageInput/CallOverlay.svelte index 9e2d01a0b9..b5c8ea1df4 100644 --- a/src/lib/components/chat/MessageInput/CallOverlay.svelte +++ b/src/lib/components/chat/MessageInput/CallOverlay.svelte @@ -1,9 +1,6 @@ From d8f71e1d7ff8fbe8e2a79c91bdb6e58ff27f6ace Mon Sep 17 00:00:00 2001 From: smonux Date: Thu, 26 Sep 2024 22:02:56 +0200 Subject: [PATCH 005/252] Some models produce almost correct json during function calling, but with additional data before of after it. This solves it. --- backend/open_webui/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 4af48906b1..dadae0e040 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -440,6 +440,7 @@ async def chat_completion_tools_handler( if not content: return body, {} + content = content[content.find("{") : content.rfind("}") + 1] result = json.loads(content) tool_function_name = result.get("name", None) From 619c81472b9511525b0769e5fe1d5a0a4d91c95a Mon Sep 17 00:00:00 2001 From: Ethan <92686703+not-a-ethan@users.noreply.github.com> Date: Thu, 26 Sep 2024 16:27:49 -0400 Subject: [PATCH 006/252] Migration link updated --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e83324ead3..c8ad500370 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ docker run --rm --volume /var/run/docker.sock:/var/run/docker.sock containrrr/wa In the last part of the command, replace `open-webui` with your container name if it is different. -Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/migration/). +Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/tutorials/migration/). ### Using the Dev Branch 🌙 From 719f4da1dcdd91d083165aa4956d2a8441d8e665 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Thu, 26 Sep 2024 22:59:09 +0200 Subject: [PATCH 007/252] fix: milvus collection creation issue --- backend/open_webui/apps/rag/vector/dbs/milvus.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/apps/rag/vector/dbs/milvus.py b/backend/open_webui/apps/rag/vector/dbs/milvus.py index 33ec6035ac..b4a6a77b3a 100644 --- a/backend/open_webui/apps/rag/vector/dbs/milvus.py +++ b/backend/open_webui/apps/rag/vector/dbs/milvus.py @@ -98,7 +98,10 @@ class MilvusClient: index_params = self.client.prepare_index_params() index_params.add_index( - field_name="vector", index_type="HNSW", metric_type="COSINE", params={} + field_name="vector", + index_type="HNSW", + metric_type="COSINE", + params={"M": 16, "efConstruction": 100}, ) self.client.create_collection( From be74a4c9c14612bb82bd1b55a43712f3be773080 Mon Sep 17 00:00:00 2001 From: kivvi Date: Fri, 27 Sep 2024 20:18:13 +0800 Subject: [PATCH 008/252] Fix: O1 does not support the system parameter --- backend/open_webui/apps/openai/main.py | 45 ++++++++++++++------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/backend/open_webui/apps/openai/main.py b/backend/open_webui/apps/openai/main.py index e0a40a1f54..99461b5908 100644 --- a/backend/open_webui/apps/openai/main.py +++ b/backend/open_webui/apps/openai/main.py @@ -27,7 +27,6 @@ from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel from starlette.background import BackgroundTask - from open_webui.utils.payload import ( apply_model_params_to_body_openai, apply_model_system_prompt_to_body, @@ -47,7 +46,6 @@ app.add_middleware( allow_headers=["*"], ) - app.state.config = AppConfig() app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER @@ -193,8 +191,8 @@ async def fetch_url(url, key): async def cleanup_response( - response: Optional[aiohttp.ClientResponse], - session: Optional[aiohttp.ClientSession], + response: Optional[aiohttp.ClientResponse], + session: Optional[aiohttp.ClientSession], ): if response: response.close() @@ -219,18 +217,18 @@ def merge_models_lists(model_lists): } for model in models if "api.openai.com" - not in app.state.config.OPENAI_API_BASE_URLS[idx] - or not any( - name in model["id"] - for name in [ - "babbage", - "dall-e", - "davinci", - "embedding", - "tts", - "whisper", - ] - ) + not in app.state.config.OPENAI_API_BASE_URLS[idx] + or not any( + name in model["id"] + for name in [ + "babbage", + "dall-e", + "davinci", + "embedding", + "tts", + "whisper", + ] + ) ] ) @@ -373,9 +371,9 @@ async def get_models(url_idx: Optional[int] = None, user=Depends(get_verified_us @app.post("/chat/completions") @app.post("/chat/completions/{url_idx}") async def generate_chat_completion( - form_data: dict, - url_idx: Optional[int] = None, - user=Depends(get_verified_user), + form_data: dict, + url_idx: Optional[int] = None, + user=Depends(get_verified_user), ): idx = 0 payload = {**form_data} @@ -407,20 +405,25 @@ async def generate_chat_completion( url = app.state.config.OPENAI_API_BASE_URLS[idx] key = app.state.config.OPENAI_API_KEYS[idx] + is_o1 = payload["model"].lower().startswith("o1") # Change max_completion_tokens to max_tokens (Backward compatible) - if "api.openai.com" not in url and not payload["model"].lower().startswith("o1-"): + if "api.openai.com" not in url and not is_o1: if "max_completion_tokens" in payload: # Remove "max_completion_tokens" from the payload payload["max_tokens"] = payload["max_completion_tokens"] del payload["max_completion_tokens"] else: - if payload["model"].lower().startswith("o1-") and "max_tokens" in payload: + if is_o1 and "max_tokens" in payload: payload["max_completion_tokens"] = payload["max_tokens"] del payload["max_tokens"] if "max_tokens" in payload and "max_completion_tokens" in payload: del payload["max_tokens"] + # Fix: O1 does not support the "system" parameter, Modify "system" to "user" + if is_o1 and payload["messages"][0]["role"] == "system": + payload["messages"][0]["role"] = "user" + # Convert the modified body back to JSON payload = json.dumps(payload) From 0bd9d59c78213ad2b3cd9c1440bef5ca5d619edc Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 27 Sep 2024 14:38:56 +0200 Subject: [PATCH 009/252] refac: update check timeout --- backend/open_webui/main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 4af48906b1..ca829cfa1b 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -2153,7 +2153,8 @@ async def get_app_changelog(): @app.get("/api/version/updates") async def get_app_latest_release_version(): try: - async with aiohttp.ClientSession(trust_env=True) as session: + timeout = aiohttp.ClientTimeout(total=1) + async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get( "https://api.github.com/repos/open-webui/open-webui/releases/latest" ) as response: @@ -2163,10 +2164,7 @@ async def get_app_latest_release_version(): return {"current": VERSION, "latest": latest_version[1:]} except aiohttp.ClientError: - raise HTTPException( - status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=ERROR_MESSAGES.RATE_LIMIT_EXCEEDED, - ) + return {"current": VERSION, "latest": VERSION} ############################ From 44d768ecf3b53055fa249bf9a7031f80b93d5887 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 27 Sep 2024 14:41:29 +0200 Subject: [PATCH 010/252] refac: do not wait for update check --- src/routes/(app)/+layout.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routes/(app)/+layout.svelte b/src/routes/(app)/+layout.svelte index ad2f085cf7..83a53dffd9 100644 --- a/src/routes/(app)/+layout.svelte +++ b/src/routes/(app)/+layout.svelte @@ -206,10 +206,10 @@ const now = new Date(); if (now - dismissedUpdateToast > 24 * 60 * 60 * 1000) { - await checkForVersionUpdates(); + checkForVersionUpdates(); } } else { - await checkForVersionUpdates(); + checkForVersionUpdates(); } } await tick(); From 464b6a329edbb353a0b7a51efd1ebbc31a7a092d Mon Sep 17 00:00:00 2001 From: Aleix Dorca Date: Fri, 27 Sep 2024 16:11:52 +0200 Subject: [PATCH 011/252] Update catalan translation.json --- src/lib/i18n/locales/ca-ES/translation.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/i18n/locales/ca-ES/translation.json b/src/lib/i18n/locales/ca-ES/translation.json index 25907c0f2d..bd61795585 100644 --- a/src/lib/i18n/locales/ca-ES/translation.json +++ b/src/lib/i18n/locales/ca-ES/translation.json @@ -9,7 +9,7 @@ "{{user}}'s Chats": "Els xats de {{user}}", "{{webUIName}} Backend Required": "El Backend de {{webUIName}} és necessari", "*Prompt node ID(s) are required for image generation": "*Els identificadors de nodes d'indicacions són necessaris per a la generació d'imatges", - "A new version (v{{LATEST_VERSION}}) is now available.": "", + "A new version (v{{LATEST_VERSION}}) is now available.": "Hi ha una nova versió disponible (v{{LATEST_VERSION}}).", "A task model is used when performing tasks such as generating titles for chats and web search queries": "Un model de tasca s'utilitza quan es realitzen tasques com ara generar títols per a xats i consultes de cerca per a la web", "a user": "un usuari", "About": "Sobre", @@ -466,7 +466,7 @@ "Oops! Looks like the URL is invalid. Please double-check and try again.": "Ui! Sembla que l'URL no és vàlida. Si us plau, revisa-la i torna-ho a provar.", "Oops! There was an error in the previous response. Please try again or contact admin.": "Ui! Hi ha hagut un error en la resposta anterior. Torna a provar-ho o contacta amb un administrador", "Oops! You're using an unsupported method (frontend only). Please serve the WebUI from the backend.": "Ui! Estàs utilitzant un mètode no suportat (només frontend). Si us plau, serveix la WebUI des del backend.", - "Open file": "", + "Open file": "Obrir arxiu", "Open new chat": "Obre un xat nou", "Open WebUI version (v{{OPEN_WEBUI_VERSION}}) is lower than required version (v{{REQUIRED_VERSION}})": "La versió d'Open WebUI (v{{OPEN_WEBUI_VERSION}}) és inferior a la versió requerida (v{{REQUIRED_VERSION}})", "OpenAI": "OpenAI", @@ -478,7 +478,7 @@ "Other": "Altres", "Output format": "Format de sortida", "Overview": "Vista general", - "page": "", + "page": "pàgina", "Password": "Contrasenya", "PDF document (.pdf)": "Document PDF (.pdf)", "PDF Extract Images (OCR)": "Extreu imatges del PDF (OCR)", @@ -497,7 +497,7 @@ "Plain text (.txt)": "Text pla (.txt)", "Playground": "Zona de jocs", "Please carefully review the following warnings:": "Si us plau, revisa els següents avisos amb cura:", - "Please select a reason": "", + "Please select a reason": "Si us plau, selecciona una raó", "Positive attitude": "Actitud positiva", "Previous 30 days": "30 dies anteriors", "Previous 7 days": "7 dies anteriors", @@ -704,7 +704,7 @@ "Unpin": "Alliberar", "Update": "Actualitzar", "Update and Copy Link": "Actualitzar i copiar l'enllaç", - "Update for the latest features and improvements.": "", + "Update for the latest features and improvements.": "Actualitza per a les darreres característiques i millores.", "Update password": "Actualitzar la contrasenya", "Updated at": "Actualitzat", "Upload": "Pujar", From e13614e11bcd50ecd206411b5e49ec25520d0865 Mon Sep 17 00:00:00 2001 From: kivvi Date: Fri, 27 Sep 2024 20:18:13 +0800 Subject: [PATCH 012/252] Fix: O1 does not support the system parameter --- backend/open_webui/apps/openai/main.py | 45 ++++++++++++++------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/backend/open_webui/apps/openai/main.py b/backend/open_webui/apps/openai/main.py index e0a40a1f54..5768fe6450 100644 --- a/backend/open_webui/apps/openai/main.py +++ b/backend/open_webui/apps/openai/main.py @@ -27,7 +27,6 @@ from fastapi.responses import FileResponse, StreamingResponse from pydantic import BaseModel from starlette.background import BackgroundTask - from open_webui.utils.payload import ( apply_model_params_to_body_openai, apply_model_system_prompt_to_body, @@ -47,7 +46,6 @@ app.add_middleware( allow_headers=["*"], ) - app.state.config = AppConfig() app.state.config.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER @@ -193,8 +191,8 @@ async def fetch_url(url, key): async def cleanup_response( - response: Optional[aiohttp.ClientResponse], - session: Optional[aiohttp.ClientSession], + response: Optional[aiohttp.ClientResponse], + session: Optional[aiohttp.ClientSession], ): if response: response.close() @@ -219,18 +217,18 @@ def merge_models_lists(model_lists): } for model in models if "api.openai.com" - not in app.state.config.OPENAI_API_BASE_URLS[idx] - or not any( - name in model["id"] - for name in [ - "babbage", - "dall-e", - "davinci", - "embedding", - "tts", - "whisper", - ] - ) + not in app.state.config.OPENAI_API_BASE_URLS[idx] + or not any( + name in model["id"] + for name in [ + "babbage", + "dall-e", + "davinci", + "embedding", + "tts", + "whisper", + ] + ) ] ) @@ -373,9 +371,9 @@ async def get_models(url_idx: Optional[int] = None, user=Depends(get_verified_us @app.post("/chat/completions") @app.post("/chat/completions/{url_idx}") async def generate_chat_completion( - form_data: dict, - url_idx: Optional[int] = None, - user=Depends(get_verified_user), + form_data: dict, + url_idx: Optional[int] = None, + user=Depends(get_verified_user), ): idx = 0 payload = {**form_data} @@ -407,20 +405,25 @@ async def generate_chat_completion( url = app.state.config.OPENAI_API_BASE_URLS[idx] key = app.state.config.OPENAI_API_KEYS[idx] + is_o1 = payload["model"].lower().startswith("o1-") # Change max_completion_tokens to max_tokens (Backward compatible) - if "api.openai.com" not in url and not payload["model"].lower().startswith("o1-"): + if "api.openai.com" not in url and not is_o1: if "max_completion_tokens" in payload: # Remove "max_completion_tokens" from the payload payload["max_tokens"] = payload["max_completion_tokens"] del payload["max_completion_tokens"] else: - if payload["model"].lower().startswith("o1-") and "max_tokens" in payload: + if is_o1 and "max_tokens" in payload: payload["max_completion_tokens"] = payload["max_tokens"] del payload["max_tokens"] if "max_tokens" in payload and "max_completion_tokens" in payload: del payload["max_tokens"] + # Fix: O1 does not support the "system" parameter, Modify "system" to "user" + if is_o1 and payload["messages"][0]["role"] == "system": + payload["messages"][0]["role"] = "user" + # Convert the modified body back to JSON payload = json.dumps(payload) From 4ead3c5b8059a55fa65f22b26c5763da0b1d5809 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 27 Sep 2024 19:43:40 +0200 Subject: [PATCH 013/252] chore: format --- backend/open_webui/apps/openai/main.py | 34 +++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/backend/open_webui/apps/openai/main.py b/backend/open_webui/apps/openai/main.py index 5768fe6450..9d62f32d22 100644 --- a/backend/open_webui/apps/openai/main.py +++ b/backend/open_webui/apps/openai/main.py @@ -191,8 +191,8 @@ async def fetch_url(url, key): async def cleanup_response( - response: Optional[aiohttp.ClientResponse], - session: Optional[aiohttp.ClientSession], + response: Optional[aiohttp.ClientResponse], + session: Optional[aiohttp.ClientSession], ): if response: response.close() @@ -217,18 +217,18 @@ def merge_models_lists(model_lists): } for model in models if "api.openai.com" - not in app.state.config.OPENAI_API_BASE_URLS[idx] - or not any( - name in model["id"] - for name in [ - "babbage", - "dall-e", - "davinci", - "embedding", - "tts", - "whisper", - ] - ) + not in app.state.config.OPENAI_API_BASE_URLS[idx] + or not any( + name in model["id"] + for name in [ + "babbage", + "dall-e", + "davinci", + "embedding", + "tts", + "whisper", + ] + ) ] ) @@ -371,9 +371,9 @@ async def get_models(url_idx: Optional[int] = None, user=Depends(get_verified_us @app.post("/chat/completions") @app.post("/chat/completions/{url_idx}") async def generate_chat_completion( - form_data: dict, - url_idx: Optional[int] = None, - user=Depends(get_verified_user), + form_data: dict, + url_idx: Optional[int] = None, + user=Depends(get_verified_user), ): idx = 0 payload = {**form_data} From eab30781e0e02033c66df1f659118204ab3bbc98 Mon Sep 17 00:00:00 2001 From: Hugo Haldi <20846785+HaldiH@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:04:45 +0200 Subject: [PATCH 014/252] Chat completion 401 when no Authorization header When we send a request to `/api/chat/completions` without the `Authorization` header, the server just crashes and creates a stack trace, returning "Internal Server Error" to the calling client. With this fix, the server sends a 401 to the client with the content `{"detail": "Not authenticated"}`. --- backend/open_webui/main.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 4af48906b1..9c075f3674 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -761,10 +761,22 @@ class PipelineMiddleware(BaseHTTPMiddleware): # Parse string to JSON data = json.loads(body_str) if body_str else {} - user = get_current_user( - request, - get_http_authorization_cred(request.headers["Authorization"]), - ) + try: + user = get_current_user( + request, + get_http_authorization_cred(request.headers["Authorization"]), + ) + except KeyError as e: + if len(e.args) > 1: + return JSONResponse( + status_code=e.args[0], + content={"detail": e.args[1]}, + ) + else: + return JSONResponse( + status_code=status.HTTP_401_UNAUTHORIZED, + content={"detail": "Not authenticated"}, + ) try: data = filter_pipeline(data, user) From e1103305f5466dd57767d0f6bcb388839b0a98b3 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 01:27:46 +0200 Subject: [PATCH 015/252] refac: "rag" endpoints renamed to "retrieval" --- .../apps/{rag => retrieval}/main.py | 3 +- .../apps/{rag => retrieval}/search/brave.py | 0 .../{rag => retrieval}/search/duckduckgo.py | 0 .../{rag => retrieval}/search/google_pse.py | 0 .../{rag => retrieval}/search/jina_search.py | 0 .../apps/{rag => retrieval}/search/main.py | 0 .../{rag => retrieval}/search/searchapi.py | 0 .../apps/{rag => retrieval}/search/searxng.py | 0 .../apps/{rag => retrieval}/search/serper.py | 0 .../apps/{rag => retrieval}/search/serply.py | 0 .../{rag => retrieval}/search/serpstack.py | 0 .../apps/{rag => retrieval}/search/tavily.py | 0 .../search/testdata/brave.json | 0 .../search/testdata/google_pse.json | 0 .../search/testdata/searchapi.json | 0 .../search/testdata/searxng.json | 0 .../search/testdata/serper.json | 0 .../search/testdata/serply.json | 0 .../search/testdata/serpstack.json | 0 .../apps/{rag => retrieval}/utils.py | 0 .../{rag => retrieval}/vector/connector.py | 0 .../{rag => retrieval}/vector/dbs/chroma.py | 0 .../{rag => retrieval}/vector/dbs/milvus.py | 0 .../apps/{rag => retrieval}/vector/main.py | 0 backend/open_webui/main.py | 65 +++++++++++-------- src/lib/components/chat/MessageInput.svelte | 5 +- src/lib/constants.ts | 2 +- 27 files changed, 41 insertions(+), 34 deletions(-) rename backend/open_webui/apps/{rag => retrieval}/main.py (99%) rename backend/open_webui/apps/{rag => retrieval}/search/brave.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/duckduckgo.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/google_pse.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/jina_search.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/main.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/searchapi.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/searxng.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/serper.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/serply.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/serpstack.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/tavily.py (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/brave.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/google_pse.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/searchapi.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/searxng.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/serper.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/serply.json (100%) rename backend/open_webui/apps/{rag => retrieval}/search/testdata/serpstack.json (100%) rename backend/open_webui/apps/{rag => retrieval}/utils.py (100%) rename backend/open_webui/apps/{rag => retrieval}/vector/connector.py (100%) rename backend/open_webui/apps/{rag => retrieval}/vector/dbs/chroma.py (100%) rename backend/open_webui/apps/{rag => retrieval}/vector/dbs/milvus.py (100%) rename backend/open_webui/apps/{rag => retrieval}/vector/main.py (100%) diff --git a/backend/open_webui/apps/rag/main.py b/backend/open_webui/apps/retrieval/main.py similarity index 99% rename from backend/open_webui/apps/rag/main.py rename to backend/open_webui/apps/retrieval/main.py index 7b476c0563..d276bd80bb 100644 --- a/backend/open_webui/apps/rag/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1061,7 +1061,7 @@ def store_data_in_vector_db( if len(docs) > 0: log.info(f"store_data_in_vector_db {docs}") - return store_docs_in_vector_db(docs, collection_name, metadata, overwrite), None + return store_docs_in_vector_db(docs, collection_name, metadata, overwrite) else: raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) @@ -1377,6 +1377,7 @@ def process_doc( ) if result: + return { "status": True, "collection_name": collection_name, diff --git a/backend/open_webui/apps/rag/search/brave.py b/backend/open_webui/apps/retrieval/search/brave.py similarity index 100% rename from backend/open_webui/apps/rag/search/brave.py rename to backend/open_webui/apps/retrieval/search/brave.py diff --git a/backend/open_webui/apps/rag/search/duckduckgo.py b/backend/open_webui/apps/retrieval/search/duckduckgo.py similarity index 100% rename from backend/open_webui/apps/rag/search/duckduckgo.py rename to backend/open_webui/apps/retrieval/search/duckduckgo.py diff --git a/backend/open_webui/apps/rag/search/google_pse.py b/backend/open_webui/apps/retrieval/search/google_pse.py similarity index 100% rename from backend/open_webui/apps/rag/search/google_pse.py rename to backend/open_webui/apps/retrieval/search/google_pse.py diff --git a/backend/open_webui/apps/rag/search/jina_search.py b/backend/open_webui/apps/retrieval/search/jina_search.py similarity index 100% rename from backend/open_webui/apps/rag/search/jina_search.py rename to backend/open_webui/apps/retrieval/search/jina_search.py diff --git a/backend/open_webui/apps/rag/search/main.py b/backend/open_webui/apps/retrieval/search/main.py similarity index 100% rename from backend/open_webui/apps/rag/search/main.py rename to backend/open_webui/apps/retrieval/search/main.py diff --git a/backend/open_webui/apps/rag/search/searchapi.py b/backend/open_webui/apps/retrieval/search/searchapi.py similarity index 100% rename from backend/open_webui/apps/rag/search/searchapi.py rename to backend/open_webui/apps/retrieval/search/searchapi.py diff --git a/backend/open_webui/apps/rag/search/searxng.py b/backend/open_webui/apps/retrieval/search/searxng.py similarity index 100% rename from backend/open_webui/apps/rag/search/searxng.py rename to backend/open_webui/apps/retrieval/search/searxng.py diff --git a/backend/open_webui/apps/rag/search/serper.py b/backend/open_webui/apps/retrieval/search/serper.py similarity index 100% rename from backend/open_webui/apps/rag/search/serper.py rename to backend/open_webui/apps/retrieval/search/serper.py diff --git a/backend/open_webui/apps/rag/search/serply.py b/backend/open_webui/apps/retrieval/search/serply.py similarity index 100% rename from backend/open_webui/apps/rag/search/serply.py rename to backend/open_webui/apps/retrieval/search/serply.py diff --git a/backend/open_webui/apps/rag/search/serpstack.py b/backend/open_webui/apps/retrieval/search/serpstack.py similarity index 100% rename from backend/open_webui/apps/rag/search/serpstack.py rename to backend/open_webui/apps/retrieval/search/serpstack.py diff --git a/backend/open_webui/apps/rag/search/tavily.py b/backend/open_webui/apps/retrieval/search/tavily.py similarity index 100% rename from backend/open_webui/apps/rag/search/tavily.py rename to backend/open_webui/apps/retrieval/search/tavily.py diff --git a/backend/open_webui/apps/rag/search/testdata/brave.json b/backend/open_webui/apps/retrieval/search/testdata/brave.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/brave.json rename to backend/open_webui/apps/retrieval/search/testdata/brave.json diff --git a/backend/open_webui/apps/rag/search/testdata/google_pse.json b/backend/open_webui/apps/retrieval/search/testdata/google_pse.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/google_pse.json rename to backend/open_webui/apps/retrieval/search/testdata/google_pse.json diff --git a/backend/open_webui/apps/rag/search/testdata/searchapi.json b/backend/open_webui/apps/retrieval/search/testdata/searchapi.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/searchapi.json rename to backend/open_webui/apps/retrieval/search/testdata/searchapi.json diff --git a/backend/open_webui/apps/rag/search/testdata/searxng.json b/backend/open_webui/apps/retrieval/search/testdata/searxng.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/searxng.json rename to backend/open_webui/apps/retrieval/search/testdata/searxng.json diff --git a/backend/open_webui/apps/rag/search/testdata/serper.json b/backend/open_webui/apps/retrieval/search/testdata/serper.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/serper.json rename to backend/open_webui/apps/retrieval/search/testdata/serper.json diff --git a/backend/open_webui/apps/rag/search/testdata/serply.json b/backend/open_webui/apps/retrieval/search/testdata/serply.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/serply.json rename to backend/open_webui/apps/retrieval/search/testdata/serply.json diff --git a/backend/open_webui/apps/rag/search/testdata/serpstack.json b/backend/open_webui/apps/retrieval/search/testdata/serpstack.json similarity index 100% rename from backend/open_webui/apps/rag/search/testdata/serpstack.json rename to backend/open_webui/apps/retrieval/search/testdata/serpstack.json diff --git a/backend/open_webui/apps/rag/utils.py b/backend/open_webui/apps/retrieval/utils.py similarity index 100% rename from backend/open_webui/apps/rag/utils.py rename to backend/open_webui/apps/retrieval/utils.py diff --git a/backend/open_webui/apps/rag/vector/connector.py b/backend/open_webui/apps/retrieval/vector/connector.py similarity index 100% rename from backend/open_webui/apps/rag/vector/connector.py rename to backend/open_webui/apps/retrieval/vector/connector.py diff --git a/backend/open_webui/apps/rag/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py similarity index 100% rename from backend/open_webui/apps/rag/vector/dbs/chroma.py rename to backend/open_webui/apps/retrieval/vector/dbs/chroma.py diff --git a/backend/open_webui/apps/rag/vector/dbs/milvus.py b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py similarity index 100% rename from backend/open_webui/apps/rag/vector/dbs/milvus.py rename to backend/open_webui/apps/retrieval/vector/dbs/milvus.py diff --git a/backend/open_webui/apps/rag/vector/main.py b/backend/open_webui/apps/retrieval/vector/main.py similarity index 100% rename from backend/open_webui/apps/rag/vector/main.py rename to backend/open_webui/apps/retrieval/vector/main.py diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index b6fa63fc31..4c1a2053d1 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -16,37 +16,45 @@ from typing import Optional import aiohttp import requests - -from open_webui.apps.audio.main import app as audio_app -from open_webui.apps.images.main import app as images_app -from open_webui.apps.ollama.main import app as ollama_app from open_webui.apps.ollama.main import ( - GenerateChatCompletionForm, + app as ollama_app, + get_all_models as get_ollama_models, generate_chat_completion as generate_ollama_chat_completion, generate_openai_chat_completion as generate_ollama_openai_chat_completion, + GenerateChatCompletionForm, ) -from open_webui.apps.ollama.main import get_all_models as get_ollama_models -from open_webui.apps.openai.main import app as openai_app from open_webui.apps.openai.main import ( + app as openai_app, generate_chat_completion as generate_openai_chat_completion, + get_all_models as get_openai_models, ) -from open_webui.apps.openai.main import get_all_models as get_openai_models -from open_webui.apps.rag.main import app as rag_app -from open_webui.apps.rag.utils import get_rag_context, rag_template -from open_webui.apps.socket.main import app as socket_app, periodic_usage_pool_cleanup -from open_webui.apps.socket.main import get_event_call, get_event_emitter -from open_webui.apps.webui.internal.db import Session -from open_webui.apps.webui.main import app as webui_app + +from open_webui.apps.retrieval.main import app as retrieval_app +from open_webui.apps.retrieval.utils import get_rag_context, rag_template + +from open_webui.apps.socket.main import ( + app as socket_app, + periodic_usage_pool_cleanup, + get_event_call, + get_event_emitter, +) + from open_webui.apps.webui.main import ( + app as webui_app, generate_function_chat_completion, get_pipe_models, ) +from open_webui.apps.webui.internal.db import Session + from open_webui.apps.webui.models.auths import Auths from open_webui.apps.webui.models.functions import Functions from open_webui.apps.webui.models.models import Models from open_webui.apps.webui.models.users import UserModel, Users + from open_webui.apps.webui.utils import load_function_module_by_id +from open_webui.apps.audio.main import app as audio_app +from open_webui.apps.images.main import app as images_app from authlib.integrations.starlette_client import OAuth from authlib.oidc.core import UserInfo @@ -491,11 +499,11 @@ async def chat_completion_files_handler(body) -> tuple[dict, dict[str, list]]: contexts, citations = get_rag_context( files=files, messages=body["messages"], - embedding_function=rag_app.state.EMBEDDING_FUNCTION, - k=rag_app.state.config.TOP_K, - reranking_function=rag_app.state.sentence_transformer_rf, - r=rag_app.state.config.RELEVANCE_THRESHOLD, - hybrid_search=rag_app.state.config.ENABLE_RAG_HYBRID_SEARCH, + embedding_function=retrieval_app.state.EMBEDDING_FUNCTION, + k=retrieval_app.state.config.TOP_K, + reranking_function=retrieval_app.state.sentence_transformer_rf, + r=retrieval_app.state.config.RELEVANCE_THRESHOLD, + hybrid_search=retrieval_app.state.config.ENABLE_RAG_HYBRID_SEARCH, ) log.debug(f"rag_contexts: {contexts}, citations: {citations}") @@ -608,7 +616,7 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware): if prompt is None: raise Exception("No user message found") if ( - rag_app.state.config.RELEVANCE_THRESHOLD == 0 + retrieval_app.state.config.RELEVANCE_THRESHOLD == 0 and context_string.strip() == "" ): log.debug( @@ -620,14 +628,14 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware): if model["owned_by"] == "ollama": body["messages"] = prepend_to_first_user_message_content( rag_template( - rag_app.state.config.RAG_TEMPLATE, context_string, prompt + retrieval_app.state.config.RAG_TEMPLATE, context_string, prompt ), body["messages"], ) else: body["messages"] = add_or_update_system_message( rag_template( - rag_app.state.config.RAG_TEMPLATE, context_string, prompt + retrieval_app.state.config.RAG_TEMPLATE, context_string, prompt ), body["messages"], ) @@ -849,7 +857,7 @@ async def check_url(request: Request, call_next): async def update_embedding_function(request: Request, call_next): response = await call_next(request) if "/embedding/update" in request.url.path: - webui_app.state.EMBEDDING_FUNCTION = rag_app.state.EMBEDDING_FUNCTION + webui_app.state.EMBEDDING_FUNCTION = retrieval_app.state.EMBEDDING_FUNCTION return response @@ -877,11 +885,12 @@ app.mount("/openai", openai_app) app.mount("/images/api/v1", images_app) app.mount("/audio/api/v1", audio_app) -app.mount("/rag/api/v1", rag_app) +app.mount("/retrieval/api/v1", retrieval_app) app.mount("/api/v1", webui_app) -webui_app.state.EMBEDDING_FUNCTION = rag_app.state.EMBEDDING_FUNCTION + +webui_app.state.EMBEDDING_FUNCTION = retrieval_app.state.EMBEDDING_FUNCTION async def get_all_models(): @@ -2066,7 +2075,7 @@ async def get_app_config(request: Request): "enable_login_form": webui_app.state.config.ENABLE_LOGIN_FORM, **( { - "enable_web_search": rag_app.state.config.ENABLE_RAG_WEB_SEARCH, + "enable_web_search": retrieval_app.state.config.ENABLE_RAG_WEB_SEARCH, "enable_image_generation": images_app.state.config.ENABLED, "enable_community_sharing": webui_app.state.config.ENABLE_COMMUNITY_SHARING, "enable_message_rating": webui_app.state.config.ENABLE_MESSAGE_RATING, @@ -2092,8 +2101,8 @@ async def get_app_config(request: Request): }, }, "file": { - "max_size": rag_app.state.config.FILE_MAX_SIZE, - "max_count": rag_app.state.config.FILE_MAX_COUNT, + "max_size": retrieval_app.state.config.FILE_MAX_SIZE, + "max_count": retrieval_app.state.config.FILE_MAX_COUNT, }, "permissions": {**webui_app.state.config.USER_PERMISSIONS}, } diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index ea6b0aec8e..0a10d53938 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -159,16 +159,13 @@ const processFileItem = async (fileItem) => { try { const res = await processDocToVectorDB(localStorage.token, fileItem.id); - if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; files = files; } } catch (e) { - // Remove the failed doc from the files array - // files = files.filter((f) => f.id !== fileItem.id); - toast.error(e); + // We keep the file in the files list even if it fails to process fileItem.status = 'processed'; files = files; } diff --git a/src/lib/constants.ts b/src/lib/constants.ts index ad7b5c29ed..8820c0d99a 100644 --- a/src/lib/constants.ts +++ b/src/lib/constants.ts @@ -11,7 +11,7 @@ export const OLLAMA_API_BASE_URL = `${WEBUI_BASE_URL}/ollama`; export const OPENAI_API_BASE_URL = `${WEBUI_BASE_URL}/openai`; export const AUDIO_API_BASE_URL = `${WEBUI_BASE_URL}/audio/api/v1`; export const IMAGES_API_BASE_URL = `${WEBUI_BASE_URL}/images/api/v1`; -export const RAG_API_BASE_URL = `${WEBUI_BASE_URL}/rag/api/v1`; +export const RAG_API_BASE_URL = `${WEBUI_BASE_URL}/retrieval/api/v1`; export const WEBUI_VERSION = APP_VERSION; export const WEBUI_BUILD_HASH = APP_BUILD_HASH; From 5b7cf889153dbc6f81b49364d19d15622af40400 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 01:28:45 +0200 Subject: [PATCH 016/252] refac --- backend/open_webui/apps/retrieval/main.py | 26 +++++++++---------- .../open_webui/apps/retrieval/search/brave.py | 2 +- .../apps/retrieval/search/duckduckgo.py | 2 +- .../apps/retrieval/search/google_pse.py | 2 +- .../apps/retrieval/search/jina_search.py | 2 +- .../apps/retrieval/search/searchapi.py | 2 +- .../apps/retrieval/search/searxng.py | 2 +- .../apps/retrieval/search/serper.py | 2 +- .../apps/retrieval/search/serply.py | 2 +- .../apps/retrieval/search/serpstack.py | 2 +- .../apps/retrieval/search/tavily.py | 2 +- backend/open_webui/apps/retrieval/utils.py | 2 +- .../apps/retrieval/vector/connector.py | 4 +-- .../apps/retrieval/vector/dbs/chroma.py | 2 +- .../apps/retrieval/vector/dbs/milvus.py | 2 +- .../open_webui/apps/webui/routers/memories.py | 2 +- 16 files changed, 29 insertions(+), 29 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index d276bd80bb..e4f98b8b0c 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -20,18 +20,18 @@ from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, sta from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel -from open_webui.apps.rag.search.main import SearchResult -from open_webui.apps.rag.search.brave import search_brave -from open_webui.apps.rag.search.duckduckgo import search_duckduckgo -from open_webui.apps.rag.search.google_pse import search_google_pse -from open_webui.apps.rag.search.jina_search import search_jina -from open_webui.apps.rag.search.searchapi import search_searchapi -from open_webui.apps.rag.search.searxng import search_searxng -from open_webui.apps.rag.search.serper import search_serper -from open_webui.apps.rag.search.serply import search_serply -from open_webui.apps.rag.search.serpstack import search_serpstack -from open_webui.apps.rag.search.tavily import search_tavily -from open_webui.apps.rag.utils import ( +from open_webui.apps.retrieval.search.main import SearchResult +from open_webui.apps.retrieval.search.brave import search_brave +from open_webui.apps.retrieval.search.duckduckgo import search_duckduckgo +from open_webui.apps.retrieval.search.google_pse import search_google_pse +from open_webui.apps.retrieval.search.jina_search import search_jina +from open_webui.apps.retrieval.search.searchapi import search_searchapi +from open_webui.apps.retrieval.search.searxng import search_searxng +from open_webui.apps.retrieval.search.serper import search_serper +from open_webui.apps.retrieval.search.serply import search_serply +from open_webui.apps.retrieval.search.serpstack import search_serpstack +from open_webui.apps.retrieval.search.tavily import search_tavily +from open_webui.apps.retrieval.utils import ( get_embedding_function, get_model_path, query_collection, @@ -98,7 +98,7 @@ from open_webui.utils.misc import ( sanitize_filename, ) from open_webui.utils.utils import get_admin_user, get_verified_user -from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import ( diff --git a/backend/open_webui/apps/retrieval/search/brave.py b/backend/open_webui/apps/retrieval/search/brave.py index 2eb256b4bc..11a2938b2c 100644 --- a/backend/open_webui/apps/retrieval/search/brave.py +++ b/backend/open_webui/apps/retrieval/search/brave.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/duckduckgo.py b/backend/open_webui/apps/retrieval/search/duckduckgo.py index a8a580acad..82558ba374 100644 --- a/backend/open_webui/apps/retrieval/search/duckduckgo.py +++ b/backend/open_webui/apps/retrieval/search/duckduckgo.py @@ -1,7 +1,7 @@ import logging from typing import Optional -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from duckduckgo_search import DDGS from open_webui.env import SRC_LOG_LEVELS diff --git a/backend/open_webui/apps/retrieval/search/google_pse.py b/backend/open_webui/apps/retrieval/search/google_pse.py index a7f75a6c6d..c42851f478 100644 --- a/backend/open_webui/apps/retrieval/search/google_pse.py +++ b/backend/open_webui/apps/retrieval/search/google_pse.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/jina_search.py b/backend/open_webui/apps/retrieval/search/jina_search.py index 41cde679d1..f44f10d5cc 100644 --- a/backend/open_webui/apps/retrieval/search/jina_search.py +++ b/backend/open_webui/apps/retrieval/search/jina_search.py @@ -1,7 +1,7 @@ import logging import requests -from open_webui.apps.rag.search.main import SearchResult +from open_webui.apps.retrieval.search.main import SearchResult from open_webui.env import SRC_LOG_LEVELS from yarl import URL diff --git a/backend/open_webui/apps/retrieval/search/searchapi.py b/backend/open_webui/apps/retrieval/search/searchapi.py index 9ec9a07476..a648d6600c 100644 --- a/backend/open_webui/apps/retrieval/search/searchapi.py +++ b/backend/open_webui/apps/retrieval/search/searchapi.py @@ -3,7 +3,7 @@ from typing import Optional from urllib.parse import urlencode import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/searxng.py b/backend/open_webui/apps/retrieval/search/searxng.py index 26c534aa3c..14b6b40b5e 100644 --- a/backend/open_webui/apps/retrieval/search/searxng.py +++ b/backend/open_webui/apps/retrieval/search/searxng.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/serper.py b/backend/open_webui/apps/retrieval/search/serper.py index ed7cc2c5fb..afebe8097b 100644 --- a/backend/open_webui/apps/retrieval/search/serper.py +++ b/backend/open_webui/apps/retrieval/search/serper.py @@ -3,7 +3,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/serply.py b/backend/open_webui/apps/retrieval/search/serply.py index 260e9b30e2..266fd666a2 100644 --- a/backend/open_webui/apps/retrieval/search/serply.py +++ b/backend/open_webui/apps/retrieval/search/serply.py @@ -3,7 +3,7 @@ from typing import Optional from urllib.parse import urlencode import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/serpstack.py b/backend/open_webui/apps/retrieval/search/serpstack.py index 962c1a5b30..236fb51815 100644 --- a/backend/open_webui/apps/retrieval/search/serpstack.py +++ b/backend/open_webui/apps/retrieval/search/serpstack.py @@ -2,7 +2,7 @@ import logging from typing import Optional import requests -from open_webui.apps.rag.search.main import SearchResult, get_filtered_results +from open_webui.apps.retrieval.search.main import SearchResult, get_filtered_results from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/search/tavily.py b/backend/open_webui/apps/retrieval/search/tavily.py index a619d29edb..00f5b15c47 100644 --- a/backend/open_webui/apps/retrieval/search/tavily.py +++ b/backend/open_webui/apps/retrieval/search/tavily.py @@ -1,7 +1,7 @@ import logging import requests -from open_webui.apps.rag.search.main import SearchResult +from open_webui.apps.retrieval.search.main import SearchResult from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index f9443d3804..1fa30e6a0e 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -15,7 +15,7 @@ from open_webui.apps.ollama.main import ( GenerateEmbeddingsForm, generate_ollama_embeddings, ) -from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.utils.misc import get_last_user_message from open_webui.env import SRC_LOG_LEVELS diff --git a/backend/open_webui/apps/retrieval/vector/connector.py b/backend/open_webui/apps/retrieval/vector/connector.py index 073becdbeb..5b203271f7 100644 --- a/backend/open_webui/apps/retrieval/vector/connector.py +++ b/backend/open_webui/apps/retrieval/vector/connector.py @@ -1,5 +1,5 @@ -from open_webui.apps.rag.vector.dbs.chroma import ChromaClient -from open_webui.apps.rag.vector.dbs.milvus import MilvusClient +from open_webui.apps.retrieval.vector.dbs.chroma import ChromaClient +from open_webui.apps.retrieval.vector.dbs.milvus import MilvusClient from open_webui.config import VECTOR_DB diff --git a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py index 5f94201087..fe065f868e 100644 --- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py @@ -4,7 +4,7 @@ from chromadb.utils.batch_utils import create_batches from typing import Optional -from open_webui.apps.rag.vector.main import VectorItem, SearchResult, GetResult +from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult from open_webui.config import ( CHROMA_DATA_PATH, CHROMA_HTTP_HOST, diff --git a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py index b4a6a77b3a..77300acf21 100644 --- a/backend/open_webui/apps/retrieval/vector/dbs/milvus.py +++ b/backend/open_webui/apps/retrieval/vector/dbs/milvus.py @@ -4,7 +4,7 @@ import json from typing import Optional -from open_webui.apps.rag.vector.main import VectorItem, SearchResult, GetResult +from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult from open_webui.config import ( MILVUS_URI, ) diff --git a/backend/open_webui/apps/webui/routers/memories.py b/backend/open_webui/apps/webui/routers/memories.py index d659833bc2..ccf84a9d4c 100644 --- a/backend/open_webui/apps/webui/routers/memories.py +++ b/backend/open_webui/apps/webui/routers/memories.py @@ -4,7 +4,7 @@ import logging from typing import Optional from open_webui.apps.webui.models.memories import Memories, MemoryModel -from open_webui.apps.rag.vector.connector import VECTOR_DB_CLIENT +from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.utils.utils import get_verified_user from open_webui.env import SRC_LOG_LEVELS From a52e8cd5375e7347dc7c4bb74a6b1a114db40fe1 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 01:35:31 +0200 Subject: [PATCH 017/252] refac --- backend/open_webui/apps/retrieval/main.py | 8 ++++---- backend/open_webui/config.py | 2 +- src/lib/apis/{rag => retrieval}/index.ts | 4 ++-- src/lib/components/chat/Chat.svelte | 2 +- src/lib/components/chat/MessageInput.svelte | 5 +++-- src/lib/components/workspace/Documents.svelte | 4 ++-- src/lib/utils/rag/index.ts | 2 +- 7 files changed, 14 insertions(+), 13 deletions(-) rename src/lib/apis/{rag => retrieval}/index.ts (98%) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index e4f98b8b0c..8f23ea2c57 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -1340,14 +1340,14 @@ def store_doc( ) -class ProcessDocForm(BaseModel): +class ProcessFileForm(BaseModel): file_id: str collection_name: Optional[str] = None -@app.post("/process/doc") -def process_doc( - form_data: ProcessDocForm, +@app.post("/process/file") +def process_file( + form_data: ProcessFileForm, user=Depends(get_verified_user), ): try: diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index f531a8728d..2518599ca2 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -921,7 +921,7 @@ CHROMA_HTTP_SSL = os.environ.get("CHROMA_HTTP_SSL", "false").lower() == "true" MILVUS_URI = os.environ.get("MILVUS_URI", f"{DATA_DIR}/vector_db/milvus.db") #################################### -# RAG +# Information Retrieval (RAG) #################################### # RAG Content Extraction diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/retrieval/index.ts similarity index 98% rename from src/lib/apis/rag/index.ts rename to src/lib/apis/retrieval/index.ts index 3c0dba4b55..ce3a0c0a57 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -170,10 +170,10 @@ export const updateQuerySettings = async (token: string, settings: QuerySettings return res; }; -export const processDocToVectorDB = async (token: string, file_id: string) => { +export const processFile = async (token: string, file_id: string) => { let error = null; - const res = await fetch(`${RAG_API_BASE_URL}/process/doc`, { + const res = await fetch(`${RAG_API_BASE_URL}/process/file`, { method: 'POST', headers: { Accept: 'application/json', diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index 9db03ef532..e196936a6e 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -52,7 +52,7 @@ updateChatById } from '$lib/apis/chats'; import { generateOpenAIChatCompletion } from '$lib/apis/openai'; - import { runWebSearch } from '$lib/apis/rag'; + import { runWebSearch } from '$lib/apis/retrieval'; import { createOpenAITextStream } from '$lib/apis/streaming'; import { queryMemory } from '$lib/apis/memories'; import { getAndUpdateUserLocation, getUserSettings } from '$lib/apis/users'; diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index 0a10d53938..2ca0c8d207 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -17,7 +17,8 @@ import { blobToFile, findWordIndices } from '$lib/utils'; import { transcribeAudio } from '$lib/apis/audio'; - import { processDocToVectorDB } from '$lib/apis/rag'; + + import { processFile } from '$lib/apis/retrieval'; import { uploadFile } from '$lib/apis/files'; import { @@ -158,7 +159,7 @@ const processFileItem = async (fileItem) => { try { - const res = await processDocToVectorDB(localStorage.token, fileItem.id); + const res = await processFile(localStorage.token, fileItem.id); if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; diff --git a/src/lib/components/workspace/Documents.svelte b/src/lib/components/workspace/Documents.svelte index 38f46f7452..bba4af10fc 100644 --- a/src/lib/components/workspace/Documents.svelte +++ b/src/lib/components/workspace/Documents.svelte @@ -8,7 +8,7 @@ import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents'; import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants'; - import { processDocToVectorDB, uploadDocToVectorDB } from '$lib/apis/rag'; + import { processFile } from '$lib/apis/rag'; import { blobToFile, transformFileName } from '$lib/utils'; import Checkbox from '$lib/components/common/Checkbox.svelte'; @@ -74,7 +74,7 @@ return null; }); - const res = await processDocToVectorDB(localStorage.token, uploadedFile.id).catch((error) => { + const res = await processFile(localStorage.token, uploadedFile.id).catch((error) => { toast.error(error); return null; }); diff --git a/src/lib/utils/rag/index.ts b/src/lib/utils/rag/index.ts index ba1f29f888..6523bb7dff 100644 --- a/src/lib/utils/rag/index.ts +++ b/src/lib/utils/rag/index.ts @@ -1,4 +1,4 @@ -import { getRAGTemplate } from '$lib/apis/rag'; +import { getRAGTemplate } from '$lib/apis/retrieval'; export const RAGTemplate = async (token: string, context: string, query: string) => { let template = await getRAGTemplate(token).catch(() => { From c1b4fbf5c2002634cd3d9e6e8e2b25f51a580425 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 01:35:52 +0200 Subject: [PATCH 018/252] refac --- src/lib/components/workspace/Documents.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/workspace/Documents.svelte b/src/lib/components/workspace/Documents.svelte index bba4af10fc..0fa50278c7 100644 --- a/src/lib/components/workspace/Documents.svelte +++ b/src/lib/components/workspace/Documents.svelte @@ -8,7 +8,7 @@ import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents'; import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants'; - import { processFile } from '$lib/apis/rag'; + import { processFile } from '$lib/apis/retrieval'; import { blobToFile, transformFileName } from '$lib/utils'; import Checkbox from '$lib/components/common/Checkbox.svelte'; From 1b349016ffaef65ad4e4713689d21b4752bbc815 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 01:36:35 +0200 Subject: [PATCH 019/252] refac --- src/lib/components/admin/Settings/Documents.svelte | 2 +- src/lib/components/admin/Settings/WebSearch.svelte | 2 +- src/lib/components/chat/MessageInput/Commands.svelte | 2 +- src/lib/components/documents/AddDocModal.svelte | 9 +++------ 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index e06edce9dd..84f60847ef 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -17,7 +17,7 @@ resetUploadDir, getRAGConfig, updateRAGConfig - } from '$lib/apis/rag'; + } from '$lib/apis/retrieval'; import ResetUploadDirConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; import ResetVectorDBConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 15eba096b7..0a0c2eb165 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -1,5 +1,5 @@
@@ -37,17 +35,7 @@ class="h-14 {className} flex items-center space-x-3 {colorClassName} rounded-xl border border-gray-100 dark:border-gray-800 text-left" type="button" on:click={async () => { - if (clickHandler === null) { - if (url) { - if (type === 'file') { - window.open(`${url}/content`, '_blank').focus(); - } else { - window.open(`${url}`, '_blank').focus(); - } - } - } else { - clickHandler(); - } + dispatch('click'); }} >
From 2428878f4225f644b285ee6c1c1d251db96b165a Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 02:29:08 +0200 Subject: [PATCH 021/252] refac --- backend/open_webui/apps/retrieval/main.py | 96 ++++------------------- 1 file changed, 16 insertions(+), 80 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 3e1ec8854a..497a5685d6 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -246,10 +246,10 @@ app.add_middleware( class CollectionNameForm(BaseModel): - collection_name: Optional[str] = "test" + collection_name: Optional[str] = None -class UrlForm(CollectionNameForm): +class ProcessUrlForm(CollectionNameForm): url: str @@ -636,7 +636,6 @@ def store_data_in_vector_db( chunk_overlap=app.state.config.CHUNK_OVERLAP, add_start_index=True, ) - docs = text_splitter.split_documents(data) if len(docs) > 0: @@ -715,66 +714,6 @@ def store_docs_in_vector_db( return False -@app.post("/doc") -def store_doc( - collection_name: Optional[str] = Form(None), - file: UploadFile = File(...), - user=Depends(get_verified_user), -): - # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" - - log.info(f"file.content_type: {file.content_type}") - try: - unsanitized_filename = file.filename - filename = os.path.basename(unsanitized_filename) - - file_path = f"{UPLOAD_DIR}/{filename}" - - contents = file.file.read() - with open(file_path, "wb") as f: - f.write(contents) - f.close() - - f = open(file_path, "rb") - if collection_name is None: - collection_name = calculate_sha256(f)[:63] - f.close() - - loader = Loader( - engine=app.state.config.CONTENT_EXTRACTION_ENGINE, - TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, - PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, - ) - data = loader.load(filename, file.content_type, file_path) - - try: - result = store_data_in_vector_db(data, collection_name) - - if result: - return { - "status": True, - "collection_name": collection_name, - "filename": filename, - } - except Exception as e: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=e, - ) - except Exception as e: - log.exception(e) - if "No pandoc was found" in str(e): - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, - ) - else: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT(e), - ) - - class ProcessFileForm(BaseModel): file_id: str collection_name: Optional[str] = None @@ -796,11 +735,10 @@ def process_file( ) data = loader.load(file.filename, file.meta.get("content_type"), file_path) - f = open(file_path, "rb") collection_name = form_data.collection_name if collection_name is None: - collection_name = calculate_sha256(f)[:63] - f.close() + with open(file_path, "rb") as f: + collection_name = calculate_sha256(f)[:63] try: result = store_data_in_vector_db( @@ -813,11 +751,9 @@ def process_file( ) if result: - return { "status": True, "collection_name": collection_name, - "known_type": known_type, "filename": file.meta.get("name", file.filename), } except Exception as e: @@ -839,15 +775,15 @@ def process_file( ) -class TextRAGForm(BaseModel): +class ProcessTextForm(BaseModel): name: str content: str collection_name: Optional[str] = None -@app.post("/text") -def store_text( - form_data: TextRAGForm, +@app.post("/process/text") +def process_text( + form_data: ProcessTextForm, user=Depends(get_verified_user), ): collection_name = form_data.collection_name @@ -878,9 +814,8 @@ def process_docs_dir(user=Depends(get_admin_user)): filename = path.name file_content_type = mimetypes.guess_type(path) - f = open(path, "rb") - collection_name = calculate_sha256(f)[:63] - f.close() + with open(path, "rb") as f: + collection_name = calculate_sha256(f)[:63] loader = Loader( engine=app.state.config.CONTENT_EXTRACTION_ENGINE, @@ -933,7 +868,7 @@ def process_docs_dir(user=Depends(get_admin_user)): @app.post("/process/youtube") -def process_youtube_video(form_data: UrlForm, user=Depends(get_verified_user)): +def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: loader = YoutubeLoader.from_youtube_url( form_data.url, @@ -944,10 +879,11 @@ def process_youtube_video(form_data: UrlForm, user=Depends(get_verified_user)): data = loader.load() collection_name = form_data.collection_name - if collection_name == "": + if not collection_name: collection_name = calculate_sha256_string(form_data.url)[:63] store_data_in_vector_db(data, collection_name, overwrite=True) + return { "status": True, "collection_name": collection_name, @@ -962,8 +898,7 @@ def process_youtube_video(form_data: UrlForm, user=Depends(get_verified_user)): @app.post("/process/web") -def process_web(form_data: UrlForm, user=Depends(get_verified_user)): - # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" +def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: loader = get_web_loader( form_data.url, @@ -973,10 +908,11 @@ def process_web(form_data: UrlForm, user=Depends(get_verified_user)): data = loader.load() collection_name = form_data.collection_name - if collection_name == "": + if not collection_name: collection_name = calculate_sha256_string(form_data.url)[:63] store_data_in_vector_db(data, collection_name, overwrite=True) + return { "status": True, "collection_name": collection_name, From 00eb02245027220788d51675628978d8e75ce603 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 02:38:59 +0200 Subject: [PATCH 022/252] refac --- backend/open_webui/apps/retrieval/main.py | 119 ++++++++++------------ 1 file changed, 55 insertions(+), 64 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 497a5685d6..f2f4733c5b 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -628,40 +628,26 @@ async def update_query_settings( #################################### -def store_data_in_vector_db( - data, collection_name, metadata: Optional[dict] = None, overwrite: bool = False +def save_docs_to_vector_db( + docs, + collection_name, + metadata: Optional[dict] = None, + overwrite: bool = False, + split: bool = True, ) -> bool: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=app.state.config.CHUNK_SIZE, - chunk_overlap=app.state.config.CHUNK_OVERLAP, - add_start_index=True, - ) - docs = text_splitter.split_documents(data) + log.info(f"save_docs_to_vector_db {docs} {collection_name}") - if len(docs) > 0: - log.info(f"store_data_in_vector_db {docs}") - return store_docs_in_vector_db(docs, collection_name, metadata, overwrite) - else: + if split: + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=app.state.config.CHUNK_SIZE, + chunk_overlap=app.state.config.CHUNK_OVERLAP, + add_start_index=True, + ) + docs = text_splitter.split_documents(docs) + + if len(docs) == 0: raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) - -def store_text_in_vector_db( - text, metadata, collection_name, overwrite: bool = False -) -> bool: - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=app.state.config.CHUNK_SIZE, - chunk_overlap=app.state.config.CHUNK_OVERLAP, - add_start_index=True, - ) - docs = text_splitter.create_documents([text], metadatas=[metadata]) - return store_docs_in_vector_db(docs, collection_name, overwrite=overwrite) - - -def store_docs_in_vector_db( - docs, collection_name, metadata: Optional[dict] = None, overwrite: bool = False -) -> bool: - log.info(f"store_docs_in_vector_db {docs} {collection_name}") - texts = [doc.page_content for doc in docs] metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs] @@ -728,21 +714,24 @@ def process_file( file = Files.get_file_by_id(form_data.file_id) file_path = file.meta.get("path", f"{UPLOAD_DIR}/{file.filename}") - loader = Loader( - engine=app.state.config.CONTENT_EXTRACTION_ENGINE, - TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, - PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, - ) - data = loader.load(file.filename, file.meta.get("content_type"), file_path) - collection_name = form_data.collection_name if collection_name is None: with open(file_path, "rb") as f: collection_name = calculate_sha256(f)[:63] + loader = Loader( + engine=app.state.config.CONTENT_EXTRACTION_ENGINE, + TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, + PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, + ) + docs = loader.load(file.filename, file.meta.get("content_type"), file_path) + + raw_content = " ".join([doc.page_content for doc in docs]) + print(raw_content) + try: - result = store_data_in_vector_db( - data, + result = save_docs_to_vector_db( + docs, collection_name, { "file_id": form_data.file_id, @@ -790,11 +779,13 @@ def process_text( if collection_name is None: collection_name = calculate_sha256_string(form_data.content) - result = store_text_in_vector_db( - form_data.content, - metadata={"name": form_data.name, "created_by": user.id}, - collection_name=collection_name, - ) + docs = [ + Document( + page_content=form_data.content, + metadata={"name": form_data.name, "created_by": user.id}, + ) + ] + result = save_docs_to_vector_db(docs, collection_name) if result: return {"status": True, "collection_name": collection_name} @@ -822,10 +813,10 @@ def process_docs_dir(user=Depends(get_admin_user)): TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) - data = loader.load(filename, file_content_type[0], str(path)) + docs = loader.load(filename, file_content_type[0], str(path)) try: - result = store_data_in_vector_db(data, collection_name) + result = save_docs_to_vector_db(docs, collection_name) if result: sanitized_filename = sanitize_filename(filename) @@ -870,19 +861,19 @@ def process_docs_dir(user=Depends(get_admin_user)): @app.post("/process/youtube") def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: + collection_name = form_data.collection_name + if not collection_name: + collection_name = calculate_sha256_string(form_data.url)[:63] + loader = YoutubeLoader.from_youtube_url( form_data.url, add_video_info=True, language=app.state.config.YOUTUBE_LOADER_LANGUAGE, translation=app.state.YOUTUBE_LOADER_TRANSLATION, ) - data = loader.load() + docs = loader.load() - collection_name = form_data.collection_name - if not collection_name: - collection_name = calculate_sha256_string(form_data.url)[:63] - - store_data_in_vector_db(data, collection_name, overwrite=True) + save_docs_to_vector_db(docs, collection_name, overwrite=True) return { "status": True, @@ -900,18 +891,17 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u @app.post("/process/web") def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: + collection_name = form_data.collection_name + if not collection_name: + collection_name = calculate_sha256_string(form_data.url)[:63] + loader = get_web_loader( form_data.url, verify_ssl=app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, ) - data = loader.load() - - collection_name = form_data.collection_name - if not collection_name: - collection_name = calculate_sha256_string(form_data.url)[:63] - - store_data_in_vector_db(data, collection_name, overwrite=True) + docs = loader.load() + save_docs_to_vector_db(docs, collection_name, overwrite=True) return { "status": True, @@ -1060,15 +1050,16 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)): ) try: - urls = [result.link for result in web_results] - loader = get_web_loader(urls) - data = loader.load() - collection_name = form_data.collection_name if collection_name == "": collection_name = calculate_sha256_string(form_data.query)[:63] - store_data_in_vector_db(data, collection_name, overwrite=True) + urls = [result.link for result in web_results] + + loader = get_web_loader(urls) + docs = loader.load() + save_docs_to_vector_db(docs, collection_name, overwrite=True) + return { "status": True, "collection_name": collection_name, From b8b994a82040a79c37bd6e73bc11f1dd264e2c61 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 02:49:18 +0200 Subject: [PATCH 023/252] refac --- backend/open_webui/apps/retrieval/loader/main.py | 11 ++++++++++- backend/open_webui/apps/retrieval/main.py | 2 -- backend/requirements.txt | 2 ++ pyproject.toml | 2 ++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/apps/retrieval/loader/main.py b/backend/open_webui/apps/retrieval/loader/main.py index f4c948b430..b435fa21f3 100644 --- a/backend/open_webui/apps/retrieval/loader/main.py +++ b/backend/open_webui/apps/retrieval/loader/main.py @@ -1,5 +1,7 @@ import requests import logging +import ftfy + from langchain_community.document_loaders import ( BSHTMLLoader, @@ -122,7 +124,14 @@ class Loader: self, filename: str, file_content_type: str, file_path: str ) -> list[Document]: loader = self._get_loader(filename, file_content_type, file_path) - return loader.load() + docs = loader.load() + + return [ + Document( + page_content=ftfy.fix_text(doc.page_content), metadata=doc.metadata + ) + for doc in docs + ] def _get_loader(self, filename: str, file_content_type: str, file_path: str): file_ext = filename.split(".")[-1].lower() diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index f2f4733c5b..9c2ec141fe 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -725,7 +725,6 @@ def process_file( PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) docs = loader.load(file.filename, file.meta.get("content_type"), file_path) - raw_content = " ".join([doc.page_content for doc in docs]) print(raw_content) @@ -872,7 +871,6 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u translation=app.state.YOUTUBE_LOADER_TRANSLATION, ) docs = loader.load() - save_docs_to_vector_db(docs, collection_name, overwrite=True) return { diff --git a/backend/requirements.txt b/backend/requirements.txt index 764e41d3d9..a6933d20a4 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -46,6 +46,8 @@ sentence-transformers==3.0.1 colbert-ai==0.2.21 einops==0.8.0 + +ftfy==6.2.3 pypdf==4.3.1 docx2txt==0.8 python-pptx==1.0.0 diff --git a/pyproject.toml b/pyproject.toml index d02281d521..1df284f802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,8 @@ dependencies = [ "colbert-ai==0.2.21", "einops==0.8.0", + + "ftfy==6.2.3", "pypdf==4.3.1", "docx2txt==0.8", "python-pptx==1.0.0", From 9d2ed3d2be6b94f2866c549688740b92f67f6568 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 02:56:56 +0200 Subject: [PATCH 024/252] refac --- backend/open_webui/apps/retrieval/loader/main.py | 2 -- backend/open_webui/apps/retrieval/main.py | 12 ++++++++++-- backend/open_webui/apps/webui/models/files.py | 11 +++++++++++ backend/open_webui/apps/webui/routers/files.py | 13 +++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/backend/open_webui/apps/retrieval/loader/main.py b/backend/open_webui/apps/retrieval/loader/main.py index b435fa21f3..f0e8f804ee 100644 --- a/backend/open_webui/apps/retrieval/loader/main.py +++ b/backend/open_webui/apps/retrieval/loader/main.py @@ -2,7 +2,6 @@ import requests import logging import ftfy - from langchain_community.document_loaders import ( BSHTMLLoader, CSVLoader, @@ -24,7 +23,6 @@ from open_webui.env import SRC_LOG_LEVELS log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) - known_source_ext = [ "go", "py", diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 9c2ec141fe..a3e8289784 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -725,8 +725,16 @@ def process_file( PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) docs = loader.load(file.filename, file.meta.get("content_type"), file_path) - raw_content = " ".join([doc.page_content for doc in docs]) - print(raw_content) + raw_text_content = " ".join([doc.page_content for doc in docs]) + + Files.update_files_metadata_by_id( + form_data.file_id, + { + "content": { + "text": raw_text_content, + } + }, + ) try: result = save_docs_to_vector_db( diff --git a/backend/open_webui/apps/webui/models/files.py b/backend/open_webui/apps/webui/models/files.py index 7fba74479d..cf572ac788 100644 --- a/backend/open_webui/apps/webui/models/files.py +++ b/backend/open_webui/apps/webui/models/files.py @@ -97,6 +97,17 @@ class FilesTable: for file in db.query(File).filter_by(user_id=user_id).all() ] + def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]: + with get_db() as db: + try: + file = db.query(File).filter_by(id=id).first() + file.meta = {**file.meta, **meta} + db.commit() + + return FileModel.model_validate(file) + except Exception: + return None + def delete_file_by_id(self, id: str) -> bool: with get_db() as db: try: diff --git a/backend/open_webui/apps/webui/routers/files.py b/backend/open_webui/apps/webui/routers/files.py index 1a326bcd8c..f46a7992d2 100644 --- a/backend/open_webui/apps/webui/routers/files.py +++ b/backend/open_webui/apps/webui/routers/files.py @@ -171,6 +171,19 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)): ) +@router.get("/{id}/content/text") +async def get_file_text_content_by_id(id: str, user=Depends(get_verified_user)): + file = Files.get_file_by_id(id) + + if file and (file.user_id == user.id or user.role == "admin"): + return {"text": file.meta.get("content", {}).get("text", None)} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + @router.get("/{id}/content/{file_name}", response_model=Optional[FileModel]) async def get_file_content_by_id(id: str, user=Depends(get_verified_user)): file = Files.get_file_by_id(id) From 90ec458c4c3cd667877144327181737e91fc0a04 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 10:53:25 +0200 Subject: [PATCH 025/252] enh: show extracted file content --- backend/open_webui/apps/retrieval/main.py | 7 ++- src/lib/components/chat/MessageInput.svelte | 6 ++ src/lib/components/common/FileItem.svelte | 27 ++++---- .../components/common/FileItemModal.svelte | 62 +++++++++++++++++++ src/lib/components/icons/Info.svelte | 19 ++++++ src/lib/utils/index.ts | 18 ++++++ 6 files changed, 123 insertions(+), 16 deletions(-) create mode 100644 src/lib/components/common/FileItemModal.svelte create mode 100644 src/lib/components/icons/Info.svelte diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index a3e8289784..a0964e5a07 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -725,13 +725,15 @@ def process_file( PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, ) docs = loader.load(file.filename, file.meta.get("content_type"), file_path) - raw_text_content = " ".join([doc.page_content for doc in docs]) + text_content = " ".join([doc.page_content for doc in docs]) + + log.debug(f"text_content: {text_content}") Files.update_files_metadata_by_id( form_data.file_id, { "content": { - "text": raw_text_content, + "text": text_content, } }, ) @@ -751,6 +753,7 @@ def process_file( "status": True, "collection_name": collection_name, "filename": file.meta.get("name", file.filename), + "content": text_content, } except Exception as e: raise HTTPException( diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index 2ca0c8d207..b93468470a 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -163,6 +163,8 @@ if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; + fileItem.content = res.content; + files = files; } } catch (e) { @@ -464,6 +466,7 @@
{:else} { + console.log(file); + }} /> {/if} {/each} diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 7e8592ab98..46eac31b85 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -1,5 +1,8 @@ +{#if file} + +{/if} +
diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte new file mode 100644 index 0000000000..de70c6826c --- /dev/null +++ b/src/lib/components/common/FileItemModal.svelte @@ -0,0 +1,62 @@ + + + +
+
+
+
+ {file?.name ?? 'File'} +
+ +
+
+ {#if file.size} +
{formatFileSize(file.size)}
+ • + {/if} + + {#if file.content} +
{getLineCount(file.content)} extracted lines
+ +
+ + + Formatting may be inconsistent from source. +
+ {/if} +
+
+
+ +
+ +
+
+ +
+ {file?.content ?? 'No content'} +
+
+
diff --git a/src/lib/components/icons/Info.svelte b/src/lib/components/icons/Info.svelte new file mode 100644 index 0000000000..2849ac532b --- /dev/null +++ b/src/lib/components/icons/Info.svelte @@ -0,0 +1,19 @@ + + + + + diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 8478e885bd..fef9aaffee 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -873,3 +873,21 @@ export const createMessagesList = (history, messageId) => { return [message]; } }; + +export const formatFileSize = (size) => { + if (size == null) return 'Unknown size'; + if (typeof size !== 'number' || size < 0) return 'Invalid size'; + if (size === 0) return '0 B'; + const units = ['B', 'KB', 'MB', 'GB', 'TB']; + let unitIndex = 0; + + while (size >= 1024 && unitIndex < units.length - 1) { + size /= 1024; + unitIndex++; + } + return `${size.toFixed(1)} ${units[unitIndex]}`; +}; + +export const getLineCount = (text) => { + return text.split('\n').length; +}; From d2e2e535dd041a9cf22328bc80e42033a270c9a7 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 10:55:52 +0200 Subject: [PATCH 026/252] refac --- src/lib/components/chat/Controls/Controls.svelte | 1 + src/lib/components/chat/Messages/UserMessage.svelte | 1 + 2 files changed, 2 insertions(+) diff --git a/src/lib/components/chat/Controls/Controls.svelte b/src/lib/components/chat/Controls/Controls.svelte index 50d5a56489..d1246266fa 100644 --- a/src/lib/components/chat/Controls/Controls.svelte +++ b/src/lib/components/chat/Controls/Controls.svelte @@ -35,6 +35,7 @@ {#each chatFiles as file, fileIdx} {:else} Date: Sat, 28 Sep 2024 19:16:52 +0200 Subject: [PATCH 027/252] refac --- backend/open_webui/apps/retrieval/main.py | 2 +- backend/open_webui/apps/retrieval/{model => models}/colbert.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename backend/open_webui/apps/retrieval/{model => models}/colbert.py (100%) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index a0964e5a07..87f60b954f 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -16,7 +16,7 @@ from pydantic import BaseModel from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT # Information retrieval models -from open_webui.apps.retrieval.model.colbert import ColBERT +from open_webui.apps.retrieval.models.colbert import ColBERT # Document loaders from open_webui.apps.retrieval.loader.main import Loader diff --git a/backend/open_webui/apps/retrieval/model/colbert.py b/backend/open_webui/apps/retrieval/models/colbert.py similarity index 100% rename from backend/open_webui/apps/retrieval/model/colbert.py rename to backend/open_webui/apps/retrieval/models/colbert.py From 276ce3374d840b9c0f06f857216e1a1e19f8f030 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 19:17:21 +0200 Subject: [PATCH 028/252] refac --- backend/open_webui/apps/retrieval/{loader => loaders}/main.py | 0 backend/open_webui/apps/retrieval/main.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename backend/open_webui/apps/retrieval/{loader => loaders}/main.py (100%) diff --git a/backend/open_webui/apps/retrieval/loader/main.py b/backend/open_webui/apps/retrieval/loaders/main.py similarity index 100% rename from backend/open_webui/apps/retrieval/loader/main.py rename to backend/open_webui/apps/retrieval/loaders/main.py diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 87f60b954f..87242db02b 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -19,7 +19,7 @@ from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.apps.retrieval.models.colbert import ColBERT # Document loaders -from open_webui.apps.retrieval.loader.main import Loader +from open_webui.apps.retrieval.loaders.main import Loader # Web search engines from open_webui.apps.retrieval.web.main import SearchResult From 5a168ecc2ad844f1be3ecd5e8d80b130e2ef1a5f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 19:25:41 +0200 Subject: [PATCH 029/252] refac --- src/lib/components/chat/Overview.svelte | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lib/components/chat/Overview.svelte b/src/lib/components/chat/Overview.svelte index 0c2a376e8c..0dba53ea03 100644 --- a/src/lib/components/chat/Overview.svelte +++ b/src/lib/components/chat/Overview.svelte @@ -38,13 +38,16 @@ $: if (history && history.currentId) { focusNode(); - selectedMessageId = null; } const focusNode = async () => { if (selectedMessageId === null) { await fitView({ nodes: [{ id: history.currentId }] }); + } else { + await fitView({ nodes: [{ id: selectedMessageId }] }); } + + selectedMessageId = null; }; const drawFlow = async () => { From c93a10388bee97da0bd491d6391c6b8e1412159e Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 28 Sep 2024 19:51:28 +0200 Subject: [PATCH 030/252] refac --- backend/open_webui/apps/ollama/main.py | 1 + backend/open_webui/main.py | 66 ++++++++++++++------------ 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/backend/open_webui/apps/ollama/main.py b/backend/open_webui/apps/ollama/main.py index 1337fbb31d..2e02d069fc 100644 --- a/backend/open_webui/apps/ollama/main.py +++ b/backend/open_webui/apps/ollama/main.py @@ -787,6 +787,7 @@ async def generate_chat_completion( ): payload = {**form_data.model_dump(exclude_none=True)} log.debug(f"{payload = }") + if "metadata" in payload: del payload["metadata"] diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 40fac171f5..5c964c597a 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -448,38 +448,44 @@ async def chat_completion_tools_handler( if not content: return body, {} - content = content[content.find("{") : content.rfind("}") + 1] - result = json.loads(content) - - tool_function_name = result.get("name", None) - if tool_function_name not in tools: - return body, {} - - tool_function_params = result.get("parameters", {}) - try: - tool_output = await tools[tool_function_name]["callable"]( - **tool_function_params - ) + content = content[content.find("{") : content.rfind("}") + 1] + if not content: + raise Exception("No JSON object found in the response") + + result = json.loads(content) + + tool_function_name = result.get("name", None) + if tool_function_name not in tools: + return body, {} + + tool_function_params = result.get("parameters", {}) + + try: + tool_output = await tools[tool_function_name]["callable"]( + **tool_function_params + ) + except Exception as e: + tool_output = str(e) + + if tools[tool_function_name]["citation"]: + citations.append( + { + "source": { + "name": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}" + }, + "document": [tool_output], + "metadata": [{"source": tool_function_name}], + } + ) + if tools[tool_function_name]["file_handler"]: + skip_files = True + + if isinstance(tool_output, str): + contexts.append(tool_output) except Exception as e: - tool_output = str(e) - - if tools[tool_function_name]["citation"]: - citations.append( - { - "source": { - "name": f"TOOL:{tools[tool_function_name]['toolkit_id']}/{tool_function_name}" - }, - "document": [tool_output], - "metadata": [{"source": tool_function_name}], - } - ) - if tools[tool_function_name]["file_handler"]: - skip_files = True - - if isinstance(tool_output, str): - contexts.append(tool_output) - + log.exception(f"Error: {e}") + content = None except Exception as e: log.exception(f"Error: {e}") content = None From 550075bba4313e9398b555ebc98a4018f7e89901 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 18:24:44 +0200 Subject: [PATCH 031/252] fix: action button not working --- src/lib/components/chat/Messages.svelte | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/components/chat/Messages.svelte b/src/lib/components/chat/Messages.svelte index 83ea3714f1..ef2810686d 100644 --- a/src/lib/components/chat/Messages.svelte +++ b/src/lib/components/chat/Messages.svelte @@ -383,7 +383,6 @@ {mergeResponses} {readOnly} on:action={async (e) => { - const message = history.messages[message.id]; if (typeof e.detail === 'string') { await chatActionHandler(chatId, e.detail, message.model, message.id); } else { From b3517c63e8b7138e96acf51a66fa32fe6107cca4 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 18:29:50 +0200 Subject: [PATCH 032/252] fix: multi model chat infinite loop issue --- .../chat/Messages/MultiResponseMessages.svelte | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/lib/components/chat/Messages/MultiResponseMessages.svelte b/src/lib/components/chat/Messages/MultiResponseMessages.svelte index 7e1e7d30f5..291b2f1ae8 100644 --- a/src/lib/components/chat/Messages/MultiResponseMessages.svelte +++ b/src/lib/components/chat/Messages/MultiResponseMessages.svelte @@ -186,12 +186,13 @@ }`} transition-all p-5 rounded-2xl" on:click={() => { if (messageId != _messageId) { - let messageChildrenIds = history.messages[_messageId].childrenIds; + let currentMessageId = _messageId; + let messageChildrenIds = history.messages[currentMessageId].childrenIds; while (messageChildrenIds.length !== 0) { - messageId = messageChildrenIds.at(-1); - messageChildrenIds = history.messages[_messageId].childrenIds; + currentMessageId = messageChildrenIds.at(-1); + messageChildrenIds = history.messages[currentMessageId].childrenIds; } - history.currentId = _messageId; + history.currentId = currentMessageId; dispatch('change'); } }} From d784d5c367de970d2ae55d075afd15b65548ad39 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 18:33:16 +0200 Subject: [PATCH 033/252] refac --- src/lib/components/common/FileItem.svelte | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 46eac31b85..67cbdf5096 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -33,7 +33,16 @@ class="h-14 {className} flex items-center space-x-3 {colorClassName} rounded-xl border border-gray-100 dark:border-gray-800 text-left" type="button" on:click={async () => { - showModal = !showModal; + if (file.content) { + showModal = !showModal; + } else { + if (url) { + if (type === 'file') { + window.open(`${url}/content`, '_blank').focus(); + } + } + } + dispatch('click'); }} > From f2ec020b644780382676725ccd020cbc496d1f16 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 18:41:27 +0200 Subject: [PATCH 034/252] refac: styling --- src/lib/components/common/FileItemModal.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index de70c6826c..f590d07556 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -17,7 +17,7 @@ -
+
From 92dd173b27fdbdd3683f9919e2a4f020120aa80f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 18:55:26 +0200 Subject: [PATCH 035/252] refac --- backend/open_webui/apps/retrieval/main.py | 141 ++++++++++-------- .../chat/MessageInput/Commands.svelte | 19 ++- src/lib/components/common/FileItem.svelte | 2 + .../components/common/FileItemModal.svelte | 10 +- 4 files changed, 98 insertions(+), 74 deletions(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 87242db02b..118a3e2035 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -726,7 +726,6 @@ def process_file( ) docs = loader.load(file.filename, file.meta.get("content_type"), file_path) text_content = " ".join([doc.page_content for doc in docs]) - log.debug(f"text_content: {text_content}") Files.update_files_metadata_by_id( @@ -795,10 +794,17 @@ def process_text( metadata={"name": form_data.name, "created_by": user.id}, ) ] + text_content = form_data.content + log.debug(f"text_content: {text_content}") + result = save_docs_to_vector_db(docs, collection_name) if result: - return {"status": True, "collection_name": collection_name} + return { + "status": True, + "collection_name": collection_name, + "content": text_content, + } else: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -806,68 +812,6 @@ def process_text( ) -@app.get("/process/dir") -def process_docs_dir(user=Depends(get_admin_user)): - for path in Path(DOCS_DIR).rglob("./**/*"): - try: - if path.is_file() and not path.name.startswith("."): - tags = extract_folders_after_data_docs(path) - filename = path.name - file_content_type = mimetypes.guess_type(path) - - with open(path, "rb") as f: - collection_name = calculate_sha256(f)[:63] - - loader = Loader( - engine=app.state.config.CONTENT_EXTRACTION_ENGINE, - TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, - PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, - ) - docs = loader.load(filename, file_content_type[0], str(path)) - - try: - result = save_docs_to_vector_db(docs, collection_name) - - if result: - sanitized_filename = sanitize_filename(filename) - doc = Documents.get_doc_by_name(sanitized_filename) - - if doc is None: - doc = Documents.insert_new_doc( - user.id, - DocumentForm( - **{ - "name": sanitized_filename, - "title": filename, - "collection_name": collection_name, - "filename": filename, - "content": ( - json.dumps( - { - "tags": list( - map( - lambda name: {"name": name}, - tags, - ) - ) - } - ) - if len(tags) - else "{}" - ), - } - ), - ) - except Exception as e: - log.exception(e) - pass - - except Exception as e: - log.exception(e) - - return True - - @app.post("/process/youtube") def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)): try: @@ -882,12 +826,15 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u translation=app.state.YOUTUBE_LOADER_TRANSLATION, ) docs = loader.load() + text_content = " ".join([doc.page_content for doc in docs]) + log.debug(f"text_content: {text_content}") save_docs_to_vector_db(docs, collection_name, overwrite=True) return { "status": True, "collection_name": collection_name, "filename": form_data.url, + "content": text_content, } except Exception as e: log.exception(e) @@ -910,12 +857,15 @@ def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)): requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, ) docs = loader.load() + text_content = " ".join([doc.page_content for doc in docs]) + log.debug(f"text_content: {text_content}") save_docs_to_vector_db(docs, collection_name, overwrite=True) return { "status": True, "collection_name": collection_name, "filename": form_data.url, + "content": text_content, } except Exception as e: log.exception(e) @@ -1067,6 +1017,7 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)): loader = get_web_loader(urls) docs = loader.load() + save_docs_to_vector_db(docs, collection_name, overwrite=True) return { @@ -1082,6 +1033,68 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)): ) +@app.get("/process/dir") +def process_docs_dir(user=Depends(get_admin_user)): + for path in Path(DOCS_DIR).rglob("./**/*"): + try: + if path.is_file() and not path.name.startswith("."): + tags = extract_folders_after_data_docs(path) + filename = path.name + file_content_type = mimetypes.guess_type(path) + + with open(path, "rb") as f: + collection_name = calculate_sha256(f)[:63] + + loader = Loader( + engine=app.state.config.CONTENT_EXTRACTION_ENGINE, + TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL, + PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES, + ) + docs = loader.load(filename, file_content_type[0], str(path)) + + try: + result = save_docs_to_vector_db(docs, collection_name) + + if result: + sanitized_filename = sanitize_filename(filename) + doc = Documents.get_doc_by_name(sanitized_filename) + + if doc is None: + doc = Documents.insert_new_doc( + user.id, + DocumentForm( + **{ + "name": sanitized_filename, + "title": filename, + "collection_name": collection_name, + "filename": filename, + "content": ( + json.dumps( + { + "tags": list( + map( + lambda name: {"name": name}, + tags, + ) + ) + } + ) + if len(tags) + else "{}" + ), + } + ), + ) + except Exception as e: + log.exception(e) + pass + + except Exception as e: + log.exception(e) + + return True + + class QueryDocForm(BaseModel): collection_name: str query: str diff --git a/src/lib/components/chat/MessageInput/Commands.svelte b/src/lib/components/chat/MessageInput/Commands.svelte index d1f85d4587..91f78866d0 100644 --- a/src/lib/components/chat/MessageInput/Commands.svelte +++ b/src/lib/components/chat/MessageInput/Commands.svelte @@ -30,7 +30,7 @@ const uploadWeb = async (url) => { console.log(url); - const doc = { + const fileItem = { type: 'doc', name: url, collection_name: '', @@ -40,12 +40,14 @@ }; try { - files = [...files, doc]; + files = [...files, fileItem]; const res = await processWeb(localStorage.token, '', url); if (res) { - doc.status = 'processed'; - doc.collection_name = res.collection_name; + fileItem.status = 'processed'; + fileItem.collection_name = res.collection_name; + fileItem.content = res.content; + files = files; } } catch (e) { @@ -58,7 +60,7 @@ const uploadYoutubeTranscription = async (url) => { console.log(url); - const doc = { + const fileItem = { type: 'doc', name: url, collection_name: '', @@ -68,12 +70,13 @@ }; try { - files = [...files, doc]; + files = [...files, fileItem]; const res = await processYoutubeVideo(localStorage.token, url); if (res) { - doc.status = 'processed'; - doc.collection_name = res.collection_name; + fileItem.status = 'processed'; + fileItem.collection_name = res.collection_name; + fileItem.content = res.content; files = files; } } catch (e) { diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 67cbdf5096..b86a2d37ab 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -39,6 +39,8 @@ if (url) { if (type === 'file') { window.open(`${url}/content`, '_blank').focus(); + } else { + window.open(`${url}`, '_blank').focus(); } } } diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index f590d07556..c124a45c73 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -20,8 +20,14 @@
-
- {file?.name ?? 'File'} +
From 1d8b3b8c51fe72be3a4d656d5325a59dcab4d406 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 22:11:50 +0200 Subject: [PATCH 036/252] refac --- src/lib/components/chat/Chat.svelte | 13 ++++++++++++- src/lib/components/chat/MessageInput.svelte | 17 ++--------------- src/lib/components/common/FileItem.svelte | 2 +- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index f60f0ede33..066272e434 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -700,7 +700,7 @@ childrenIds: [], role: 'user', content: userPrompt, - files: _files.length > 0 ? _files : undefined, + files: chatFiles.length > 0 ? chatFiles : undefined, timestamp: Math.floor(Date.now() / 1000), // Unix epoch models: selectedModels }; @@ -947,6 +947,12 @@ ...(responseMessage?.files ?? []).filter((item) => ['web_search_results'].includes(item.type)) ); + // Remove duplicates + files = files.filter( + (item, index, array) => + array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index + ); + scrollToBottom(); eventTarget.dispatchEvent( @@ -1246,6 +1252,11 @@ ), ...(responseMessage?.files ?? []).filter((item) => ['web_search_results'].includes(item.type)) ); + // Remove duplicates + files = files.filter( + (item, index, array) => + array.findIndex((i) => JSON.stringify(i) === JSON.stringify(item)) === index + ); scrollToBottom(); diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index b93468470a..1694ad44b3 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -133,21 +133,8 @@ fileItem.id = uploadedFile.id; fileItem.url = `${WEBUI_API_BASE_URL}/files/${uploadedFile.id}`; - // TODO: Check if tools & functions have files support to skip this step to delegate file processing - // Default Upload to VectorDB - if ( - SUPPORTED_FILE_TYPE.includes(file['type']) || - SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) - ) { - processFileItem(fileItem); - } else { - toast.error( - $i18n.t(`Unknown file type '{{file_type}}'. Proceeding with the file upload anyway.`, { - file_type: file['type'] - }) - ); - processFileItem(fileItem); - } + // Try to extract content of the file for retrieval, even non-supported file types + processFileItem(fileItem); } else { files = files.filter((item) => item.status !== null); } diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index b86a2d37ab..2dea203d54 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -25,7 +25,7 @@ {#if file} - + {/if}
From 6d764ee55ef6c685fd64b089200de76a49844970 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 22:52:27 +0200 Subject: [PATCH 037/252] feat: retrieval whole document mode --- backend/open_webui/apps/retrieval/utils.py | 87 ++++++++++--------- .../components/chat/Controls/Controls.svelte | 1 + src/lib/components/chat/MessageInput.svelte | 1 + src/lib/components/common/FileItem.svelte | 4 +- .../components/common/FileItemModal.svelte | 86 +++++++++++++----- 5 files changed, 112 insertions(+), 67 deletions(-) diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index 1fa30e6a0e..6b12f76a1c 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -317,58 +317,63 @@ def get_rag_context( relevant_contexts = [] for file in files: - context = None - - collection_names = ( - file["collection_names"] - if file["type"] == "collection" - else [file["collection_name"]] if file["collection_name"] else [] - ) - - collection_names = set(collection_names).difference(extracted_collections) - if not collection_names: - log.debug(f"skipping {file} as it has already been extracted") - continue - - try: + if file.get("context") == "full": + context = { + "documents": [[file["content"]]], + "metadatas": [[{"file_id": file["id"], "name": file["name"]}]], + } + else: context = None - if file["type"] == "text": - context = file["content"] - else: - if hybrid_search: - try: - context = query_collection_with_hybrid_search( + + collection_names = ( + file["collection_names"] + if file["type"] == "collection" + else [file["collection_name"]] if file["collection_name"] else [] + ) + + collection_names = set(collection_names).difference(extracted_collections) + if not collection_names: + log.debug(f"skipping {file} as it has already been extracted") + continue + + try: + context = None + if file["type"] == "text": + context = file["content"] + else: + if hybrid_search: + try: + context = query_collection_with_hybrid_search( + collection_names=collection_names, + query=query, + embedding_function=embedding_function, + k=k, + reranking_function=reranking_function, + r=r, + ) + except Exception as e: + log.debug( + "Error when using hybrid search, using" + " non hybrid search as fallback." + ) + + if (not hybrid_search) or (context is None): + context = query_collection( collection_names=collection_names, query=query, embedding_function=embedding_function, k=k, - reranking_function=reranking_function, - r=r, - ) - except Exception as e: - log.debug( - "Error when using hybrid search, using" - " non hybrid search as fallback." ) + except Exception as e: + log.exception(e) - if (not hybrid_search) or (context is None): - context = query_collection( - collection_names=collection_names, - query=query, - embedding_function=embedding_function, - k=k, - ) - except Exception as e: - log.exception(e) + extracted_collections.extend(collection_names) if context: - relevant_contexts.append({**context, "source": file}) - - extracted_collections.extend(collection_names) + relevant_contexts.append({**context, "file": file}) contexts = [] citations = [] - for context in relevant_contexts: try: if "documents" in context: @@ -381,7 +386,7 @@ def get_rag_context( if "metadatas" in context: citations.append( { - "source": context["source"], + "source": context["file"], "document": context["documents"][0], "metadata": context["metadatas"][0], } diff --git a/src/lib/components/chat/Controls/Controls.svelte b/src/lib/components/chat/Controls/Controls.svelte index d1246266fa..f5807b9b86 100644 --- a/src/lib/components/chat/Controls/Controls.svelte +++ b/src/lib/components/chat/Controls/Controls.svelte @@ -36,6 +36,7 @@ { files.splice(fileIdx, 1); files = files; diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 2dea203d54..51fb44f2ba 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -15,7 +15,7 @@ export let status = 'processed'; export let file = null; - export let enableModal = true; + export let edit = false; export let name: string; export let type: string; @@ -25,7 +25,7 @@ {#if file} - + {/if}
diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index c124a45c73..70755885a3 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -7,57 +7,95 @@ import Modal from './Modal.svelte'; import XMark from '../icons/XMark.svelte'; import Info from '../icons/Info.svelte'; + import Switch from './Switch.svelte'; + import Tooltip from './Tooltip.svelte'; export let file; export let show = false; + export let edit = false; + + let enableFullContent = false; + onMount(() => { console.log(file); + + if (file?.context === 'full') { + enableFullContent = true; + } });
-
-
-
- - {file?.name ?? 'File'} - +
+
+
-
+ +
+
+ +
+
+
{#if file.size} -
{formatFileSize(file.size)}
+
{formatFileSize(file.size)}
• {/if} {#if file.content} -
{getLineCount(file.content)} extracted lines
+
{getLineCount(file.content)} extracted lines
-
+
Formatting may be inconsistent from source.
{/if}
-
-
-
- + {#if edit} +
+ +
+ {#if enableFullContent} + Use Entire Document + {:else} + Use Focused Retrieval + {/if} + { + file.context = e.detail ? 'full' : undefined; + }} + /> +
+
+
+ {/if} +
From 677c36c3aa9df947145fcdb08c8c5af0bcc5e44c Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 22:55:53 +0200 Subject: [PATCH 038/252] refac --- backend/open_webui/apps/retrieval/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index 6b12f76a1c..cfae8a80b9 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -320,7 +320,7 @@ def get_rag_context( if file.get("context") == "full": context = { "documents": [[file["content"]]], - "metadatas": [[{"file_id": file["id"], "name": file["name"]}]], + "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], } else: context = None From 6afc686e17d3943df3c693afac1372ffa0488638 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 23:08:55 +0200 Subject: [PATCH 039/252] refac --- backend/open_webui/apps/retrieval/utils.py | 2 +- src/lib/components/chat/MessageInput.svelte | 5 ++++- src/lib/components/chat/MessageInput/Commands.svelte | 12 +++++++++--- src/lib/components/common/FileItem.svelte | 2 +- src/lib/components/common/FileItemModal.svelte | 8 +++++--- src/lib/utils/index.ts | 3 ++- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index cfae8a80b9..12c30edbb7 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -319,7 +319,7 @@ def get_rag_context( for file in files: if file.get("context") == "full": context = { - "documents": [[file["content"]]], + "documents": [[file.get("file").get("content")]], "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], } else: diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index e3ae32e399..121a232055 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -150,7 +150,10 @@ if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; - fileItem.content = res.content; + fileItem.file = { + ...fileItem.file, + content: res.content + }; files = files; } diff --git a/src/lib/components/chat/MessageInput/Commands.svelte b/src/lib/components/chat/MessageInput/Commands.svelte index 91f78866d0..8be6d5f87a 100644 --- a/src/lib/components/chat/MessageInput/Commands.svelte +++ b/src/lib/components/chat/MessageInput/Commands.svelte @@ -46,14 +46,17 @@ if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; - fileItem.content = res.content; + fileItem.file = { + content: res.content, + ...fileItem.file + }; files = files; } } catch (e) { // Remove the failed doc from the files array files = files.filter((f) => f.name !== url); - toast.error(e); + toast.error(JSON.stringify(e)); } }; @@ -76,7 +79,10 @@ if (res) { fileItem.status = 'processed'; fileItem.collection_name = res.collection_name; - fileItem.content = res.content; + fileItem.file = { + content: res.content, + ...fileItem.file + }; files = files; } } catch (e) { diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 51fb44f2ba..316587b182 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -33,7 +33,7 @@ class="h-14 {className} flex items-center space-x-3 {colorClassName} rounded-xl border border-gray-100 dark:border-gray-800 text-left" type="button" on:click={async () => { - if (file.content) { + if (file?.file?.content) { showModal = !showModal; } else { if (url) { diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index 70755885a3..6cb6402ffd 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -61,8 +61,10 @@ • {/if} - {#if file.content} -
{getLineCount(file.content)} extracted lines
+ {#if file?.file?.content} +
+ {getLineCount(file?.file?.content ?? '')} extracted lines +
@@ -100,7 +102,7 @@
- {file?.content ?? 'No content'} + {file?.file?.content ?? 'No content'}
diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index fef9aaffee..f6df53d387 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -889,5 +889,6 @@ export const formatFileSize = (size) => { }; export const getLineCount = (text) => { - return text.split('\n').length; + console.log(typeof text); + return text ? text.split('\n').length : 0; }; From f7aba20d79c637534bc74a4d05a1c8aadd1de946 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 29 Sep 2024 23:11:22 +0200 Subject: [PATCH 040/252] refac --- src/lib/components/common/FileItemModal.svelte | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index 6cb6402ffd..e98f554be1 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -78,14 +78,14 @@
{#if enableFullContent} - Use Entire Document + Using Entire Document {:else} - Use Focused Retrieval + Using Focused Retrieval {/if} Date: Sun, 29 Sep 2024 23:20:37 +0200 Subject: [PATCH 041/252] refac --- backend/open_webui/apps/retrieval/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 118a3e2035..2a79ac90f0 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -193,7 +193,8 @@ def update_reranking_model( if any(model in reranking_model for model in ["jinaai/jina-colbert-v2"]): try: app.state.sentence_transformer_rf = ColBERT( - get_model_path(reranking_model, auto_update) + get_model_path(reranking_model, auto_update), + env="docker" if DOCKER else None, ) except Exception as e: log.error(f"ColBERT: {e}") From 7152af949b782537da41fce914d43e5a091bb261 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 30 Sep 2024 00:30:12 +0200 Subject: [PATCH 042/252] feat: compress audio Co-Authored-By: Beck Bekmyradov <47065940+bekmuradov@users.noreply.github.com> --- backend/open_webui/apps/audio/main.py | 232 +++++++++++------- src/lib/components/chat/Chat.svelte | 2 +- .../components/common/FileItemModal.svelte | 2 +- 3 files changed, 139 insertions(+), 97 deletions(-) diff --git a/backend/open_webui/apps/audio/main.py b/backend/open_webui/apps/audio/main.py index a1e6e94fa0..1cad5f4c7d 100644 --- a/backend/open_webui/apps/audio/main.py +++ b/backend/open_webui/apps/audio/main.py @@ -5,6 +5,8 @@ import os import uuid from functools import lru_cache from pathlib import Path +from pydub import AudioSegment +from pydub.silence import split_on_silence import requests from open_webui.config import ( @@ -35,7 +37,12 @@ from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile, from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse from pydantic import BaseModel -from open_webui.utils.utils import get_admin_user, get_current_user, get_verified_user +from open_webui.utils.utils import get_admin_user, get_verified_user + +# Constants +MAX_FILE_SIZE_MB = 25 +MAX_FILE_SIZE = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["AUDIO"]) @@ -353,10 +360,103 @@ async def speech(request: Request, user=Depends(get_verified_user)): ) +def transcribe(file_path): + print("transcribe", file_path) + filename = os.path.basename(file_path) + file_dir = os.path.dirname(file_path) + id = filename.split(".")[0] + + if app.state.config.STT_ENGINE == "": + from faster_whisper import WhisperModel + + whisper_kwargs = { + "model_size_or_path": WHISPER_MODEL, + "device": whisper_device_type, + "compute_type": "int8", + "download_root": WHISPER_MODEL_DIR, + "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, + } + + log.debug(f"whisper_kwargs: {whisper_kwargs}") + + try: + model = WhisperModel(**whisper_kwargs) + except Exception: + log.warning( + "WhisperModel initialization failed, attempting download with local_files_only=False" + ) + whisper_kwargs["local_files_only"] = False + model = WhisperModel(**whisper_kwargs) + + segments, info = model.transcribe(file_path, beam_size=5) + log.info( + "Detected language '%s' with probability %f" + % (info.language, info.language_probability) + ) + + transcript = "".join([segment.text for segment in list(segments)]) + + data = {"text": transcript.strip()} + + # save the transcript to a json file + transcript_file = f"{file_dir}/{id}.json" + with open(transcript_file, "w") as f: + json.dump(data, f) + + print(data) + return data + elif app.state.config.STT_ENGINE == "openai": + if is_mp4_audio(file_path): + print("is_mp4_audio") + os.rename(file_path, file_path.replace(".wav", ".mp4")) + # Convert MP4 audio file to WAV format + convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path) + + headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"} + + files = {"file": (filename, open(file_path, "rb"))} + data = {"model": app.state.config.STT_MODEL} + + print(files, data) + + r = None + try: + r = requests.post( + url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", + headers=headers, + files=files, + data=data, + ) + + r.raise_for_status() + + data = r.json() + + # save the transcript to a json file + transcript_file = f"{file_dir}/{id}.json" + with open(transcript_file, "w") as f: + json.dump(data, f) + + print(data) + return data + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"External: {res['error']['message']}" + except Exception: + error_detail = f"External: {e}" + + raise error_detail + + @app.post("/transcriptions") -def transcribe( +def transcription( file: UploadFile = File(...), - user=Depends(get_current_user), + user=Depends(get_verified_user), ): log.info(f"file.content_type: {file.content_type}") @@ -368,111 +468,53 @@ def transcribe( try: ext = file.filename.split(".")[-1] - id = uuid.uuid4() + filename = f"{id}.{ext}" + contents = file.file.read() file_dir = f"{CACHE_DIR}/audio/transcriptions" os.makedirs(file_dir, exist_ok=True) file_path = f"{file_dir}/{filename}" - print(filename) - - contents = file.file.read() with open(file_path, "wb") as f: f.write(contents) - f.close() - if app.state.config.STT_ENGINE == "": - from faster_whisper import WhisperModel + try: + if os.path.getsize(file_path) > MAX_FILE_SIZE: # file is bigger than 25MB + log.debug(f"File size is larger than {MAX_FILE_SIZE_MB}MB") + audio = AudioSegment.from_file(file_path) + audio = audio.set_frame_rate(16000).set_channels(1) # Compress audio + compressed_path = f"{file_dir}/{id}_compressed.opus" + audio.export(compressed_path, format="opus", bitrate="32k") + log.debug(f"Compressed audio to {compressed_path}") + file_path = compressed_path - whisper_kwargs = { - "model_size_or_path": WHISPER_MODEL, - "device": whisper_device_type, - "compute_type": "int8", - "download_root": WHISPER_MODEL_DIR, - "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, - } - - log.debug(f"whisper_kwargs: {whisper_kwargs}") - - try: - model = WhisperModel(**whisper_kwargs) - except Exception: - log.warning( - "WhisperModel initialization failed, attempting download with local_files_only=False" - ) - whisper_kwargs["local_files_only"] = False - model = WhisperModel(**whisper_kwargs) - - segments, info = model.transcribe(file_path, beam_size=5) - log.info( - "Detected language '%s' with probability %f" - % (info.language, info.language_probability) - ) - - transcript = "".join([segment.text for segment in list(segments)]) - - data = {"text": transcript.strip()} - - # save the transcript to a json file - transcript_file = f"{file_dir}/{id}.json" - with open(transcript_file, "w") as f: - json.dump(data, f) - - print(data) + if ( + os.path.getsize(file_path) > MAX_FILE_SIZE + ): # Still larger than 25MB after compression + chunks = split_on_silence( + audio, min_silence_len=500, silence_thresh=-40 + ) + texts = [] + for i, chunk in enumerate(chunks): + chunk_file_path = f"{file_dir}/{id}_chunk{i}.{ext}" + chunk.export(chunk_file_path, format=ext) + text = transcribe(chunk_file_path) + texts.append(text) + data = {"text": " ".join(texts)} + else: + data = transcribe(file_path) + else: + data = transcribe(file_path) return data - - elif app.state.config.STT_ENGINE == "openai": - if is_mp4_audio(file_path): - print("is_mp4_audio") - os.rename(file_path, file_path.replace(".wav", ".mp4")) - # Convert MP4 audio file to WAV format - convert_mp4_to_wav(file_path.replace(".wav", ".mp4"), file_path) - - headers = {"Authorization": f"Bearer {app.state.config.STT_OPENAI_API_KEY}"} - - files = {"file": (filename, open(file_path, "rb"))} - data = {"model": app.state.config.STT_MODEL} - - print(files, data) - - r = None - try: - r = requests.post( - url=f"{app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", - headers=headers, - files=files, - data=data, - ) - - r.raise_for_status() - - data = r.json() - - # save the transcript to a json file - transcript_file = f"{file_dir}/{id}.json" - with open(transcript_file, "w") as f: - json.dump(data, f) - - print(data) - return data - except Exception as e: - log.exception(e) - error_detail = "Open WebUI: Server Connection Error" - if r is not None: - try: - res = r.json() - if "error" in res: - error_detail = f"External: {res['error']['message']}" - except Exception: - error_detail = f"External: {e}" - - raise HTTPException( - status_code=r.status_code if r != None else 500, - detail=error_detail, - ) + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), + ) except Exception as e: log.exception(e) diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index 066272e434..eeb2f330fd 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -700,7 +700,7 @@ childrenIds: [], role: 'user', content: userPrompt, - files: chatFiles.length > 0 ? chatFiles : undefined, + files: _files.length > 0 ? _files : undefined, timestamp: Math.floor(Date.now() / 1000), // Unix epoch models: selectedModels }; diff --git a/src/lib/components/common/FileItemModal.svelte b/src/lib/components/common/FileItemModal.svelte index e98f554be1..f97e4f33d4 100644 --- a/src/lib/components/common/FileItemModal.svelte +++ b/src/lib/components/common/FileItemModal.svelte @@ -54,7 +54,7 @@
-
+
{#if file.size}
{formatFileSize(file.size)}
From 209828c7c351f82bcb1d0b770db803bb6bbf2b4f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 30 Sep 2024 00:37:31 +0200 Subject: [PATCH 043/252] refac: styling --- src/lib/components/chat/Messages/Error.svelte | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/lib/components/chat/Messages/Error.svelte b/src/lib/components/chat/Messages/Error.svelte index a1fed2f421..3cac9e1cd0 100644 --- a/src/lib/components/chat/Messages/Error.svelte +++ b/src/lib/components/chat/Messages/Error.svelte @@ -1,26 +1,15 @@ -
- - - +
+
+ +
-
+
{content}
From 3899405864d0dd0de227734a4cb9cd1709ed817e Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 30 Sep 2024 00:39:30 +0200 Subject: [PATCH 044/252] refac --- backend/open_webui/apps/audio/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/apps/audio/main.py b/backend/open_webui/apps/audio/main.py index 1cad5f4c7d..486f63f93c 100644 --- a/backend/open_webui/apps/audio/main.py +++ b/backend/open_webui/apps/audio/main.py @@ -494,7 +494,7 @@ def transcription( os.path.getsize(file_path) > MAX_FILE_SIZE ): # Still larger than 25MB after compression chunks = split_on_silence( - audio, min_silence_len=500, silence_thresh=-40 + audio, min_silence_len=1000, silence_thresh=-40 ) texts = [] for i, chunk in enumerate(chunks): From 1c4b6b9cd93b4185b46c06525cb48a90f435f8d2 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 30 Sep 2024 01:00:13 +0200 Subject: [PATCH 045/252] refac --- backend/open_webui/apps/audio/main.py | 21 ++++++++++----------- backend/open_webui/constants.py | 4 ++++ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/backend/open_webui/apps/audio/main.py b/backend/open_webui/apps/audio/main.py index 486f63f93c..0e56720138 100644 --- a/backend/open_webui/apps/audio/main.py +++ b/backend/open_webui/apps/audio/main.py @@ -493,18 +493,17 @@ def transcription( if ( os.path.getsize(file_path) > MAX_FILE_SIZE ): # Still larger than 25MB after compression - chunks = split_on_silence( - audio, min_silence_len=1000, silence_thresh=-40 + log.debug( + f"Compressed file size is still larger than {MAX_FILE_SIZE_MB}MB: {os.path.getsize(file_path)}" ) - texts = [] - for i, chunk in enumerate(chunks): - chunk_file_path = f"{file_dir}/{id}_chunk{i}.{ext}" - chunk.export(chunk_file_path, format=ext) - text = transcribe(chunk_file_path) - texts.append(text) - data = {"text": " ".join(texts)} - else: - data = transcribe(file_path) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.FILE_TOO_LARGE( + size=f"{MAX_FILE_SIZE_MB}MB" + ), + ) + + data = transcribe(file_path) else: data = transcribe(file_path) diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py index d55216bb5d..98dbe32b20 100644 --- a/backend/open_webui/constants.py +++ b/backend/open_webui/constants.py @@ -90,6 +90,10 @@ class ERROR_MESSAGES(str, Enum): "The Ollama API is disabled. Please enable it to use this feature." ) + FILE_TOO_LARGE = ( + lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}." + ) + class TASKS(str, Enum): def __str__(self) -> str: From 21c919988de9f71b32b902849e9434a4c9a3f4b1 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 30 Sep 2024 01:01:39 +0200 Subject: [PATCH 046/252] refac --- src/lib/components/chat/Messages/Error.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/chat/Messages/Error.svelte b/src/lib/components/chat/Messages/Error.svelte index 3cac9e1cd0..3a6d7cc30d 100644 --- a/src/lib/components/chat/Messages/Error.svelte +++ b/src/lib/components/chat/Messages/Error.svelte @@ -4,12 +4,12 @@ export let content = ''; -
+
- {content} + {typeof content === 'string' ? content : JSON.stringify(content)}
From 49fe04a627fd11d6212ed0cca469dcf6fbde0cf9 Mon Sep 17 00:00:00 2001 From: Eman Lear <8792460+res0Nanz@users.noreply.github.com> Date: Mon, 30 Sep 2024 15:20:57 +0800 Subject: [PATCH 047/252] frontend: add `apple-touch-icon` With `apple-touch-icon`, mobile devices of particular OS can properly show icon when bookmarking the web page as a home screen application. --- src/app.html | 1 + 1 file changed, 1 insertion(+) diff --git a/src/app.html b/src/app.html index d7f4513e78..f6e46c9cfb 100644 --- a/src/app.html +++ b/src/app.html @@ -3,6 +3,7 @@ + Date: Mon, 30 Sep 2024 12:50:53 +0200 Subject: [PATCH 048/252] enh: summary tag support --- .../components/chat/Messages/Markdown.svelte | 7 +- .../Messages/Markdown/MarkdownTokens.svelte | 7 ++ src/lib/utils/marked/extension.ts | 70 +++++++++++++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 src/lib/utils/marked/extension.ts diff --git a/src/lib/components/chat/Messages/Markdown.svelte b/src/lib/components/chat/Messages/Markdown.svelte index 2c2f74d768..e5373d39d8 100644 --- a/src/lib/components/chat/Messages/Markdown.svelte +++ b/src/lib/components/chat/Messages/Markdown.svelte @@ -1,9 +1,11 @@ -{#if filteredItems.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} +{#if filteredProjects.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')}
- {#each filteredItems as doc, docIdx} + {#each filteredProjects as project, idx} {/each} diff --git a/src/lib/components/workspace/Models/Knowledge/Selector.svelte b/src/lib/components/workspace/Models/Knowledge/Selector.svelte index 52d73540ef..5dfd43ef9b 100644 --- a/src/lib/components/workspace/Models/Knowledge/Selector.svelte +++ b/src/lib/components/workspace/Models/Knowledge/Selector.svelte @@ -1,11 +1,10 @@ diff --git a/src/lib/components/workspace/Projects.svelte b/src/lib/components/workspace/Projects.svelte new file mode 100644 index 0000000000..1a37c82826 --- /dev/null +++ b/src/lib/components/workspace/Projects.svelte @@ -0,0 +1,168 @@ + + + + + {$i18n.t('Projects')} | {$WEBUI_NAME} + + + + { + deleteHandler(selectedProject); + }} +/> + +
+
+
+ {$i18n.t('Projects')} +
+ {$projects.length} +
+
+
+ +
+
+
+ + + +
+ +
+ +
+ +
+
+ +
+ +
+ {#each filteredProjects as project} + + {/each} +
+ +
+ ⓘ {$i18n.t("Use '#' in the prompt input to load and select your projects.")} +
diff --git a/src/lib/components/workspace/Projects/CreateProject.svelte b/src/lib/components/workspace/Projects/CreateProject.svelte new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/lib/components/workspace/Projects/EditProject.svelte b/src/lib/components/workspace/Projects/EditProject.svelte new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/lib/components/workspace/Projects/ProjectMenu.svelte b/src/lib/components/workspace/Projects/ProjectMenu.svelte new file mode 100644 index 0000000000..b213081603 --- /dev/null +++ b/src/lib/components/workspace/Projects/ProjectMenu.svelte @@ -0,0 +1,65 @@ + + + { + if (e.detail === false) { + onClose(); + } + }} + align="end" +> + + + + + +
+ + { + dispatch('delete'); + }} + > + +
{$i18n.t('Delete')}
+
+
+
+
diff --git a/src/lib/stores/index.ts b/src/lib/stores/index.ts index b96bf4a98a..844eea514e 100644 --- a/src/lib/stores/index.ts +++ b/src/lib/stores/index.ts @@ -29,7 +29,7 @@ export const tags = writable([]); export const models: Writable = writable([]); export const prompts: Writable = writable([]); -export const documents: Writable = writable([]); +export const projects: Writable = writable([]); export const tools = writable([]); export const functions = writable([]); diff --git a/src/routes/(app)/+layout.svelte b/src/routes/(app)/+layout.svelte index 83a53dffd9..0556a87a19 100644 --- a/src/routes/(app)/+layout.svelte +++ b/src/routes/(app)/+layout.svelte @@ -3,50 +3,46 @@ import { onMount, tick, getContext } from 'svelte'; import { openDB, deleteDB } from 'idb'; import fileSaver from 'file-saver'; + const { saveAs } = fileSaver; import mermaid from 'mermaid'; - const { saveAs } = fileSaver; - import { goto } from '$app/navigation'; + import { page } from '$app/stores'; + import { fade } from 'svelte/transition'; + import { getProjects } from '$lib/apis/projects'; + import { getFunctions } from '$lib/apis/functions'; import { getModels as _getModels, getVersionUpdates } from '$lib/apis'; import { getAllChatTags } from '$lib/apis/chats'; - import { getPrompts } from '$lib/apis/prompts'; - import { getDocs } from '$lib/apis/documents'; import { getTools } from '$lib/apis/tools'; - import { getBanners } from '$lib/apis/configs'; import { getUserSettings } from '$lib/apis/users'; - import { - user, - showSettings, - settings, - models, - prompts, - documents, - tags, - banners, - showChangelog, - config, - showCallOverlay, - tools, - functions, - temporaryChatEnabled - } from '$lib/stores'; - - import SettingsModal from '$lib/components/chat/SettingsModal.svelte'; - import Sidebar from '$lib/components/layout/Sidebar.svelte'; - import ChangelogModal from '$lib/components/ChangelogModal.svelte'; - import AccountPending from '$lib/components/layout/Overlay/AccountPending.svelte'; - import { getFunctions } from '$lib/apis/functions'; - import { page } from '$app/stores'; import { WEBUI_VERSION } from '$lib/constants'; import { compareVersion } from '$lib/utils'; + import { + config, + user, + settings, + models, + prompts, + projects, + tools, + functions, + tags, + banners, + showSettings, + showChangelog, + temporaryChatEnabled + } from '$lib/stores'; + + import Sidebar from '$lib/components/layout/Sidebar.svelte'; + import SettingsModal from '$lib/components/chat/SettingsModal.svelte'; + import ChangelogModal from '$lib/components/ChangelogModal.svelte'; + import AccountPending from '$lib/components/layout/Overlay/AccountPending.svelte'; import UpdateInfoToast from '$lib/components/layout/UpdateInfoToast.svelte'; - import { fade } from 'svelte/transition'; const i18n = getContext('i18n'); @@ -109,7 +105,7 @@ prompts.set(await getPrompts(localStorage.token)); })(), (async () => { - documents.set(await getDocs(localStorage.token)); + projects.set(await getProjects(localStorage.token)); })(), (async () => { tools.set(await getTools(localStorage.token)); diff --git a/src/routes/(app)/workspace/+layout.svelte b/src/routes/(app)/workspace/+layout.svelte index 05ab80715c..6f69cffec7 100644 --- a/src/routes/(app)/workspace/+layout.svelte +++ b/src/routes/(app)/workspace/+layout.svelte @@ -69,14 +69,12 @@ > - {$i18n.t('Documents')} + {$i18n.t('Projects')} - import Documents from '$lib/components/workspace/Documents.svelte'; - - - diff --git a/src/routes/(app)/workspace/projects/+page.svelte b/src/routes/(app)/workspace/projects/+page.svelte new file mode 100644 index 0000000000..9f3f250178 --- /dev/null +++ b/src/routes/(app)/workspace/projects/+page.svelte @@ -0,0 +1,5 @@ + + + diff --git a/src/routes/(app)/workspace/projects/create/+page.svelte b/src/routes/(app)/workspace/projects/create/+page.svelte new file mode 100644 index 0000000000..d3744383a3 --- /dev/null +++ b/src/routes/(app)/workspace/projects/create/+page.svelte @@ -0,0 +1,5 @@ + + + diff --git a/src/routes/(app)/workspace/projects/edit/+page.svelte b/src/routes/(app)/workspace/projects/edit/+page.svelte new file mode 100644 index 0000000000..121a5dfcf5 --- /dev/null +++ b/src/routes/(app)/workspace/projects/edit/+page.svelte @@ -0,0 +1,5 @@ + + + From c2732a099081f9632ca8c4c511b77ffba0a96c5e Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Tue, 1 Oct 2024 17:46:56 -0700 Subject: [PATCH 069/252] refac --- .../versions/6a39f3d8e55c_add_project_table.py | 5 +++++ .../chat/MessageInput/Commands/Projects.svelte | 9 --------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/backend/open_webui/migrations/versions/6a39f3d8e55c_add_project_table.py b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_project_table.py index c6c42f64b1..201dd11757 100644 --- a/backend/open_webui/migrations/versions/6a39f3d8e55c_add_project_table.py +++ b/backend/open_webui/migrations/versions/6a39f3d8e55c_add_project_table.py @@ -9,6 +9,8 @@ Create Date: 2024-10-01 14:02:35.241684 from alembic import op import sqlalchemy as sa from sqlalchemy.sql import table, column, select +import json + revision = "6a39f3d8e55c" down_revision = "c0fbf31ca0db" @@ -39,6 +41,7 @@ def upgrade(): column("user_id", sa.String()), column("name", sa.String()), column("title", sa.Text()), + column("content", sa.Text()), column("timestamp", sa.BigInteger()), ) @@ -49,6 +52,7 @@ def upgrade(): document_table.c.user_id, document_table.c.name, document_table.c.title, + document_table.c.content, document_table.c.timestamp, ) ) @@ -62,6 +66,7 @@ def upgrade(): description=doc.name, meta={ "legacy": True, + "tags": json.loads(doc.content or "{}").get("tags", []), }, name=doc.title, created_at=doc.timestamp, diff --git a/src/lib/components/chat/MessageInput/Commands/Projects.svelte b/src/lib/components/chat/MessageInput/Commands/Projects.svelte index 9be916b6fc..2b80ca0ee7 100644 --- a/src/lib/components/chat/MessageInput/Commands/Projects.svelte +++ b/src/lib/components/chat/MessageInput/Commands/Projects.svelte @@ -29,15 +29,6 @@ selectedIdx = 0; } - type ObjectWithName = { - name: string; - }; - - const findByName = (obj: ObjectWithName, command: string) => { - const name = obj.name.toLowerCase(); - return name.includes(command.toLowerCase().split(' ')?.at(0)?.substring(1) ?? ''); - }; - export const selectUp = () => { selectedIdx = Math.max(0, selectedIdx - 1); }; From 1b7d363d32aae5e6583bf2c3b96108d8fcceb18f Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Tue, 1 Oct 2024 21:32:59 -0700 Subject: [PATCH 070/252] refac --- .../open_webui/apps/webui/models/projects.py | 3 +- .../open_webui/apps/webui/routers/projects.py | 38 +++-- src/lib/apis/projects/index.ts | 6 +- .../admin/Settings/WebSearch.svelte | 2 +- .../chat/MessageInput/Commands.svelte | 2 +- src/lib/components/workspace/Projects.svelte | 7 +- .../workspace/Projects/CreateProject.svelte | 138 ++++++++++++++++++ .../{EditProject.svelte => Project.svelte} | 0 src/routes/(app)/workspace/+layout.svelte | 2 +- .../workspace/projects/[id]/+page.svelte | 7 + .../workspace/projects/edit/+page.svelte | 5 - 11 files changed, 182 insertions(+), 28 deletions(-) rename src/lib/components/workspace/Projects/{EditProject.svelte => Project.svelte} (100%) create mode 100644 src/routes/(app)/workspace/projects/[id]/+page.svelte delete mode 100644 src/routes/(app)/workspace/projects/edit/+page.svelte diff --git a/backend/open_webui/apps/webui/models/projects.py b/backend/open_webui/apps/webui/models/projects.py index 5b9f070903..4debbbe28f 100644 --- a/backend/open_webui/apps/webui/models/projects.py +++ b/backend/open_webui/apps/webui/models/projects.py @@ -2,6 +2,7 @@ import json import logging import time from typing import Optional +import uuid from open_webui.apps.webui.internal.db import Base, get_db from open_webui.env import SRC_LOG_LEVELS @@ -65,7 +66,6 @@ class ProjectResponse(BaseModel): class ProjectForm(BaseModel): - id: str name: str description: str data: Optional[dict] = None @@ -79,6 +79,7 @@ class ProjectTable: project = ProjectModel( **{ **form_data.model_dump(), + "id": str(uuid.uuid4()), "user_id": user_id, "created_at": int(time.time()), "updated_at": int(time.time()), diff --git a/backend/open_webui/apps/webui/routers/projects.py b/backend/open_webui/apps/webui/routers/projects.py index ed47b41b2b..493bde99e6 100644 --- a/backend/open_webui/apps/webui/routers/projects.py +++ b/backend/open_webui/apps/webui/routers/projects.py @@ -46,21 +46,32 @@ async def get_projects(id: Optional[str] = None, user=Depends(get_verified_user) @router.post("/create", response_model=Optional[ProjectResponse]) async def create_new_project(form_data: ProjectForm, user=Depends(get_admin_user)): - project = Projects.get_project_by_id(form_data.id) - if project is None: - project = Projects.insert_new_project(user.id, form_data) + project = Projects.insert_new_project(user.id, form_data) - if project: - return project - else: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.FILE_EXISTS, - ) + if project: + return project else: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.ID_TAKEN, + detail=ERROR_MESSAGES.FILE_EXISTS, + ) + + +############################ +# GetProjectById +############################ + + +@router.get("/{id}", response_model=Optional[ProjectResponse]) +async def get_projects(id: str, user=Depends(get_verified_user)): + project = Projects.get_project_by_id(id=id) + + if project: + return project + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND, ) @@ -69,8 +80,9 @@ async def create_new_project(form_data: ProjectForm, user=Depends(get_admin_user ############################ -@router.post("/update", response_model=Optional[ProjectResponse]) +@router.post("/{id}/update", response_model=Optional[ProjectResponse]) async def update_project_by_id( + id: str, form_data: ProjectForm, user=Depends(get_admin_user), ): @@ -89,7 +101,7 @@ async def update_project_by_id( ############################ -@router.delete("/delete", response_model=bool) +@router.delete("/{id}/delete", response_model=bool) async def delete_project_by_id(id: str, user=Depends(get_admin_user)): result = Projects.delete_project_by_id(id=id) return result diff --git a/src/lib/apis/projects/index.ts b/src/lib/apis/projects/index.ts index 8fad3ffd89..af448d1bb1 100644 --- a/src/lib/apis/projects/index.ts +++ b/src/lib/apis/projects/index.ts @@ -1,6 +1,6 @@ import { WEBUI_API_BASE_URL } from '$lib/constants'; -export const createNewProject = async (token: string, id: string, name: string) => { +export const createNewProject = async (token: string, name: string, description: string) => { let error = null; const res = await fetch(`${WEBUI_API_BASE_URL}/projects/create`, { @@ -11,8 +11,8 @@ export const createNewProject = async (token: string, id: string, name: string) authorization: `Bearer ${token}` }, body: JSON.stringify({ - id: id, - name: name + name: name, + description: description }) }) .then(async (res) => { diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 0a0c2eb165..ddda39b10b 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -2,7 +2,7 @@ import { getRAGConfig, updateRAGConfig } from '$lib/apis/retrieval'; import Switch from '$lib/components/common/Switch.svelte'; - import { documents, models } from '$lib/stores'; + import { models } from '$lib/stores'; import { onMount, getContext } from 'svelte'; import { toast } from 'svelte-sonner'; import SensitiveInput from '$lib/components/common/SensitiveInput.svelte'; diff --git a/src/lib/components/chat/MessageInput/Commands.svelte b/src/lib/components/chat/MessageInput/Commands.svelte index bb153f6474..f9c0cc25fc 100644 --- a/src/lib/components/chat/MessageInput/Commands.svelte +++ b/src/lib/components/chat/MessageInput/Commands.svelte @@ -114,7 +114,7 @@ files = [ ...files, { - type: e?.detail?.type ?? 'file', + type: e?.detail?.meta?.legacy ? 'file' : 'project', ...e.detail, status: 'processed' } diff --git a/src/lib/components/workspace/Projects.svelte b/src/lib/components/workspace/Projects.svelte index 1a37c82826..0eda768f4c 100644 --- a/src/lib/components/workspace/Projects.svelte +++ b/src/lib/components/workspace/Projects.svelte @@ -111,12 +111,13 @@
-
+
{#each filteredProjects as project} + {JSON.stringify(project)} + +
{ + submitHandler(); + }} + > +
+
Create a project
+ +
+
+
What are you working on?
+ +
+ +
+
+ +
+
What are you trying to achieve?
+ +
+