From 4e8b3906821a9a10f4fd0038373291dff41b65cf Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Tue, 28 Jan 2025 23:03:15 -0600 Subject: [PATCH 001/232] Add RAG_WEB_LOADER + Playwright mode + improve stability of search --- backend/open_webui/config.py | 5 + backend/open_webui/main.py | 2 + backend/open_webui/retrieval/web/main.py | 4 + backend/open_webui/retrieval/web/utils.py | 179 +++++++++++++++++++--- backend/open_webui/routers/retrieval.py | 21 ++- backend/open_webui/utils/middleware.py | 27 ++-- backend/requirements.txt | 2 +- backend/start.sh | 9 ++ backend/start_windows.bat | 9 ++ pyproject.toml | 1 + 10 files changed, 220 insertions(+), 39 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index c37b831dec..3cec6edd70 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1712,6 +1712,11 @@ RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), ) +RAG_WEB_LOADER = PersistentConfig( + "RAG_WEB_LOADER", + "rag.web.loader", + os.environ.get("RAG_WEB_LOADER", "safe_web") +) #################################### # Images diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 00270aabc4..985624d816 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -129,6 +129,7 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, + RAG_WEB_LOADER, WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE, WHISPER_MODEL_DIR, @@ -526,6 +527,7 @@ app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_K app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS +app.state.config.RAG_WEB_LOADER = RAG_WEB_LOADER app.state.EMBEDDING_FUNCTION = None app.state.ef = None diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 1af8a70aa1..28a749e7d2 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -1,3 +1,5 @@ +import validators + from typing import Optional from urllib.parse import urlparse @@ -10,6 +12,8 @@ def get_filtered_results(results, filter_list): filtered_results = [] for result in results: url = result.get("url") or result.get("link", "") + if not validators.url(url): + continue domain = urlparse(url).netloc if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): filtered_results.append(result) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index a322bbbfcd..bdc626749a 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -1,16 +1,21 @@ +import asyncio +from datetime import datetime, time, timedelta import socket +import ssl import urllib.parse +import certifi import validators -from typing import Union, Sequence, Iterator +from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator from langchain_community.document_loaders import ( WebBaseLoader, + PlaywrightURLLoader ) from langchain_core.documents import Document from open_webui.constants import ERROR_MESSAGES -from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS import logging @@ -42,6 +47,15 @@ def validate_url(url: Union[str, Sequence[str]]): else: return False +def safe_validate_urls(url: Sequence[str]) -> Sequence[str]: + valid_urls = [] + for u in url: + try: + if validate_url(u): + valid_urls.append(u) + except ValueError: + continue + return valid_urls def resolve_hostname(hostname): # Get address information @@ -53,6 +67,131 @@ def resolve_hostname(hostname): return ipv4_addresses, ipv6_addresses +def extract_metadata(soup, url): + metadata = { + "source": url + } + if title := soup.find("title"): + metadata["title"] = title.get_text() + if description := soup.find("meta", attrs={"name": "description"}): + metadata["description"] = description.get( + "content", "No description found." + ) + if html := soup.find("html"): + metadata["language"] = html.get("lang", "No language found.") + return metadata + +class SafePlaywrightURLLoader(PlaywrightURLLoader): + """Load HTML pages safely with Playwright, supporting SSL verification and rate limiting. + + Attributes: + urls (List[str]): List of URLs to load. + verify_ssl (bool): If True, verify SSL certificates. + requests_per_second (Optional[float]): Number of requests per second to limit to. + continue_on_failure (bool): If True, continue loading other URLs on failure. + headless (bool): If True, the browser will run in headless mode. + """ + + def __init__( + self, + urls: List[str], + verify_ssl: bool = True, + requests_per_second: Optional[float] = None, + continue_on_failure: bool = True, + headless: bool = True, + remove_selectors: Optional[List[str]] = None, + proxy: Optional[Dict[str, str]] = None + ): + """Initialize with additional safety parameters.""" + super().__init__( + urls=urls, + continue_on_failure=continue_on_failure, + headless=headless, + remove_selectors=remove_selectors, + proxy=proxy + ) + self.verify_ssl = verify_ssl + self.requests_per_second = requests_per_second + self.last_request_time = None + + def _verify_ssl_cert(self, url: str) -> bool: + """Verify SSL certificate for the given URL.""" + if not url.startswith("https://"): + return True + + try: + hostname = url.split("://")[-1].split("/")[0] + context = ssl.create_default_context(cafile=certifi.where()) + with context.wrap_socket(ssl.socket(), server_hostname=hostname) as s: + s.connect((hostname, 443)) + return True + except ssl.SSLError: + return False + except Exception as e: + log.warning(f"SSL verification failed for {url}: {str(e)}") + return False + + async def _wait_for_rate_limit(self): + """Wait to respect the rate limit if specified.""" + if self.requests_per_second and self.last_request_time: + min_interval = timedelta(seconds=1.0 / self.requests_per_second) + time_since_last = datetime.now() - self.last_request_time + if time_since_last < min_interval: + await asyncio.sleep((min_interval - time_since_last).total_seconds()) + self.last_request_time = datetime.now() + + def _sync_wait_for_rate_limit(self): + """Synchronous version of rate limit wait.""" + if self.requests_per_second and self.last_request_time: + min_interval = timedelta(seconds=1.0 / self.requests_per_second) + time_since_last = datetime.now() - self.last_request_time + if time_since_last < min_interval: + time.sleep((min_interval - time_since_last).total_seconds()) + self.last_request_time = datetime.now() + + async def _safe_process_url(self, url: str) -> bool: + """Perform safety checks before processing a URL.""" + if self.verify_ssl and not self._verify_ssl_cert(url): + raise ValueError(f"SSL certificate verification failed for {url}") + await self._wait_for_rate_limit() + return True + + def _safe_process_url_sync(self, url: str) -> bool: + """Synchronous version of safety checks.""" + if self.verify_ssl and not self._verify_ssl_cert(url): + raise ValueError(f"SSL certificate verification failed for {url}") + self._sync_wait_for_rate_limit() + return True + + async def alazy_load(self) -> AsyncIterator[Document]: + """Safely load URLs asynchronously.""" + parent_iterator = super().alazy_load() + + async for document in parent_iterator: + url = document.metadata["source"] + try: + await self._safe_process_url(url) + yield document + except Exception as e: + if self.continue_on_failure: + log.error(f"Error processing {url}, exception: {e}") + continue + raise e + + def lazy_load(self) -> Iterator[Document]: + """Safely load URLs synchronously.""" + parent_iterator = super().lazy_load() + + for document in parent_iterator: + url = document.metadata["source"] + try: + self._safe_process_url_sync(url) + yield document + except Exception as e: + if self.continue_on_failure: + log.error(f"Error processing {url}, exception: {e}") + continue + raise e class SafeWebBaseLoader(WebBaseLoader): """WebBaseLoader with enhanced error handling for URLs.""" @@ -65,15 +204,7 @@ class SafeWebBaseLoader(WebBaseLoader): text = soup.get_text(**self.bs_get_text_kwargs) # Build metadata - metadata = {"source": path} - if title := soup.find("title"): - metadata["title"] = title.get_text() - if description := soup.find("meta", attrs={"name": "description"}): - metadata["description"] = description.get( - "content", "No description found." - ) - if html := soup.find("html"): - metadata["language"] = html.get("lang", "No language found.") + metadata = extract_metadata(soup, path) yield Document(page_content=text, metadata=metadata) except Exception as e: @@ -87,11 +218,21 @@ def get_web_loader( requests_per_second: int = 2, ): # Check if the URL is valid - if not validate_url(urls): - raise ValueError(ERROR_MESSAGES.INVALID_URL) - return SafeWebBaseLoader( - urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) + + if RAG_WEB_LOADER.value == "chromium": + log.info("Using SafePlaywrightURLLoader") + return SafePlaywrightURLLoader( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) + else: + log.info("Using SafeWebBaseLoader") + return SafeWebBaseLoader( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 2cffd9ead4..e65a76050a 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1238,9 +1238,11 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: @router.post("/process/web/search") -def process_web_search( - request: Request, form_data: SearchForm, user=Depends(get_verified_user) +async def process_web_search( + request: Request, form_data: SearchForm, extra_params: dict, user=Depends(get_verified_user) ): + event_emitter = extra_params["__event_emitter__"] + try: logging.info( f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" @@ -1258,6 +1260,18 @@ def process_web_search( log.debug(f"web_results: {web_results}") + await event_emitter( + { + "type": "status", + "data": { + "action": "web_search", + "description": "Loading {{count}} sites...", + "urls": [result.link for result in web_results], + "done": False + }, + } + ) + try: collection_name = form_data.collection_name if collection_name == "" or collection_name is None: @@ -1271,7 +1285,8 @@ def process_web_search( verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, ) - docs = loader.load() + docs = [doc async for doc in loader.alazy_load()] + # docs = loader.load() save_docs_to_vector_db(request, docs, collection_name, overwrite=True) return { diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 6b2329be1c..27e499e0c1 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -419,21 +419,16 @@ async def chat_web_search_handler( try: - # Offload process_web_search to a separate thread - loop = asyncio.get_running_loop() - with ThreadPoolExecutor() as executor: - results = await loop.run_in_executor( - executor, - lambda: process_web_search( - request, - SearchForm( - **{ - "query": searchQuery, - } - ), - user, - ), - ) + results = await process_web_search( + request, + SearchForm( + **{ + "query": searchQuery, + } + ), + extra_params=extra_params, + user=user + ) if results: await event_emitter( @@ -441,7 +436,7 @@ async def chat_web_search_handler( "type": "status", "data": { "action": "web_search", - "description": "Searched {{count}} sites", + "description": "Loaded {{count}} sites", "query": searchQuery, "urls": results["filenames"], "done": True, diff --git a/backend/requirements.txt b/backend/requirements.txt index eecb9c4a5b..0dd7b1a8ad 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -46,7 +46,7 @@ chromadb==0.6.2 pymilvus==2.5.0 qdrant-client~=1.12.0 opensearch-py==2.7.1 - +playwright==1.49.1 transformers sentence-transformers==3.3.1 diff --git a/backend/start.sh b/backend/start.sh index a945acb62e..ce56b1867b 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -3,6 +3,15 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit +# Add conditional Playwright browser installation +if [[ "${RAG_WEB_LOADER,,}" == "chromium" ]]; then + echo "Installing Playwright browsers..." + playwright install chromium + playwright install-deps chromium + + python -c "import nltk; nltk.download('punkt_tab')" +fi + KEY_FILE=.webui_secret_key PORT="${PORT:-8080}" diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 3e8c6b97c4..3b64462582 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -6,6 +6,15 @@ SETLOCAL ENABLEDELAYEDEXPANSION SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b +:: Add conditional Playwright browser installation +IF /I "%RAG_WEB_LOADER%" == "chromium" ( + echo Installing Playwright browsers... + playwright install chromium + playwright install-deps chromium + + python -c "import nltk; nltk.download('punkt_tab')" +) + SET "KEY_FILE=.webui_secret_key" IF "%PORT%"=="" SET PORT=8080 IF "%HOST%"=="" SET HOST=0.0.0.0 diff --git a/pyproject.toml b/pyproject.toml index edd01db8f6..c8ec0f497c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "pymilvus==2.5.0", "qdrant-client~=1.12.0", "opensearch-py==2.7.1", + "playwright==1.49.1", "transformers", "sentence-transformers==3.3.1", From 2452e271cddccf0c835ae17f4505471eb41a4313 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Thu, 30 Jan 2025 20:31:31 -0600 Subject: [PATCH 002/232] Refine RAG_WEB_LOADER --- backend/open_webui/retrieval/web/utils.py | 34 +++++++++++------------ backend/start.sh | 2 +- backend/start_windows.bat | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index bdc626749a..3c0c34074c 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -5,6 +5,7 @@ import ssl import urllib.parse import certifi import validators +from collections import defaultdict from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator from langchain_community.document_loaders import ( @@ -211,28 +212,27 @@ class SafeWebBaseLoader(WebBaseLoader): # Log the error and continue with the next URL log.error(f"Error loading {path}: {e}") +RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) +RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader +RAG_WEB_LOADERS["safe_web"] = SafeWebBaseLoader def get_web_loader( urls: Union[str, Sequence[str]], verify_ssl: bool = True, requests_per_second: int = 2, ): - # Check if the URL is valid + # Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - if RAG_WEB_LOADER.value == "chromium": - log.info("Using SafePlaywrightURLLoader") - return SafePlaywrightURLLoader( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) - else: - log.info("Using SafeWebBaseLoader") - return SafeWebBaseLoader( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + # Get the appropriate WebLoader based on the configuration + WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] + web_loader = WebLoaderClass( + safe_urls, + verify_ssl=verify_ssl, + requests_per_second=requests_per_second, + continue_on_failure=True, + ) + + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) + + return web_loader \ No newline at end of file diff --git a/backend/start.sh b/backend/start.sh index ce56b1867b..2501f413f0 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,7 +4,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation -if [[ "${RAG_WEB_LOADER,,}" == "chromium" ]]; then +if [[ "${RAG_WEB_LOADER,,}" == "playwright" ]]; then echo "Installing Playwright browsers..." playwright install chromium playwright install-deps chromium diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 3b64462582..0f2792cc0c 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -7,7 +7,7 @@ SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation -IF /I "%RAG_WEB_LOADER%" == "chromium" ( +IF /I "%RAG_WEB_LOADER%" == "playwright" ( echo Installing Playwright browsers... playwright install chromium playwright install-deps chromium From 77ae73e659e6fea6da34c3ea913edb3dc4f037a9 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Thu, 30 Jan 2025 23:18:11 -0600 Subject: [PATCH 003/232] Adjust search event messages + translations --- backend/open_webui/routers/retrieval.py | 2 +- backend/open_webui/utils/middleware.py | 2 +- src/lib/components/chat/Messages/ResponseMessage.svelte | 1 + src/lib/i18n/locales/ar-BH/translation.json | 1 + src/lib/i18n/locales/bg-BG/translation.json | 1 + src/lib/i18n/locales/bn-BD/translation.json | 1 + src/lib/i18n/locales/ca-ES/translation.json | 1 + src/lib/i18n/locales/ceb-PH/translation.json | 1 + src/lib/i18n/locales/cs-CZ/translation.json | 1 + src/lib/i18n/locales/da-DK/translation.json | 1 + src/lib/i18n/locales/de-DE/translation.json | 1 + src/lib/i18n/locales/dg-DG/translation.json | 1 + src/lib/i18n/locales/el-GR/translation.json | 1 + src/lib/i18n/locales/en-GB/translation.json | 1 + src/lib/i18n/locales/en-US/translation.json | 1 + src/lib/i18n/locales/es-ES/translation.json | 1 + src/lib/i18n/locales/eu-ES/translation.json | 1 + src/lib/i18n/locales/fa-IR/translation.json | 1 + src/lib/i18n/locales/fi-FI/translation.json | 1 + src/lib/i18n/locales/fr-CA/translation.json | 1 + src/lib/i18n/locales/fr-FR/translation.json | 1 + src/lib/i18n/locales/he-IL/translation.json | 1 + src/lib/i18n/locales/hi-IN/translation.json | 1 + src/lib/i18n/locales/hr-HR/translation.json | 1 + src/lib/i18n/locales/hu-HU/translation.json | 1 + src/lib/i18n/locales/id-ID/translation.json | 1 + src/lib/i18n/locales/ie-GA/translation.json | 1 + src/lib/i18n/locales/it-IT/translation.json | 1 + src/lib/i18n/locales/ja-JP/translation.json | 1 + src/lib/i18n/locales/ka-GE/translation.json | 1 + src/lib/i18n/locales/ko-KR/translation.json | 1 + src/lib/i18n/locales/lt-LT/translation.json | 1 + src/lib/i18n/locales/ms-MY/translation.json | 1 + src/lib/i18n/locales/nb-NO/translation.json | 1 + src/lib/i18n/locales/nl-NL/translation.json | 1 + src/lib/i18n/locales/pa-IN/translation.json | 1 + src/lib/i18n/locales/pl-PL/translation.json | 1 + src/lib/i18n/locales/pt-BR/translation.json | 1 + src/lib/i18n/locales/pt-PT/translation.json | 1 + src/lib/i18n/locales/ro-RO/translation.json | 1 + src/lib/i18n/locales/ru-RU/translation.json | 1 + src/lib/i18n/locales/sk-SK/translation.json | 1 + src/lib/i18n/locales/sr-RS/translation.json | 1 + src/lib/i18n/locales/sv-SE/translation.json | 1 + src/lib/i18n/locales/th-TH/translation.json | 1 + src/lib/i18n/locales/tk-TW/translation.json | 1 + src/lib/i18n/locales/tr-TR/translation.json | 1 + src/lib/i18n/locales/uk-UA/translation.json | 1 + src/lib/i18n/locales/ur-PK/translation.json | 1 + src/lib/i18n/locales/vi-VN/translation.json | 1 + src/lib/i18n/locales/zh-CN/translation.json | 1 + src/lib/i18n/locales/zh-TW/translation.json | 1 + 52 files changed, 52 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index e65a76050a..5076980844 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1265,7 +1265,7 @@ async def process_web_search( "type": "status", "data": { "action": "web_search", - "description": "Loading {{count}} sites...", + "description": "Loading {{count}} sites", "urls": [result.link for result in web_results], "done": False }, diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index dde34bec84..2a68d8d0a9 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -436,7 +436,7 @@ async def chat_web_search_handler( "type": "status", "data": { "action": "web_search", - "description": "Loaded {{count}} sites", + "description": "Searched {{count}} sites", "query": searchQuery, "urls": results["filenames"], "done": True, diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index d6b31e6a0c..9952f0f8ec 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -580,6 +580,7 @@ : ''} text-gray-500 dark:text-gray-500 text-base line-clamp-1 text-wrap" > + {#if status?.description.includes('{{searchQuery}}')} {$i18n.t(status?.description, { searchQuery: status?.query diff --git a/src/lib/i18n/locales/ar-BH/translation.json b/src/lib/i18n/locales/ar-BH/translation.json index d115f42279..f2f9d25f25 100644 --- a/src/lib/i18n/locales/ar-BH/translation.json +++ b/src/lib/i18n/locales/ar-BH/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "يمكن أن تصدر بعض الأخطاء. لذلك يجب التحقق من المعلومات المهمة", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bg-BG/translation.json b/src/lib/i18n/locales/bg-BG/translation.json index f2eea288a4..a4d06d3898 100644 --- a/src/lib/i18n/locales/bg-BG/translation.json +++ b/src/lib/i18n/locales/bg-BG/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могат да правят грешки. Проверете важните данни.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bn-BD/translation.json b/src/lib/i18n/locales/bn-BD/translation.json index 18a270eb2b..5ef08af65f 100644 --- a/src/lib/i18n/locales/bn-BD/translation.json +++ b/src/lib/i18n/locales/bn-BD/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM ভুল করতে পারে। গুরুত্বপূর্ণ তথ্য যাচাই করে নিন।", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ca-ES/translation.json b/src/lib/i18n/locales/ca-ES/translation.json index 3eeef48d4b..db3589a2db 100644 --- a/src/lib/i18n/locales/ca-ES/translation.json +++ b/src/lib/i18n/locales/ca-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escoltant...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Els models de llenguatge poden cometre errors. Verifica la informació important.", + "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Models locals", "Lost": "Perdut", diff --git a/src/lib/i18n/locales/ceb-PH/translation.json b/src/lib/i18n/locales/ceb-PH/translation.json index 9029fbae0c..23a7c01ba5 100644 --- a/src/lib/i18n/locales/ceb-PH/translation.json +++ b/src/lib/i18n/locales/ceb-PH/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Ang mga LLM mahimong masayop. ", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/cs-CZ/translation.json b/src/lib/i18n/locales/cs-CZ/translation.json index d07b3b6ec7..6eefeb3237 100644 --- a/src/lib/i18n/locales/cs-CZ/translation.json +++ b/src/lib/i18n/locales/cs-CZ/translation.json @@ -561,6 +561,7 @@ "Listening...": "Poslouchání...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM mohou dělat chyby. Ověřte si důležité informace.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokální modely", "Lost": "Ztracený", diff --git a/src/lib/i18n/locales/da-DK/translation.json b/src/lib/i18n/locales/da-DK/translation.json index dca76c74db..4f7cbd00da 100644 --- a/src/lib/i18n/locales/da-DK/translation.json +++ b/src/lib/i18n/locales/da-DK/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lytter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'er kan lave fejl. Bekræft vigtige oplysninger.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokale modeller", "Lost": "", diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json index f96e6bf087..bf934cedf7 100644 --- a/src/lib/i18n/locales/de-DE/translation.json +++ b/src/lib/i18n/locales/de-DE/translation.json @@ -561,6 +561,7 @@ "Listening...": "Höre zu...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs können Fehler machen. Überprüfe wichtige Informationen.", + "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale Modelle", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/dg-DG/translation.json b/src/lib/i18n/locales/dg-DG/translation.json index aaca0565ba..bfe1b7602b 100644 --- a/src/lib/i18n/locales/dg-DG/translation.json +++ b/src/lib/i18n/locales/dg-DG/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs can make borks. Verify important info.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/el-GR/translation.json b/src/lib/i18n/locales/el-GR/translation.json index 8058615a06..dcc608769f 100644 --- a/src/lib/i18n/locales/el-GR/translation.json +++ b/src/lib/i18n/locales/el-GR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Ακούγεται...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Τα LLM μπορούν να κάνουν λάθη. Επαληθεύστε σημαντικές πληροφορίες.", + "Loading {{count}} sites": "", "Local": "Τοπικό", "Local Models": "Τοπικά Μοντέλα", "Lost": "Χαμένος", diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json index 3075e7c0f1..108c1d9e8c 100644 --- a/src/lib/i18n/locales/en-GB/translation.json +++ b/src/lib/i18n/locales/en-GB/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index 3075e7c0f1..108c1d9e8c 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/es-ES/translation.json b/src/lib/i18n/locales/es-ES/translation.json index d748943ca2..4a1d421667 100644 --- a/src/lib/i18n/locales/es-ES/translation.json +++ b/src/lib/i18n/locales/es-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escuchando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Los LLM pueden cometer errores. Verifica la información importante.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos locales", "Lost": "", diff --git a/src/lib/i18n/locales/eu-ES/translation.json b/src/lib/i18n/locales/eu-ES/translation.json index 1e5da410f4..334fc11f6d 100644 --- a/src/lib/i18n/locales/eu-ES/translation.json +++ b/src/lib/i18n/locales/eu-ES/translation.json @@ -561,6 +561,7 @@ "Listening...": "Entzuten...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMek akatsak egin ditzakete. Egiaztatu informazio garrantzitsua.", + "Loading {{count}} sites": "", "Local": "Lokala", "Local Models": "Modelo lokalak", "Lost": "Galduta", diff --git a/src/lib/i18n/locales/fa-IR/translation.json b/src/lib/i18n/locales/fa-IR/translation.json index cd647b8a71..512bb0dbbc 100644 --- a/src/lib/i18n/locales/fa-IR/translation.json +++ b/src/lib/i18n/locales/fa-IR/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "مدل\u200cهای زبانی بزرگ می\u200cتوانند اشتباه کنند. اطلاعات مهم را راستی\u200cآزمایی کنید.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/fi-FI/translation.json b/src/lib/i18n/locales/fi-FI/translation.json index 399da492f9..bbc0a2d9a6 100644 --- a/src/lib/i18n/locales/fi-FI/translation.json +++ b/src/lib/i18n/locales/fi-FI/translation.json @@ -561,6 +561,7 @@ "Listening...": "Kuuntelee...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Kielimallit voivat tehdä virheitä. Tarkista tärkeät tiedot.", + "Loading {{count}} sites": "", "Local": "Paikallinen", "Local Models": "Paikalliset mallit", "Lost": "Mennyt", diff --git a/src/lib/i18n/locales/fr-CA/translation.json b/src/lib/i18n/locales/fr-CA/translation.json index 051780d71d..3f16e3a614 100644 --- a/src/lib/i18n/locales/fr-CA/translation.json +++ b/src/lib/i18n/locales/fr-CA/translation.json @@ -561,6 +561,7 @@ "Listening...": "En train d'écouter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modèles locaux", "Lost": "", diff --git a/src/lib/i18n/locales/fr-FR/translation.json b/src/lib/i18n/locales/fr-FR/translation.json index 62bdd81397..e8fe232d1d 100644 --- a/src/lib/i18n/locales/fr-FR/translation.json +++ b/src/lib/i18n/locales/fr-FR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Écoute en cours...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", + "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Modèles locaux", "Lost": "Perdu", diff --git a/src/lib/i18n/locales/he-IL/translation.json b/src/lib/i18n/locales/he-IL/translation.json index 17756cbb09..d54937e0fd 100644 --- a/src/lib/i18n/locales/he-IL/translation.json +++ b/src/lib/i18n/locales/he-IL/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "מודלים בשפה טבעית יכולים לטעות. אמת מידע חשוב.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hi-IN/translation.json b/src/lib/i18n/locales/hi-IN/translation.json index ea039a59d8..9cd2a5899e 100644 --- a/src/lib/i18n/locales/hi-IN/translation.json +++ b/src/lib/i18n/locales/hi-IN/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "एलएलएम गलतियाँ कर सकते हैं। महत्वपूर्ण जानकारी सत्यापित करें.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hr-HR/translation.json b/src/lib/i18n/locales/hr-HR/translation.json index c9e293389b..72826f0d98 100644 --- a/src/lib/i18n/locales/hr-HR/translation.json +++ b/src/lib/i18n/locales/hr-HR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Slušam...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-ovi mogu pogriješiti. Provjerite važne informacije.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalni modeli", "Lost": "", diff --git a/src/lib/i18n/locales/hu-HU/translation.json b/src/lib/i18n/locales/hu-HU/translation.json index 0b9e15140d..7b4e388198 100644 --- a/src/lib/i18n/locales/hu-HU/translation.json +++ b/src/lib/i18n/locales/hu-HU/translation.json @@ -561,6 +561,7 @@ "Listening...": "Hallgatás...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Az LLM-ek hibázhatnak. Ellenőrizze a fontos információkat.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Helyi modellek", "Lost": "Elveszett", diff --git a/src/lib/i18n/locales/id-ID/translation.json b/src/lib/i18n/locales/id-ID/translation.json index 9c4d477a0f..4d9a9e8a1b 100644 --- a/src/lib/i18n/locales/id-ID/translation.json +++ b/src/lib/i18n/locales/id-ID/translation.json @@ -561,6 +561,7 @@ "Listening...": "Mendengarkan", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM dapat membuat kesalahan. Verifikasi informasi penting.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Lokal", "Lost": "", diff --git a/src/lib/i18n/locales/ie-GA/translation.json b/src/lib/i18n/locales/ie-GA/translation.json index a07d360652..21e6005ed5 100644 --- a/src/lib/i18n/locales/ie-GA/translation.json +++ b/src/lib/i18n/locales/ie-GA/translation.json @@ -561,6 +561,7 @@ "Listening...": "Éisteacht...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Is féidir le LLManna botúin a dhéanamh. Fíoraigh faisnéis thábhachtach.", + "Loading {{count}} sites": "", "Local": "Áitiúil", "Local Models": "Múnlaí Áitiúla", "Lost": "Cailleadh", diff --git a/src/lib/i18n/locales/it-IT/translation.json b/src/lib/i18n/locales/it-IT/translation.json index 2fa94bace6..97b0a5e44e 100644 --- a/src/lib/i18n/locales/it-IT/translation.json +++ b/src/lib/i18n/locales/it-IT/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Gli LLM possono commettere errori. Verifica le informazioni importanti.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ja-JP/translation.json b/src/lib/i18n/locales/ja-JP/translation.json index 492acce1b7..ca1f5d1ff8 100644 --- a/src/lib/i18n/locales/ja-JP/translation.json +++ b/src/lib/i18n/locales/ja-JP/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM は間違いを犯す可能性があります。重要な情報を検証してください。", + "Loading {{count}} sites": "", "Local": "", "Local Models": "ローカルモデル", "Lost": "", diff --git a/src/lib/i18n/locales/ka-GE/translation.json b/src/lib/i18n/locales/ka-GE/translation.json index b26aa6c73e..2d93798e57 100644 --- a/src/lib/i18n/locales/ka-GE/translation.json +++ b/src/lib/i18n/locales/ka-GE/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "შესაძლოა LLM-ებმა შეცდომები დაუშვან. გადაამოწმეთ მნიშვნელოვანი ინფორმაცია.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ko-KR/translation.json b/src/lib/i18n/locales/ko-KR/translation.json index ff33f8b967..c34103efab 100644 --- a/src/lib/i18n/locales/ko-KR/translation.json +++ b/src/lib/i18n/locales/ko-KR/translation.json @@ -561,6 +561,7 @@ "Listening...": "듣는 중...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM에 오류가 있을 수 있습니다. 중요한 정보는 확인이 필요합니다.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "로컬 모델", "Lost": "패배", diff --git a/src/lib/i18n/locales/lt-LT/translation.json b/src/lib/i18n/locales/lt-LT/translation.json index b562d3f7c6..e21ed8f805 100644 --- a/src/lib/i18n/locales/lt-LT/translation.json +++ b/src/lib/i18n/locales/lt-LT/translation.json @@ -561,6 +561,7 @@ "Listening...": "Klausoma...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Dideli kalbos modeliai gali klysti. Patikrinkite atsakymų teisingumą.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalūs modeliai", "Lost": "", diff --git a/src/lib/i18n/locales/ms-MY/translation.json b/src/lib/i18n/locales/ms-MY/translation.json index 950a065afc..a37f71376d 100644 --- a/src/lib/i18n/locales/ms-MY/translation.json +++ b/src/lib/i18n/locales/ms-MY/translation.json @@ -561,6 +561,7 @@ "Listening...": "Mendengar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM boleh membuat kesilapan. Sahkan maklumat penting", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Tempatan", "Lost": "", diff --git a/src/lib/i18n/locales/nb-NO/translation.json b/src/lib/i18n/locales/nb-NO/translation.json index 1ce3a9aa96..9619ad9e10 100644 --- a/src/lib/i18n/locales/nb-NO/translation.json +++ b/src/lib/i18n/locales/nb-NO/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lytter ...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Språkmodeller kan gjøre feil. Kontroller viktige opplysninger.", + "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale modeller", "Lost": "Tapt", diff --git a/src/lib/i18n/locales/nl-NL/translation.json b/src/lib/i18n/locales/nl-NL/translation.json index 68764d8a8a..dd82213ed1 100644 --- a/src/lib/i18n/locales/nl-NL/translation.json +++ b/src/lib/i18n/locales/nl-NL/translation.json @@ -561,6 +561,7 @@ "Listening...": "Aan het luisteren...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs kunnen fouten maken. Verifieer belangrijke informatie.", + "Loading {{count}} sites": "", "Local": "Lokaal", "Local Models": "Lokale modellen", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/pa-IN/translation.json b/src/lib/i18n/locales/pa-IN/translation.json index 58e8bf3398..59826ad7c2 100644 --- a/src/lib/i18n/locales/pa-IN/translation.json +++ b/src/lib/i18n/locales/pa-IN/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs ਗਲਤੀਆਂ ਕਰ ਸਕਦੇ ਹਨ। ਮਹੱਤਵਪੂਰਨ ਜਾਣਕਾਰੀ ਦੀ ਪੁਸ਼ਟੀ ਕਰੋ।", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pl-PL/translation.json b/src/lib/i18n/locales/pl-PL/translation.json index 64168bfe3c..aaf9c2c8cb 100644 --- a/src/lib/i18n/locales/pl-PL/translation.json +++ b/src/lib/i18n/locales/pl-PL/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMy mogą popełniać błędy. Zweryfikuj ważne informacje.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pt-BR/translation.json b/src/lib/i18n/locales/pt-BR/translation.json index 4c416f409b..3773f43141 100644 --- a/src/lib/i18n/locales/pt-BR/translation.json +++ b/src/lib/i18n/locales/pt-BR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Escutando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "Perdeu", diff --git a/src/lib/i18n/locales/pt-PT/translation.json b/src/lib/i18n/locales/pt-PT/translation.json index 092669629c..864fa90d51 100644 --- a/src/lib/i18n/locales/pt-PT/translation.json +++ b/src/lib/i18n/locales/pt-PT/translation.json @@ -561,6 +561,7 @@ "Listening...": "A escutar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "", diff --git a/src/lib/i18n/locales/ro-RO/translation.json b/src/lib/i18n/locales/ro-RO/translation.json index 01994c6317..0de81364b6 100644 --- a/src/lib/i18n/locales/ro-RO/translation.json +++ b/src/lib/i18n/locales/ro-RO/translation.json @@ -561,6 +561,7 @@ "Listening...": "Ascult...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-urile pot face greșeli. Verificați informațiile importante.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Modele Locale", "Lost": "Pierdut", diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json index b4d6aa916d..34ef2a98a5 100644 --- a/src/lib/i18n/locales/ru-RU/translation.json +++ b/src/lib/i18n/locales/ru-RU/translation.json @@ -561,6 +561,7 @@ "Listening...": "Слушаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могут допускать ошибки. Проверяйте важную информацию.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Локальные модели", "Lost": "", diff --git a/src/lib/i18n/locales/sk-SK/translation.json b/src/lib/i18n/locales/sk-SK/translation.json index 29d8622ea2..f720bfed8d 100644 --- a/src/lib/i18n/locales/sk-SK/translation.json +++ b/src/lib/i18n/locales/sk-SK/translation.json @@ -561,6 +561,7 @@ "Listening...": "Počúvanie...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM môžu robiť chyby. Overte si dôležité informácie.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokálne modely", "Lost": "Stratený", diff --git a/src/lib/i18n/locales/sr-RS/translation.json b/src/lib/i18n/locales/sr-RS/translation.json index 8dc2556551..a6d13b4dd1 100644 --- a/src/lib/i18n/locales/sr-RS/translation.json +++ b/src/lib/i18n/locales/sr-RS/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "ВЈМ-ови (LLM-ови) могу правити грешке. Проверите важне податке.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "Пораза", diff --git a/src/lib/i18n/locales/sv-SE/translation.json b/src/lib/i18n/locales/sv-SE/translation.json index 0abc82341c..ae936e3ee1 100644 --- a/src/lib/i18n/locales/sv-SE/translation.json +++ b/src/lib/i18n/locales/sv-SE/translation.json @@ -561,6 +561,7 @@ "Listening...": "Lyssnar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM:er kan göra misstag. Granska viktig information.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokala modeller", "Lost": "", diff --git a/src/lib/i18n/locales/th-TH/translation.json b/src/lib/i18n/locales/th-TH/translation.json index f1f23478bd..a57eaa3b47 100644 --- a/src/lib/i18n/locales/th-TH/translation.json +++ b/src/lib/i18n/locales/th-TH/translation.json @@ -561,6 +561,7 @@ "Listening...": "กำลังฟัง...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs สามารถทำผิดพลาดได้ ตรวจสอบข้อมูลสำคัญ", + "Loading {{count}} sites": "", "Local": "", "Local Models": "โมเดลท้องถิ่น", "Lost": "", diff --git a/src/lib/i18n/locales/tk-TW/translation.json b/src/lib/i18n/locales/tk-TW/translation.json index 3075e7c0f1..108c1d9e8c 100644 --- a/src/lib/i18n/locales/tk-TW/translation.json +++ b/src/lib/i18n/locales/tk-TW/translation.json @@ -561,6 +561,7 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/tr-TR/translation.json b/src/lib/i18n/locales/tr-TR/translation.json index e6d05dfe16..892db13e67 100644 --- a/src/lib/i18n/locales/tr-TR/translation.json +++ b/src/lib/i18n/locales/tr-TR/translation.json @@ -561,6 +561,7 @@ "Listening...": "Dinleniyor...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'ler hata yapabilir. Önemli bilgileri doğrulayın.", + "Loading {{count}} sites": "", "Local": "Yerel", "Local Models": "Yerel Modeller", "Lost": "Kayıp", diff --git a/src/lib/i18n/locales/uk-UA/translation.json b/src/lib/i18n/locales/uk-UA/translation.json index 449958c2b5..c971d0049d 100644 --- a/src/lib/i18n/locales/uk-UA/translation.json +++ b/src/lib/i18n/locales/uk-UA/translation.json @@ -561,6 +561,7 @@ "Listening...": "Слухаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs можуть помилятися. Перевірте важливу інформацію.", + "Loading {{count}} sites": "", "Local": "Локальний", "Local Models": "Локальні моделі", "Lost": "Втрачене", diff --git a/src/lib/i18n/locales/ur-PK/translation.json b/src/lib/i18n/locales/ur-PK/translation.json index 437d943ae2..31662f6951 100644 --- a/src/lib/i18n/locales/ur-PK/translation.json +++ b/src/lib/i18n/locales/ur-PK/translation.json @@ -561,6 +561,7 @@ "Listening...": "سن رہے ہیں...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "ایل ایل ایم غلطیاں کر سکتے ہیں اہم معلومات کی تصدیق کریں", + "Loading {{count}} sites": "", "Local": "", "Local Models": "مقامی ماڈلز", "Lost": "گم شدہ", diff --git a/src/lib/i18n/locales/vi-VN/translation.json b/src/lib/i18n/locales/vi-VN/translation.json index c786b67a7a..f6f8db5521 100644 --- a/src/lib/i18n/locales/vi-VN/translation.json +++ b/src/lib/i18n/locales/vi-VN/translation.json @@ -561,6 +561,7 @@ "Listening...": "Đang nghe...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Hệ thống có thể tạo ra nội dung không chính xác hoặc sai. Hãy kiểm chứng kỹ lưỡng thông tin trước khi tiếp nhận và sử dụng.", + "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index da26f9cc92..9d81494715 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -561,6 +561,7 @@ "Listening...": "正在倾听...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大语言模型可能会生成误导性错误信息,请对关键信息加以验证。", + "Loading {{count}} sites": "", "Local": "本地", "Local Models": "本地模型", "Lost": "落败", diff --git a/src/lib/i18n/locales/zh-TW/translation.json b/src/lib/i18n/locales/zh-TW/translation.json index 81db887669..fb22677fa8 100644 --- a/src/lib/i18n/locales/zh-TW/translation.json +++ b/src/lib/i18n/locales/zh-TW/translation.json @@ -561,6 +561,7 @@ "Listening...": "正在聆聽...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大型語言模型可能會出錯。請驗證重要資訊。", + "Loading {{count}} sites": "", "Local": "本機", "Local Models": "本機模型", "Lost": "已遺失", From a84e488a4ea681c580a2b9cca22fe176f8c0014c Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:58:28 -0600 Subject: [PATCH 004/232] Fix playwright in docker by updating unstructured --- backend/open_webui/retrieval/web/utils.py | 6 +++--- backend/requirements.txt | 2 +- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 3c0c34074c..0568c795c4 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -175,7 +175,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception(e, "Error loading %s", url) continue raise e @@ -190,7 +190,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception(e, "Error loading %s", url) continue raise e @@ -210,7 +210,7 @@ class SafeWebBaseLoader(WebBaseLoader): yield Document(page_content=text, metadata=metadata) except Exception as e: # Log the error and continue with the next URL - log.error(f"Error loading {path}: {e}") + log.exception(e, "Error loading %s", path) RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader diff --git a/backend/requirements.txt b/backend/requirements.txt index bb124bf118..cf5cb4a2f5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -60,7 +60,7 @@ fpdf2==2.8.2 pymdown-extensions==10.11.2 docx2txt==0.8 python-pptx==1.0.0 -unstructured==0.15.9 +unstructured==0.16.17 nltk==3.9.1 Markdown==3.7 pypandoc==1.13 diff --git a/pyproject.toml b/pyproject.toml index 41c79ddb83..6e7f607b3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dependencies = [ "pymdown-extensions==10.11.2", "docx2txt==0.8", "python-pptx==1.0.0", - "unstructured==0.15.9", + "unstructured==0.16.17", "nltk==3.9.1", "Markdown==3.7", "pypandoc==1.13", From 8da33721d563754becd0d03bf86605441e0bd9e3 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sun, 2 Feb 2025 17:58:09 -0600 Subject: [PATCH 005/232] Support PLAYWRIGHT_WS_URI --- backend/open_webui/config.py | 6 ++ backend/open_webui/main.py | 2 + backend/open_webui/retrieval/web/utils.py | 121 ++++++++++++++-------- backend/start.sh | 8 +- backend/start_windows.bat | 8 +- 5 files changed, 97 insertions(+), 48 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 278f506634..80e1e7ab24 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1724,6 +1724,12 @@ RAG_WEB_LOADER = PersistentConfig( os.environ.get("RAG_WEB_LOADER", "safe_web") ) +PLAYWRIGHT_WS_URI = PersistentConfig( + "PLAYWRIGHT_WS_URI", + "rag.web.loader.playwright.ws.uri", + os.environ.get("PLAYWRIGHT_WS_URI", None) +) + #################################### # Images #################################### diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index c5dfad0477..fd8a4c957d 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -129,6 +129,7 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, + PLAYWRIGHT_WS_URI, RAG_WEB_LOADER, WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE, @@ -528,6 +529,7 @@ app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = BING_SEARCH_V7_SUBSCRIPTION_K app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS app.state.config.RAG_WEB_LOADER = RAG_WEB_LOADER +app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI app.state.EMBEDDING_FUNCTION = None app.state.ef = None diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 0568c795c4..3c77402c39 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -16,7 +16,7 @@ from langchain_core.documents import Document from open_webui.constants import ERROR_MESSAGES -from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, RAG_WEB_LOADER +from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS import logging @@ -83,7 +83,7 @@ def extract_metadata(soup, url): return metadata class SafePlaywrightURLLoader(PlaywrightURLLoader): - """Load HTML pages safely with Playwright, supporting SSL verification and rate limiting. + """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection. Attributes: urls (List[str]): List of URLs to load. @@ -91,6 +91,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): requests_per_second (Optional[float]): Number of requests per second to limit to. continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. + playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. """ def __init__( @@ -101,19 +102,80 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): continue_on_failure: bool = True, headless: bool = True, remove_selectors: Optional[List[str]] = None, - proxy: Optional[Dict[str, str]] = None + proxy: Optional[Dict[str, str]] = None, + playwright_ws_url: Optional[str] = None ): - """Initialize with additional safety parameters.""" + """Initialize with additional safety parameters and remote browser support.""" + # We'll set headless to False if using playwright_ws_url since it's handled by the remote browser super().__init__( urls=urls, continue_on_failure=continue_on_failure, - headless=headless, + headless=headless if playwright_ws_url is None else False, remove_selectors=remove_selectors, proxy=proxy ) self.verify_ssl = verify_ssl self.requests_per_second = requests_per_second self.last_request_time = None + self.playwright_ws_url = playwright_ws_url + + def lazy_load(self) -> Iterator[Document]: + """Safely load URLs synchronously with support for remote browser.""" + from playwright.sync_api import sync_playwright + + with sync_playwright() as p: + # Use remote browser if ws_endpoint is provided, otherwise use local browser + if self.playwright_ws_url: + browser = p.chromium.connect(self.playwright_ws_url) + else: + browser = p.chromium.launch(headless=self.headless, proxy=self.proxy) + + for url in self.urls: + try: + self._safe_process_url_sync(url) + page = browser.new_page() + response = page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + + text = self.evaluator.evaluate(page, browser, response) + metadata = {"source": url} + yield Document(page_content=text, metadata=metadata) + except Exception as e: + if self.continue_on_failure: + log.exception(e, "Error loading %s", url) + continue + raise e + browser.close() + + async def alazy_load(self) -> AsyncIterator[Document]: + """Safely load URLs asynchronously with support for remote browser.""" + from playwright.async_api import async_playwright + + async with async_playwright() as p: + # Use remote browser if ws_endpoint is provided, otherwise use local browser + if self.playwright_ws_url: + browser = await p.chromium.connect(self.playwright_ws_url) + else: + browser = await p.chromium.launch(headless=self.headless, proxy=self.proxy) + + for url in self.urls: + try: + await self._safe_process_url(url) + page = await browser.new_page() + response = await page.goto(url) + if response is None: + raise ValueError(f"page.goto() returned None for url {url}") + + text = await self.evaluator.evaluate_async(page, browser, response) + metadata = {"source": url} + yield Document(page_content=text, metadata=metadata) + except Exception as e: + if self.continue_on_failure: + log.exception(e, "Error loading %s", url) + continue + raise e + await browser.close() def _verify_ssl_cert(self, url: str) -> bool: """Verify SSL certificate for the given URL.""" @@ -164,36 +226,6 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): self._sync_wait_for_rate_limit() return True - async def alazy_load(self) -> AsyncIterator[Document]: - """Safely load URLs asynchronously.""" - parent_iterator = super().alazy_load() - - async for document in parent_iterator: - url = document.metadata["source"] - try: - await self._safe_process_url(url) - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(e, "Error loading %s", url) - continue - raise e - - def lazy_load(self) -> Iterator[Document]: - """Safely load URLs synchronously.""" - parent_iterator = super().lazy_load() - - for document in parent_iterator: - url = document.metadata["source"] - try: - self._safe_process_url_sync(url) - yield document - except Exception as e: - if self.continue_on_failure: - log.exception(e, "Error loading %s", url) - continue - raise e - class SafeWebBaseLoader(WebBaseLoader): """WebBaseLoader with enhanced error handling for URLs.""" @@ -224,14 +256,19 @@ def get_web_loader( # Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - # Get the appropriate WebLoader based on the configuration + web_loader_args = { + "urls": safe_urls, + "verify_ssl": verify_ssl, + "requests_per_second": requests_per_second, + "continue_on_failure": True + } + + if PLAYWRIGHT_WS_URI.value: + web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value + + # Create the appropriate WebLoader based on the configuration WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] - web_loader = WebLoaderClass( - safe_urls, - verify_ssl=verify_ssl, - requests_per_second=requests_per_second, - continue_on_failure=True, - ) + web_loader = WebLoaderClass(**web_loader_args) log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) diff --git a/backend/start.sh b/backend/start.sh index 2501f413f0..3b08cf5491 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -5,9 +5,11 @@ cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation if [[ "${RAG_WEB_LOADER,,}" == "playwright" ]]; then - echo "Installing Playwright browsers..." - playwright install chromium - playwright install-deps chromium + if [[ -z "${PLAYWRIGHT_WS_URI}" ]]; then + echo "Installing Playwright browsers..." + playwright install chromium + playwright install-deps chromium + fi python -c "import nltk; nltk.download('punkt_tab')" fi diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 0f2792cc0c..036e1f721e 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -8,9 +8,11 @@ cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation IF /I "%RAG_WEB_LOADER%" == "playwright" ( - echo Installing Playwright browsers... - playwright install chromium - playwright install-deps chromium + IF "%PLAYWRIGHT_WS_URI%" == "" ( + echo Installing Playwright browsers... + playwright install chromium + playwright install-deps chromium + ) python -c "import nltk; nltk.download('punkt_tab')" ) From c3df481b22d8bc13a7deb045e94b0bcf4235224e Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sun, 2 Feb 2025 19:44:40 -0600 Subject: [PATCH 006/232] Introduce docker-compose.playwright.yaml + run-compose update --- backend/requirements.txt | 2 +- docker-compose.playwright.yaml | 10 ++++++++++ run-compose.sh | 9 +++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 docker-compose.playwright.yaml diff --git a/backend/requirements.txt b/backend/requirements.txt index cf5cb4a2f5..b08c176779 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -46,7 +46,7 @@ chromadb==0.6.2 pymilvus==2.5.0 qdrant-client~=1.12.0 opensearch-py==2.7.1 -playwright==1.49.1 +playwright==1.49.1 # Caution: version must match docker-compose.playwright.yaml transformers sentence-transformers==3.3.1 diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml new file mode 100644 index 0000000000..0a4bb3f766 --- /dev/null +++ b/docker-compose.playwright.yaml @@ -0,0 +1,10 @@ +services: + playwright: + image: mcr.microsoft.com/playwright:v1.49.1-noble # Version must match requirements.txt + container_name: playwright + command: npx -y playwright@1.49.1 run-server --port 3000 --host 0.0.0.0 + + open-webui: + environment: + - 'RAG_WEB_LOADER=playwright' + - 'PLAYWRIGHT_WS_URI=ws://playwright:3000' \ No newline at end of file diff --git a/run-compose.sh b/run-compose.sh index 21574e9599..4fafedc6f7 100755 --- a/run-compose.sh +++ b/run-compose.sh @@ -74,6 +74,7 @@ usage() { echo " --enable-api[port=PORT] Enable API and expose it on the specified port." echo " --webui[port=PORT] Set the port for the web user interface." echo " --data[folder=PATH] Bind mount for ollama data folder (by default will create the 'ollama' volume)." + echo " --playwright Enable Playwright support for web scraping." echo " --build Build the docker image before running the compose project." echo " --drop Drop the compose project." echo " -q, --quiet Run script in headless mode." @@ -100,6 +101,7 @@ webui_port=3000 headless=false build_image=false kill_compose=false +enable_playwright=false # Function to extract value from the parameter extract_value() { @@ -129,6 +131,9 @@ while [[ $# -gt 0 ]]; do value=$(extract_value "$key") data_dir=${value:-"./ollama-data"} ;; + --playwright) + enable_playwright=true + ;; --drop) kill_compose=true ;; @@ -182,6 +187,9 @@ else DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml" export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable fi + if [[ $enable_playwright == true ]]; then + DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.playwright.yaml" + fi if [[ -n $webui_port ]]; then export OPEN_WEBUI_PORT=$webui_port # Set OPEN_WEBUI_PORT environment variable fi @@ -201,6 +209,7 @@ echo -e " ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}" echo -e " ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}" echo -e " ${GREEN}${BOLD}Data Folder:${NC} ${data_dir:-Using ollama volume}" echo -e " ${GREEN}${BOLD}WebUI Port:${NC} $webui_port" +echo -e " ${GREEN}${BOLD}Playwright:${NC} ${enable_playwright:-false}" echo if [[ $headless == true ]]; then From 1b581b714f6749e51bf17c49434976a0c57900c6 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Mon, 3 Feb 2025 18:47:26 -0600 Subject: [PATCH 007/232] Moving code out of playwright branch --- backend/open_webui/retrieval/web/main.py | 4 ---- backend/open_webui/retrieval/web/utils.py | 19 +++++-------------- backend/open_webui/routers/retrieval.py | 16 +--------------- backend/open_webui/utils/middleware.py | 1 - .../chat/Messages/ResponseMessage.svelte | 1 - src/lib/i18n/locales/ar-BH/translation.json | 1 - src/lib/i18n/locales/bg-BG/translation.json | 1 - src/lib/i18n/locales/bn-BD/translation.json | 1 - src/lib/i18n/locales/ca-ES/translation.json | 1 - src/lib/i18n/locales/ceb-PH/translation.json | 1 - src/lib/i18n/locales/cs-CZ/translation.json | 1 - src/lib/i18n/locales/da-DK/translation.json | 1 - src/lib/i18n/locales/de-DE/translation.json | 1 - src/lib/i18n/locales/dg-DG/translation.json | 1 - src/lib/i18n/locales/el-GR/translation.json | 1 - src/lib/i18n/locales/en-GB/translation.json | 1 - src/lib/i18n/locales/en-US/translation.json | 1 - src/lib/i18n/locales/es-ES/translation.json | 1 - src/lib/i18n/locales/eu-ES/translation.json | 1 - src/lib/i18n/locales/fa-IR/translation.json | 1 - src/lib/i18n/locales/fi-FI/translation.json | 1 - src/lib/i18n/locales/fr-CA/translation.json | 1 - src/lib/i18n/locales/fr-FR/translation.json | 1 - src/lib/i18n/locales/he-IL/translation.json | 1 - src/lib/i18n/locales/hi-IN/translation.json | 1 - src/lib/i18n/locales/hr-HR/translation.json | 1 - src/lib/i18n/locales/hu-HU/translation.json | 1 - src/lib/i18n/locales/id-ID/translation.json | 1 - src/lib/i18n/locales/ie-GA/translation.json | 1 - src/lib/i18n/locales/it-IT/translation.json | 1 - src/lib/i18n/locales/ja-JP/translation.json | 1 - src/lib/i18n/locales/ka-GE/translation.json | 1 - src/lib/i18n/locales/ko-KR/translation.json | 1 - src/lib/i18n/locales/lt-LT/translation.json | 1 - src/lib/i18n/locales/ms-MY/translation.json | 1 - src/lib/i18n/locales/nb-NO/translation.json | 1 - src/lib/i18n/locales/nl-NL/translation.json | 1 - src/lib/i18n/locales/pa-IN/translation.json | 1 - src/lib/i18n/locales/pl-PL/translation.json | 1 - src/lib/i18n/locales/pt-BR/translation.json | 1 - src/lib/i18n/locales/pt-PT/translation.json | 1 - src/lib/i18n/locales/ro-RO/translation.json | 1 - src/lib/i18n/locales/ru-RU/translation.json | 1 - src/lib/i18n/locales/sk-SK/translation.json | 1 - src/lib/i18n/locales/sr-RS/translation.json | 1 - src/lib/i18n/locales/sv-SE/translation.json | 1 - src/lib/i18n/locales/th-TH/translation.json | 1 - src/lib/i18n/locales/tk-TW/translation.json | 1 - src/lib/i18n/locales/tr-TR/translation.json | 1 - src/lib/i18n/locales/uk-UA/translation.json | 1 - src/lib/i18n/locales/ur-PK/translation.json | 1 - src/lib/i18n/locales/vi-VN/translation.json | 1 - src/lib/i18n/locales/zh-CN/translation.json | 1 - src/lib/i18n/locales/zh-TW/translation.json | 1 - 54 files changed, 6 insertions(+), 84 deletions(-) diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 28a749e7d2..1af8a70aa1 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -1,5 +1,3 @@ -import validators - from typing import Optional from urllib.parse import urlparse @@ -12,8 +10,6 @@ def get_filtered_results(results, filter_list): filtered_results = [] for result in results: url = result.get("url") or result.get("link", "") - if not validators.url(url): - continue domain = urlparse(url).netloc if any(domain.endswith(filtered_domain) for filtered_domain in filter_list): filtered_results.append(result) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 3c77402c39..ddbdc60042 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -48,16 +48,6 @@ def validate_url(url: Union[str, Sequence[str]]): else: return False -def safe_validate_urls(url: Sequence[str]) -> Sequence[str]: - valid_urls = [] - for u in url: - try: - if validate_url(u): - valid_urls.append(u) - except ValueError: - continue - return valid_urls - def resolve_hostname(hostname): # Get address information addr_info = socket.getaddrinfo(hostname, None) @@ -253,11 +243,12 @@ def get_web_loader( verify_ssl: bool = True, requests_per_second: int = 2, ): - # Check if the URLs are valid - safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) + # Check if the URL is valid + if not validate_url(urls): + raise ValueError(ERROR_MESSAGES.INVALID_URL) web_loader_args = { - "urls": safe_urls, + "urls": urls, "verify_ssl": verify_ssl, "requests_per_second": requests_per_second, "continue_on_failure": True @@ -270,6 +261,6 @@ def get_web_loader( WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] web_loader = WebLoaderClass(**web_loader_args) - log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls)) return web_loader \ No newline at end of file diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 5076980844..65fa12ab27 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1239,10 +1239,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: @router.post("/process/web/search") async def process_web_search( - request: Request, form_data: SearchForm, extra_params: dict, user=Depends(get_verified_user) + request: Request, form_data: SearchForm, user=Depends(get_verified_user) ): - event_emitter = extra_params["__event_emitter__"] - try: logging.info( f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" @@ -1260,18 +1258,6 @@ async def process_web_search( log.debug(f"web_results: {web_results}") - await event_emitter( - { - "type": "status", - "data": { - "action": "web_search", - "description": "Loading {{count}} sites", - "urls": [result.link for result in web_results], - "done": False - }, - } - ) - try: collection_name = form_data.collection_name if collection_name == "" or collection_name is None: diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 961e57b9e6..77b820cd9d 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -443,7 +443,6 @@ async def chat_web_search_handler( "query": searchQuery, } ), - extra_params=extra_params, user=user ) diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 479180ae80..f6a4b0bc0a 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -585,7 +585,6 @@ : ''} text-gray-500 dark:text-gray-500 text-base line-clamp-1 text-wrap" > - {#if status?.description.includes('{{searchQuery}}')} {$i18n.t(status?.description, { searchQuery: status?.query diff --git a/src/lib/i18n/locales/ar-BH/translation.json b/src/lib/i18n/locales/ar-BH/translation.json index 8149c9fe63..98a4e557b6 100644 --- a/src/lib/i18n/locales/ar-BH/translation.json +++ b/src/lib/i18n/locales/ar-BH/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "يمكن أن تصدر بعض الأخطاء. لذلك يجب التحقق من المعلومات المهمة", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bg-BG/translation.json b/src/lib/i18n/locales/bg-BG/translation.json index e85f7bd531..e12120e77b 100644 --- a/src/lib/i18n/locales/bg-BG/translation.json +++ b/src/lib/i18n/locales/bg-BG/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могат да правят грешки. Проверете важните данни.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/bn-BD/translation.json b/src/lib/i18n/locales/bn-BD/translation.json index cfd4ccd0cc..9aba7a61d8 100644 --- a/src/lib/i18n/locales/bn-BD/translation.json +++ b/src/lib/i18n/locales/bn-BD/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM ভুল করতে পারে। গুরুত্বপূর্ণ তথ্য যাচাই করে নিন।", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ca-ES/translation.json b/src/lib/i18n/locales/ca-ES/translation.json index 6ebd4a7907..6de03ddda2 100644 --- a/src/lib/i18n/locales/ca-ES/translation.json +++ b/src/lib/i18n/locales/ca-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escoltant...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Els models de llenguatge poden cometre errors. Verifica la informació important.", - "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Models locals", "Lost": "Perdut", diff --git a/src/lib/i18n/locales/ceb-PH/translation.json b/src/lib/i18n/locales/ceb-PH/translation.json index 3ca5045f0b..8d85343a19 100644 --- a/src/lib/i18n/locales/ceb-PH/translation.json +++ b/src/lib/i18n/locales/ceb-PH/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Ang mga LLM mahimong masayop. ", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/cs-CZ/translation.json b/src/lib/i18n/locales/cs-CZ/translation.json index 76d4b9d394..fe7c4cef35 100644 --- a/src/lib/i18n/locales/cs-CZ/translation.json +++ b/src/lib/i18n/locales/cs-CZ/translation.json @@ -565,7 +565,6 @@ "Listening...": "Poslouchání...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM mohou dělat chyby. Ověřte si důležité informace.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokální modely", "Lost": "Ztracený", diff --git a/src/lib/i18n/locales/da-DK/translation.json b/src/lib/i18n/locales/da-DK/translation.json index 79c9595421..a358dc25b4 100644 --- a/src/lib/i18n/locales/da-DK/translation.json +++ b/src/lib/i18n/locales/da-DK/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lytter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'er kan lave fejl. Bekræft vigtige oplysninger.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokale modeller", "Lost": "", diff --git a/src/lib/i18n/locales/de-DE/translation.json b/src/lib/i18n/locales/de-DE/translation.json index f8d09d3225..64ec0776da 100644 --- a/src/lib/i18n/locales/de-DE/translation.json +++ b/src/lib/i18n/locales/de-DE/translation.json @@ -565,7 +565,6 @@ "Listening...": "Höre zu...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs können Fehler machen. Überprüfe wichtige Informationen.", - "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale Modelle", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/dg-DG/translation.json b/src/lib/i18n/locales/dg-DG/translation.json index 9439b4b370..6733ae692c 100644 --- a/src/lib/i18n/locales/dg-DG/translation.json +++ b/src/lib/i18n/locales/dg-DG/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs can make borks. Verify important info.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/el-GR/translation.json b/src/lib/i18n/locales/el-GR/translation.json index 7e531fc94c..c44018fe0a 100644 --- a/src/lib/i18n/locales/el-GR/translation.json +++ b/src/lib/i18n/locales/el-GR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Ακούγεται...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Τα LLM μπορούν να κάνουν λάθη. Επαληθεύστε σημαντικές πληροφορίες.", - "Loading {{count}} sites": "", "Local": "Τοπικό", "Local Models": "Τοπικά Μοντέλα", "Lost": "Χαμένος", diff --git a/src/lib/i18n/locales/en-GB/translation.json b/src/lib/i18n/locales/en-GB/translation.json index da563d9207..8d8c14864d 100644 --- a/src/lib/i18n/locales/en-GB/translation.json +++ b/src/lib/i18n/locales/en-GB/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/en-US/translation.json b/src/lib/i18n/locales/en-US/translation.json index da563d9207..8d8c14864d 100644 --- a/src/lib/i18n/locales/en-US/translation.json +++ b/src/lib/i18n/locales/en-US/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/es-ES/translation.json b/src/lib/i18n/locales/es-ES/translation.json index 4a0162a53c..1b75f4727c 100644 --- a/src/lib/i18n/locales/es-ES/translation.json +++ b/src/lib/i18n/locales/es-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escuchando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Los LLM pueden cometer errores. Verifica la información importante.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos locales", "Lost": "", diff --git a/src/lib/i18n/locales/eu-ES/translation.json b/src/lib/i18n/locales/eu-ES/translation.json index 1bbb0fbe36..47c5df84e4 100644 --- a/src/lib/i18n/locales/eu-ES/translation.json +++ b/src/lib/i18n/locales/eu-ES/translation.json @@ -565,7 +565,6 @@ "Listening...": "Entzuten...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMek akatsak egin ditzakete. Egiaztatu informazio garrantzitsua.", - "Loading {{count}} sites": "", "Local": "Lokala", "Local Models": "Modelo lokalak", "Lost": "Galduta", diff --git a/src/lib/i18n/locales/fa-IR/translation.json b/src/lib/i18n/locales/fa-IR/translation.json index 80a646e4f3..75609ffdf5 100644 --- a/src/lib/i18n/locales/fa-IR/translation.json +++ b/src/lib/i18n/locales/fa-IR/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "مدل\u200cهای زبانی بزرگ می\u200cتوانند اشتباه کنند. اطلاعات مهم را راستی\u200cآزمایی کنید.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/fi-FI/translation.json b/src/lib/i18n/locales/fi-FI/translation.json index bd60c57896..e9f7f56536 100644 --- a/src/lib/i18n/locales/fi-FI/translation.json +++ b/src/lib/i18n/locales/fi-FI/translation.json @@ -565,7 +565,6 @@ "Listening...": "Kuuntelee...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Kielimallit voivat tehdä virheitä. Tarkista tärkeät tiedot.", - "Loading {{count}} sites": "", "Local": "Paikallinen", "Local Models": "Paikalliset mallit", "Lost": "Mennyt", diff --git a/src/lib/i18n/locales/fr-CA/translation.json b/src/lib/i18n/locales/fr-CA/translation.json index 6f507c1331..a6abf89089 100644 --- a/src/lib/i18n/locales/fr-CA/translation.json +++ b/src/lib/i18n/locales/fr-CA/translation.json @@ -565,7 +565,6 @@ "Listening...": "En train d'écouter...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modèles locaux", "Lost": "", diff --git a/src/lib/i18n/locales/fr-FR/translation.json b/src/lib/i18n/locales/fr-FR/translation.json index 491a86f88b..d188ff917c 100644 --- a/src/lib/i18n/locales/fr-FR/translation.json +++ b/src/lib/i18n/locales/fr-FR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Écoute en cours...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "Les LLM peuvent faire des erreurs. Vérifiez les informations importantes.", - "Loading {{count}} sites": "", "Local": "Local", "Local Models": "Modèles locaux", "Lost": "Perdu", diff --git a/src/lib/i18n/locales/he-IL/translation.json b/src/lib/i18n/locales/he-IL/translation.json index 06599d9d05..695fd575e2 100644 --- a/src/lib/i18n/locales/he-IL/translation.json +++ b/src/lib/i18n/locales/he-IL/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "מודלים בשפה טבעית יכולים לטעות. אמת מידע חשוב.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hi-IN/translation.json b/src/lib/i18n/locales/hi-IN/translation.json index 70d4584e1c..e98ca6e0ee 100644 --- a/src/lib/i18n/locales/hi-IN/translation.json +++ b/src/lib/i18n/locales/hi-IN/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "एलएलएम गलतियाँ कर सकते हैं। महत्वपूर्ण जानकारी सत्यापित करें.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/hr-HR/translation.json b/src/lib/i18n/locales/hr-HR/translation.json index b4a27a7817..61c6f76013 100644 --- a/src/lib/i18n/locales/hr-HR/translation.json +++ b/src/lib/i18n/locales/hr-HR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Slušam...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-ovi mogu pogriješiti. Provjerite važne informacije.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalni modeli", "Lost": "", diff --git a/src/lib/i18n/locales/hu-HU/translation.json b/src/lib/i18n/locales/hu-HU/translation.json index 764cf2db51..14818eeb15 100644 --- a/src/lib/i18n/locales/hu-HU/translation.json +++ b/src/lib/i18n/locales/hu-HU/translation.json @@ -565,7 +565,6 @@ "Listening...": "Hallgatás...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Az LLM-ek hibázhatnak. Ellenőrizze a fontos információkat.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Helyi modellek", "Lost": "Elveszett", diff --git a/src/lib/i18n/locales/id-ID/translation.json b/src/lib/i18n/locales/id-ID/translation.json index e47acd089b..6a61ea7178 100644 --- a/src/lib/i18n/locales/id-ID/translation.json +++ b/src/lib/i18n/locales/id-ID/translation.json @@ -565,7 +565,6 @@ "Listening...": "Mendengarkan", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM dapat membuat kesalahan. Verifikasi informasi penting.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Lokal", "Lost": "", diff --git a/src/lib/i18n/locales/ie-GA/translation.json b/src/lib/i18n/locales/ie-GA/translation.json index 1e114503bf..4f320ba7f6 100644 --- a/src/lib/i18n/locales/ie-GA/translation.json +++ b/src/lib/i18n/locales/ie-GA/translation.json @@ -565,7 +565,6 @@ "Listening...": "Éisteacht...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Is féidir le LLManna botúin a dhéanamh. Fíoraigh faisnéis thábhachtach.", - "Loading {{count}} sites": "", "Local": "Áitiúil", "Local Models": "Múnlaí Áitiúla", "Lost": "Cailleadh", diff --git a/src/lib/i18n/locales/it-IT/translation.json b/src/lib/i18n/locales/it-IT/translation.json index 08954b5025..2770d9c9ce 100644 --- a/src/lib/i18n/locales/it-IT/translation.json +++ b/src/lib/i18n/locales/it-IT/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Gli LLM possono commettere errori. Verifica le informazioni importanti.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ja-JP/translation.json b/src/lib/i18n/locales/ja-JP/translation.json index e529a6092d..baa80bc2b1 100644 --- a/src/lib/i18n/locales/ja-JP/translation.json +++ b/src/lib/i18n/locales/ja-JP/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM は間違いを犯す可能性があります。重要な情報を検証してください。", - "Loading {{count}} sites": "", "Local": "", "Local Models": "ローカルモデル", "Lost": "", diff --git a/src/lib/i18n/locales/ka-GE/translation.json b/src/lib/i18n/locales/ka-GE/translation.json index ffb1eb9fc7..edac4d43f7 100644 --- a/src/lib/i18n/locales/ka-GE/translation.json +++ b/src/lib/i18n/locales/ka-GE/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "შესაძლოა LLM-ებმა შეცდომები დაუშვან. გადაამოწმეთ მნიშვნელოვანი ინფორმაცია.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/ko-KR/translation.json b/src/lib/i18n/locales/ko-KR/translation.json index 56703424fe..43de6554d8 100644 --- a/src/lib/i18n/locales/ko-KR/translation.json +++ b/src/lib/i18n/locales/ko-KR/translation.json @@ -565,7 +565,6 @@ "Listening...": "듣는 중...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM에 오류가 있을 수 있습니다. 중요한 정보는 확인이 필요합니다.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "로컬 모델", "Lost": "패배", diff --git a/src/lib/i18n/locales/lt-LT/translation.json b/src/lib/i18n/locales/lt-LT/translation.json index 09f8ed02ba..5a644032e5 100644 --- a/src/lib/i18n/locales/lt-LT/translation.json +++ b/src/lib/i18n/locales/lt-LT/translation.json @@ -565,7 +565,6 @@ "Listening...": "Klausoma...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Dideli kalbos modeliai gali klysti. Patikrinkite atsakymų teisingumą.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokalūs modeliai", "Lost": "", diff --git a/src/lib/i18n/locales/ms-MY/translation.json b/src/lib/i18n/locales/ms-MY/translation.json index 31f29d4b60..236d0bbc5d 100644 --- a/src/lib/i18n/locales/ms-MY/translation.json +++ b/src/lib/i18n/locales/ms-MY/translation.json @@ -565,7 +565,6 @@ "Listening...": "Mendengar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM boleh membuat kesilapan. Sahkan maklumat penting", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Model Tempatan", "Lost": "", diff --git a/src/lib/i18n/locales/nb-NO/translation.json b/src/lib/i18n/locales/nb-NO/translation.json index 352044fee5..5121d297ea 100644 --- a/src/lib/i18n/locales/nb-NO/translation.json +++ b/src/lib/i18n/locales/nb-NO/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lytter ...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Språkmodeller kan gjøre feil. Kontroller viktige opplysninger.", - "Loading {{count}} sites": "", "Local": "Lokal", "Local Models": "Lokale modeller", "Lost": "Tapt", diff --git a/src/lib/i18n/locales/nl-NL/translation.json b/src/lib/i18n/locales/nl-NL/translation.json index e053910486..78dfd0a54f 100644 --- a/src/lib/i18n/locales/nl-NL/translation.json +++ b/src/lib/i18n/locales/nl-NL/translation.json @@ -565,7 +565,6 @@ "Listening...": "Aan het luisteren...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs kunnen fouten maken. Verifieer belangrijke informatie.", - "Loading {{count}} sites": "", "Local": "Lokaal", "Local Models": "Lokale modellen", "Lost": "Verloren", diff --git a/src/lib/i18n/locales/pa-IN/translation.json b/src/lib/i18n/locales/pa-IN/translation.json index 408be6142b..cae4f25c6c 100644 --- a/src/lib/i18n/locales/pa-IN/translation.json +++ b/src/lib/i18n/locales/pa-IN/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs ਗਲਤੀਆਂ ਕਰ ਸਕਦੇ ਹਨ। ਮਹੱਤਵਪੂਰਨ ਜਾਣਕਾਰੀ ਦੀ ਪੁਸ਼ਟੀ ਕਰੋ।", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pl-PL/translation.json b/src/lib/i18n/locales/pl-PL/translation.json index 2f38deaf79..caebf0272d 100644 --- a/src/lib/i18n/locales/pl-PL/translation.json +++ b/src/lib/i18n/locales/pl-PL/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMy mogą popełniać błędy. Zweryfikuj ważne informacje.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/pt-BR/translation.json b/src/lib/i18n/locales/pt-BR/translation.json index fa1187e852..b895920351 100644 --- a/src/lib/i18n/locales/pt-BR/translation.json +++ b/src/lib/i18n/locales/pt-BR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Escutando...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "Perdeu", diff --git a/src/lib/i18n/locales/pt-PT/translation.json b/src/lib/i18n/locales/pt-PT/translation.json index 0863ed7da5..18e2b137a0 100644 --- a/src/lib/i18n/locales/pt-PT/translation.json +++ b/src/lib/i18n/locales/pt-PT/translation.json @@ -565,7 +565,6 @@ "Listening...": "A escutar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs podem cometer erros. Verifique informações importantes.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modelos Locais", "Lost": "", diff --git a/src/lib/i18n/locales/ro-RO/translation.json b/src/lib/i18n/locales/ro-RO/translation.json index 1a3905d4f5..2d6c022776 100644 --- a/src/lib/i18n/locales/ro-RO/translation.json +++ b/src/lib/i18n/locales/ro-RO/translation.json @@ -565,7 +565,6 @@ "Listening...": "Ascult...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM-urile pot face greșeli. Verificați informațiile importante.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Modele Locale", "Lost": "Pierdut", diff --git a/src/lib/i18n/locales/ru-RU/translation.json b/src/lib/i18n/locales/ru-RU/translation.json index ca6d12bae4..f0fc3c8e1a 100644 --- a/src/lib/i18n/locales/ru-RU/translation.json +++ b/src/lib/i18n/locales/ru-RU/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слушаю...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs могут допускать ошибки. Проверяйте важную информацию.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Локальные модели", "Lost": "", diff --git a/src/lib/i18n/locales/sk-SK/translation.json b/src/lib/i18n/locales/sk-SK/translation.json index d4bf1ea356..8e4742fbb6 100644 --- a/src/lib/i18n/locales/sk-SK/translation.json +++ b/src/lib/i18n/locales/sk-SK/translation.json @@ -565,7 +565,6 @@ "Listening...": "Počúvanie...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM môžu robiť chyby. Overte si dôležité informácie.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokálne modely", "Lost": "Stratený", diff --git a/src/lib/i18n/locales/sr-RS/translation.json b/src/lib/i18n/locales/sr-RS/translation.json index c6bdaf359a..a16b783281 100644 --- a/src/lib/i18n/locales/sr-RS/translation.json +++ b/src/lib/i18n/locales/sr-RS/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слушам...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "ВЈМ-ови (LLM-ови) могу правити грешке. Проверите важне податке.", - "Loading {{count}} sites": "", "Local": "Локално", "Local Models": "Локални модели", "Lost": "Пораза", diff --git a/src/lib/i18n/locales/sv-SE/translation.json b/src/lib/i18n/locales/sv-SE/translation.json index c8ea56901a..a2d103b23c 100644 --- a/src/lib/i18n/locales/sv-SE/translation.json +++ b/src/lib/i18n/locales/sv-SE/translation.json @@ -565,7 +565,6 @@ "Listening...": "Lyssnar...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM:er kan göra misstag. Granska viktig information.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "Lokala modeller", "Lost": "", diff --git a/src/lib/i18n/locales/th-TH/translation.json b/src/lib/i18n/locales/th-TH/translation.json index 8f41d77d6c..8b2e591993 100644 --- a/src/lib/i18n/locales/th-TH/translation.json +++ b/src/lib/i18n/locales/th-TH/translation.json @@ -565,7 +565,6 @@ "Listening...": "กำลังฟัง...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLMs สามารถทำผิดพลาดได้ ตรวจสอบข้อมูลสำคัญ", - "Loading {{count}} sites": "", "Local": "", "Local Models": "โมเดลท้องถิ่น", "Lost": "", diff --git a/src/lib/i18n/locales/tk-TW/translation.json b/src/lib/i18n/locales/tk-TW/translation.json index da563d9207..8d8c14864d 100644 --- a/src/lib/i18n/locales/tk-TW/translation.json +++ b/src/lib/i18n/locales/tk-TW/translation.json @@ -565,7 +565,6 @@ "Listening...": "", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/tr-TR/translation.json b/src/lib/i18n/locales/tr-TR/translation.json index 6eb6bcb7c5..8195df8d0f 100644 --- a/src/lib/i18n/locales/tr-TR/translation.json +++ b/src/lib/i18n/locales/tr-TR/translation.json @@ -565,7 +565,6 @@ "Listening...": "Dinleniyor...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "LLM'ler hata yapabilir. Önemli bilgileri doğrulayın.", - "Loading {{count}} sites": "", "Local": "Yerel", "Local Models": "Yerel Modeller", "Lost": "Kayıp", diff --git a/src/lib/i18n/locales/uk-UA/translation.json b/src/lib/i18n/locales/uk-UA/translation.json index 1bd9a15bde..2aa92ae15d 100644 --- a/src/lib/i18n/locales/uk-UA/translation.json +++ b/src/lib/i18n/locales/uk-UA/translation.json @@ -565,7 +565,6 @@ "Listening...": "Слухаю...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "LLMs можуть помилятися. Перевірте важливу інформацію.", - "Loading {{count}} sites": "", "Local": "Локальний", "Local Models": "Локальні моделі", "Lost": "Втрачене", diff --git a/src/lib/i18n/locales/ur-PK/translation.json b/src/lib/i18n/locales/ur-PK/translation.json index 806598987d..f733eda3de 100644 --- a/src/lib/i18n/locales/ur-PK/translation.json +++ b/src/lib/i18n/locales/ur-PK/translation.json @@ -565,7 +565,6 @@ "Listening...": "سن رہے ہیں...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "ایل ایل ایم غلطیاں کر سکتے ہیں اہم معلومات کی تصدیق کریں", - "Loading {{count}} sites": "", "Local": "", "Local Models": "مقامی ماڈلز", "Lost": "گم شدہ", diff --git a/src/lib/i18n/locales/vi-VN/translation.json b/src/lib/i18n/locales/vi-VN/translation.json index 9a6e896105..93e98680b9 100644 --- a/src/lib/i18n/locales/vi-VN/translation.json +++ b/src/lib/i18n/locales/vi-VN/translation.json @@ -565,7 +565,6 @@ "Listening...": "Đang nghe...", "Llama.cpp": "", "LLMs can make mistakes. Verify important information.": "Hệ thống có thể tạo ra nội dung không chính xác hoặc sai. Hãy kiểm chứng kỹ lưỡng thông tin trước khi tiếp nhận và sử dụng.", - "Loading {{count}} sites": "", "Local": "", "Local Models": "", "Lost": "", diff --git a/src/lib/i18n/locales/zh-CN/translation.json b/src/lib/i18n/locales/zh-CN/translation.json index b8fbd36172..f32067afb4 100644 --- a/src/lib/i18n/locales/zh-CN/translation.json +++ b/src/lib/i18n/locales/zh-CN/translation.json @@ -565,7 +565,6 @@ "Listening...": "正在倾听...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大语言模型可能会生成误导性错误信息,请对关键信息加以验证。", - "Loading {{count}} sites": "", "Local": "本地", "Local Models": "本地模型", "Lost": "落败", diff --git a/src/lib/i18n/locales/zh-TW/translation.json b/src/lib/i18n/locales/zh-TW/translation.json index c838cad4e0..78dcddafff 100644 --- a/src/lib/i18n/locales/zh-TW/translation.json +++ b/src/lib/i18n/locales/zh-TW/translation.json @@ -565,7 +565,6 @@ "Listening...": "正在聆聽...", "Llama.cpp": "Llama.cpp", "LLMs can make mistakes. Verify important information.": "大型語言模型可能會出錯。請驗證重要資訊。", - "Loading {{count}} sites": "", "Local": "本機", "Local Models": "本機模型", "Lost": "已遺失", From 35f3824932833fe77ef3bce54b86803cda4838a6 Mon Sep 17 00:00:00 2001 From: Mazurek Michal Date: Fri, 7 Feb 2025 13:44:47 +0100 Subject: [PATCH 008/232] feat: Implement Document Intelligence as Content Extraction Engine --- backend/open_webui/config.py | 12 +++++++ backend/open_webui/main.py | 4 +++ backend/open_webui/retrieval/loaders/main.py | 22 ++++++++++++ backend/open_webui/routers/retrieval.py | 27 +++++++++++++- backend/requirements.txt | 1 + pyproject.toml | 1 + src/lib/apis/retrieval/index.ts | 6 ++++ .../admin/Settings/Documents.svelte | 36 ++++++++++++++++++- 8 files changed, 107 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index bf6f1d0256..e46a87cd5e 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1431,6 +1431,18 @@ TIKA_SERVER_URL = PersistentConfig( os.getenv("TIKA_SERVER_URL", "http://tika:9998"), # Default for sidecar deployment ) +DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig( + "DOCUMENT_INTELLIGENCE_ENDPOINT", + "rag.document_intelligence_endpoint", + os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT", ""), +) + +DOCUMENT_INTELLIGENCE_KEY = PersistentConfig( + "DOCUMENT_INTELLIGENCE_KEY", + "rag.document_intelligence_key", + os.getenv("DOCUMENT_INTELLIGENCE_KEY", ""), +) + RAG_TOP_K = PersistentConfig( "RAG_TOP_K", "rag.top_k", int(os.environ.get("RAG_TOP_K", "3")) ) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 863f58dea5..2f1b92b1d5 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -154,6 +154,8 @@ from open_webui.config import ( CHUNK_SIZE, CONTENT_EXTRACTION_ENGINE, TIKA_SERVER_URL, + DOCUMENT_INTELLIGENCE_ENDPOINT, + DOCUMENT_INTELLIGENCE_KEY, RAG_TOP_K, RAG_TEXT_SPLITTER, TIKTOKEN_ENCODING_NAME, @@ -478,6 +480,8 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL +app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT +app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index a9372f65a6..19d590f5cb 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -4,6 +4,7 @@ import ftfy import sys from langchain_community.document_loaders import ( + AzureAIDocumentIntelligenceLoader, BSHTMLLoader, CSVLoader, Docx2txtLoader, @@ -147,6 +148,27 @@ class Loader: file_path=file_path, mime_type=file_content_type, ) + elif ( + self.engine == "document_intelligence" + and self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT") != "" + and self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY") != "" + and ( + file_ext in ["pdf", "xls", "xlsx", "docx", "ppt", "pptx"] + or file_content_type + in [ + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ] + ) + ): + loader = AzureAIDocumentIntelligenceLoader( + file_path=file_path, + api_endpoint=self.kwargs.get("DOCUMENT_INTELLIGENCE_ENDPOINT"), + api_key=self.kwargs.get("DOCUMENT_INTELLIGENCE_KEY"), + ) else: if file_ext == "pdf": loader = PyPDFLoader( diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 77f04a4be5..4cfcd490d1 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -352,6 +352,10 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "content_extraction": { "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "tika_server_url": request.app.state.config.TIKA_SERVER_URL, + "document_intelligence_config": { + "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + }, }, "chunk": { "text_splitter": request.app.state.config.TEXT_SPLITTER, @@ -402,9 +406,15 @@ class FileConfig(BaseModel): max_count: Optional[int] = None +class DocumentIntelligenceConfigForm(BaseModel): + endpoint: str + key: str + + class ContentExtractionConfig(BaseModel): engine: str = "" tika_server_url: Optional[str] = None + document_intelligence_config: Optional[DocumentIntelligenceConfigForm] = None class ChunkParamUpdateForm(BaseModel): @@ -479,13 +489,22 @@ async def update_rag_config( request.app.state.config.FILE_MAX_COUNT = form_data.file.max_count if form_data.content_extraction is not None: - log.info(f"Updating text settings: {form_data.content_extraction}") + log.info( + f"Updating content extraction: {request.app.state.config.CONTENT_EXTRACTION_ENGINE} to {form_data.content_extraction.engine}" + ) request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( form_data.content_extraction.engine ) request.app.state.config.TIKA_SERVER_URL = ( form_data.content_extraction.tika_server_url ) + if form_data.content_extraction.document_intelligence_config is not None: + request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( + form_data.content_extraction.document_intelligence_config.endpoint + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_KEY = ( + form_data.content_extraction.document_intelligence_config.key + ) if form_data.chunk is not None: request.app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter @@ -564,6 +583,10 @@ async def update_rag_config( "content_extraction": { "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "tika_server_url": request.app.state.config.TIKA_SERVER_URL, + "document_intelligence_config": { + "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + }, }, "chunk": { "text_splitter": request.app.state.config.TEXT_SPLITTER, @@ -887,6 +910,8 @@ def process_file( engine=request.app.state.config.CONTENT_EXTRACTION_ENGINE, TIKA_SERVER_URL=request.app.state.config.TIKA_SERVER_URL, PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES, + DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, ) docs = loader.load( file.filename, file.meta.get("content_type"), file_path diff --git a/backend/requirements.txt b/backend/requirements.txt index 14ad4b9cdf..4a39e77b54 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -72,6 +72,7 @@ validators==0.34.0 psutil sentencepiece soundfile==0.13.1 +azure-ai-documentintelligence==1.0.0 opencv-python-headless==4.11.0.86 rapidocr-onnxruntime==1.3.24 diff --git a/pyproject.toml b/pyproject.toml index f121089e8f..60d54afd6a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ dependencies = [ "psutil", "sentencepiece", "soundfile==0.13.1", + "azure-ai-documentintelligence==1.0.0", "opencv-python-headless==4.11.0.86", "rapidocr-onnxruntime==1.3.24", diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index c35c37847b..ed07ab5d0d 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -32,9 +32,15 @@ type ChunkConfigForm = { chunk_overlap: number; }; +type DocumentIntelligenceConfigForm = { + key: string; + endpoint: string; +}; + type ContentExtractConfigForm = { engine: string; tika_server_url: string | null; + document_intelligence_config: DocumentIntelligenceConfigForm | null; }; type YoutubeConfigForm = { diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index d3b7cfa01a..e624a51b3a 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -50,6 +50,9 @@ let contentExtractionEngine = 'default'; let tikaServerUrl = ''; let showTikaServerUrl = false; + let documentIntelligenceEndpoint = ''; + let documentIntelligenceKey = ''; + let showDocumentIntelligenceConfig = false; let textSplitter = ''; let chunkSize = 0; @@ -175,6 +178,13 @@ toast.error($i18n.t('Tika Server URL required.')); return; } + if ( + contentExtractionEngine === 'document_intelligence' && + (documentIntelligenceEndpoint === '' || documentIntelligenceKey === '') + ) { + toast.error($i18n.t('Document Intelligence endpoint and key required.')); + return; + } const res = await updateRAGConfig(localStorage.token, { pdf_extract_images: pdfExtractImages, enable_google_drive_integration: enableGoogleDriveIntegration, @@ -189,7 +199,11 @@ }, content_extraction: { engine: contentExtractionEngine, - tika_server_url: tikaServerUrl + tika_server_url: tikaServerUrl, + document_intelligence_config: { + key: documentIntelligenceKey, + endpoint: documentIntelligenceEndpoint + } } }); @@ -245,6 +259,9 @@ contentExtractionEngine = res.content_extraction.engine; tikaServerUrl = res.content_extraction.tika_server_url; showTikaServerUrl = contentExtractionEngine === 'tika'; + documentIntelligenceEndpoint = res.content_extraction.document_intelligence_config.endpoint; + documentIntelligenceKey = res.content_extraction.document_intelligence_config.key; + showDocumentIntelligenceConfig = contentExtractionEngine === 'document_intelligence'; fileMaxSize = res?.file.max_size ?? ''; fileMaxCount = res?.file.max_count ?? ''; @@ -568,10 +585,12 @@ bind:value={contentExtractionEngine} on:change={(e) => { showTikaServerUrl = e.target.value === 'tika'; + showDocumentIntelligenceConfig = e.target.value === 'document_intelligence'; }} > + @@ -587,6 +606,21 @@ {/if} + + {#if showDocumentIntelligenceConfig} +
+ + + +
+ {/if}
From 6d62e71c3431f3305a3802970bf5a18055a7fe39 Mon Sep 17 00:00:00 2001 From: Didier FOURNOUT Date: Thu, 13 Feb 2025 15:29:26 +0000 Subject: [PATCH 009/232] Add x-Open-Webui headers for ollama + more for openai --- backend/open_webui/main.py | 4 +- backend/open_webui/routers/ollama.py | 144 ++++++++++++++++++++++++--- backend/open_webui/routers/openai.py | 43 ++++++-- backend/open_webui/utils/chat.py | 4 +- backend/open_webui/utils/models.py | 11 +- 5 files changed, 173 insertions(+), 33 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 88b5b3f692..3e5f20ceee 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -858,7 +858,7 @@ async def get_models(request: Request, user=Depends(get_verified_user)): return filtered_models - models = await get_all_models(request) + models = await get_all_models(request, user=user) # Filter out filter pipelines models = [ @@ -898,7 +898,7 @@ async def chat_completion( user=Depends(get_verified_user), ): if not request.app.state.MODELS: - await get_all_models(request) + await get_all_models(request, user=user) model_item = form_data.pop("model_item", {}) tasks = form_data.pop("background_tasks", None) diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py index 64373c616c..e825848d47 100644 --- a/backend/open_webui/routers/ollama.py +++ b/backend/open_webui/routers/ollama.py @@ -14,6 +14,11 @@ from urllib.parse import urlparse import aiohttp from aiocache import cached import requests +from open_webui.models.users import UserModel + +from open_webui.env import ( + ENABLE_FORWARD_USER_INFO_HEADERS, +) from fastapi import ( Depends, @@ -66,12 +71,26 @@ log.setLevel(SRC_LOG_LEVELS["OLLAMA"]) ########################################## -async def send_get_request(url, key=None): +async def send_get_request(url, key=None, user: UserModel = None): timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST) try: async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get( - url, headers={**({"Authorization": f"Bearer {key}"} if key else {})} + url, + headers={ + "Content-Type": "application/json", + **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), + }, ) as response: return await response.json() except Exception as e: @@ -96,6 +115,7 @@ async def send_post_request( stream: bool = True, key: Optional[str] = None, content_type: Optional[str] = None, + user: UserModel = None ): r = None @@ -110,6 +130,16 @@ async def send_post_request( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, ) r.raise_for_status() @@ -191,7 +221,19 @@ async def verify_connection( try: async with session.get( f"{url}/api/version", - headers={**({"Authorization": f"Bearer {key}"} if key else {})}, + headers={ + **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), + }, ) as r: if r.status != 200: detail = f"HTTP Error: {r.status}" @@ -254,7 +296,7 @@ async def update_config( @cached(ttl=3) -async def get_all_models(request: Request): +async def get_all_models(request: Request, user: UserModel=None): log.info("get_all_models()") if request.app.state.config.ENABLE_OLLAMA_API: request_tasks = [] @@ -262,7 +304,7 @@ async def get_all_models(request: Request): if (str(idx) not in request.app.state.config.OLLAMA_API_CONFIGS) and ( url not in request.app.state.config.OLLAMA_API_CONFIGS # Legacy support ): - request_tasks.append(send_get_request(f"{url}/api/tags")) + request_tasks.append(send_get_request(f"{url}/api/tags", user=user)) else: api_config = request.app.state.config.OLLAMA_API_CONFIGS.get( str(idx), @@ -275,7 +317,7 @@ async def get_all_models(request: Request): key = api_config.get("key", None) if enable: - request_tasks.append(send_get_request(f"{url}/api/tags", key)) + request_tasks.append(send_get_request(f"{url}/api/tags", key, user=user)) else: request_tasks.append(asyncio.ensure_future(asyncio.sleep(0, None))) @@ -360,7 +402,7 @@ async def get_ollama_tags( models = [] if url_idx is None: - models = await get_all_models(request) + models = await get_all_models(request, user=user) else: url = request.app.state.config.OLLAMA_BASE_URLS[url_idx] key = get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS) @@ -370,7 +412,19 @@ async def get_ollama_tags( r = requests.request( method="GET", url=f"{url}/api/tags", - headers={**({"Authorization": f"Bearer {key}"} if key else {})}, + headers={ + **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), + }, ) r.raise_for_status() @@ -477,6 +531,7 @@ async def get_ollama_loaded_models(request: Request, user=Depends(get_verified_u url, {} ), # Legacy support ).get("key", None), + user=user ) for idx, url in enumerate(request.app.state.config.OLLAMA_BASE_URLS) ] @@ -509,6 +564,7 @@ async def pull_model( url=f"{url}/api/pull", payload=json.dumps(payload), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -527,7 +583,7 @@ async def push_model( user=Depends(get_admin_user), ): if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS if form_data.name in models: @@ -545,6 +601,7 @@ async def push_model( url=f"{url}/api/push", payload=form_data.model_dump_json(exclude_none=True).encode(), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -571,6 +628,7 @@ async def create_model( url=f"{url}/api/create", payload=form_data.model_dump_json(exclude_none=True).encode(), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -588,7 +646,7 @@ async def copy_model( user=Depends(get_admin_user), ): if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS if form_data.source in models: @@ -609,6 +667,16 @@ async def copy_model( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, data=form_data.model_dump_json(exclude_none=True).encode(), ) @@ -643,7 +711,7 @@ async def delete_model( user=Depends(get_admin_user), ): if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS if form_data.name in models: @@ -665,6 +733,16 @@ async def delete_model( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, ) r.raise_for_status() @@ -693,7 +771,7 @@ async def delete_model( async def show_model_info( request: Request, form_data: ModelNameForm, user=Depends(get_verified_user) ): - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS if form_data.name not in models: @@ -714,6 +792,16 @@ async def show_model_info( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, data=form_data.model_dump_json(exclude_none=True).encode(), ) @@ -757,7 +845,7 @@ async def embed( log.info(f"generate_ollama_batch_embeddings {form_data}") if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS model = form_data.model @@ -783,6 +871,16 @@ async def embed( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, data=form_data.model_dump_json(exclude_none=True).encode(), ) @@ -826,7 +924,7 @@ async def embeddings( log.info(f"generate_ollama_embeddings {form_data}") if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS model = form_data.model @@ -852,6 +950,16 @@ async def embeddings( headers={ "Content-Type": "application/json", **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS and user + else {} + ), }, data=form_data.model_dump_json(exclude_none=True).encode(), ) @@ -901,7 +1009,7 @@ async def generate_completion( user=Depends(get_verified_user), ): if url_idx is None: - await get_all_models(request) + await get_all_models(request, user=user) models = request.app.state.OLLAMA_MODELS model = form_data.model @@ -931,6 +1039,7 @@ async def generate_completion( url=f"{url}/api/generate", payload=form_data.model_dump_json(exclude_none=True).encode(), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -1047,6 +1156,7 @@ async def generate_chat_completion( stream=form_data.stream, key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), content_type="application/x-ndjson", + user=user, ) @@ -1149,6 +1259,7 @@ async def generate_openai_completion( payload=json.dumps(payload), stream=payload.get("stream", False), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -1227,6 +1338,7 @@ async def generate_openai_chat_completion( payload=json.dumps(payload), stream=payload.get("stream", False), key=get_api_key(url_idx, url, request.app.state.config.OLLAMA_API_CONFIGS), + user=user, ) @@ -1240,7 +1352,7 @@ async def get_openai_models( models = [] if url_idx is None: - model_list = await get_all_models(request) + model_list = await get_all_models(request, user=user) models = [ { "id": model["model"], diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py index afda362373..f0d5d81dd6 100644 --- a/backend/open_webui/routers/openai.py +++ b/backend/open_webui/routers/openai.py @@ -26,6 +26,7 @@ from open_webui.env import ( ENABLE_FORWARD_USER_INFO_HEADERS, BYPASS_MODEL_ACCESS_CONTROL, ) +from open_webui.models.users import UserModel from open_webui.constants import ERROR_MESSAGES from open_webui.env import ENV, SRC_LOG_LEVELS @@ -51,12 +52,25 @@ log.setLevel(SRC_LOG_LEVELS["OPENAI"]) ########################################## -async def send_get_request(url, key=None): +async def send_get_request(url, key=None, user: UserModel=None): timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST) try: async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: async with session.get( - url, headers={**({"Authorization": f"Bearer {key}"} if key else {})} + url, + headers={ + **({"Authorization": f"Bearer {key}"} if key else {}), + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS + else {} + ), + } ) as response: return await response.json() except Exception as e: @@ -247,7 +261,7 @@ async def speech(request: Request, user=Depends(get_verified_user)): raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND) -async def get_all_models_responses(request: Request) -> list: +async def get_all_models_responses(request: Request, user: UserModel) -> list: if not request.app.state.config.ENABLE_OPENAI_API: return [] @@ -271,7 +285,9 @@ async def get_all_models_responses(request: Request) -> list: ): request_tasks.append( send_get_request( - f"{url}/models", request.app.state.config.OPENAI_API_KEYS[idx] + f"{url}/models", + request.app.state.config.OPENAI_API_KEYS[idx], + user=user, ) ) else: @@ -291,6 +307,7 @@ async def get_all_models_responses(request: Request) -> list: send_get_request( f"{url}/models", request.app.state.config.OPENAI_API_KEYS[idx], + user=user, ) ) else: @@ -352,13 +369,13 @@ async def get_filtered_models(models, user): @cached(ttl=3) -async def get_all_models(request: Request) -> dict[str, list]: +async def get_all_models(request: Request, user: UserModel) -> dict[str, list]: log.info("get_all_models()") if not request.app.state.config.ENABLE_OPENAI_API: return {"data": []} - responses = await get_all_models_responses(request) + responses = await get_all_models_responses(request, user=user) def extract_data(response): if response and "data" in response: @@ -418,7 +435,7 @@ async def get_models( } if url_idx is None: - models = await get_all_models(request) + models = await get_all_models(request, user=user) else: url = request.app.state.config.OPENAI_API_BASE_URLS[url_idx] key = request.app.state.config.OPENAI_API_KEYS[url_idx] @@ -515,6 +532,16 @@ async def verify_connection( headers={ "Authorization": f"Bearer {key}", "Content-Type": "application/json", + **( + { + "X-OpenWebUI-User-Name": user.name, + "X-OpenWebUI-User-Id": user.id, + "X-OpenWebUI-User-Email": user.email, + "X-OpenWebUI-User-Role": user.role, + } + if ENABLE_FORWARD_USER_INFO_HEADERS + else {} + ), }, ) as r: if r.status != 200: @@ -587,7 +614,7 @@ async def generate_chat_completion( detail="Model not found", ) - await get_all_models(request) + await get_all_models(request, user=user) model = request.app.state.OPENAI_MODELS.get(model_id) if model: idx = model["urlIdx"] diff --git a/backend/open_webui/utils/chat.py b/backend/open_webui/utils/chat.py index 253eaedfb9..d8f44590b9 100644 --- a/backend/open_webui/utils/chat.py +++ b/backend/open_webui/utils/chat.py @@ -285,7 +285,7 @@ chat_completion = generate_chat_completion async def chat_completed(request: Request, form_data: dict, user: Any): if not request.app.state.MODELS: - await get_all_models(request) + await get_all_models(request, user=user) if getattr(request.state, "direct", False) and hasattr(request.state, "model"): models = { @@ -351,7 +351,7 @@ async def chat_action(request: Request, action_id: str, form_data: dict, user: A raise Exception(f"Action not found: {action_id}") if not request.app.state.MODELS: - await get_all_models(request) + await get_all_models(request, user=user) if getattr(request.state, "direct", False) and hasattr(request.state, "model"): models = { diff --git a/backend/open_webui/utils/models.py b/backend/open_webui/utils/models.py index 975f8cb095..00f8fd6661 100644 --- a/backend/open_webui/utils/models.py +++ b/backend/open_webui/utils/models.py @@ -22,6 +22,7 @@ from open_webui.config import ( ) from open_webui.env import SRC_LOG_LEVELS, GLOBAL_LOG_LEVEL +from open_webui.models.users import UserModel logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL) @@ -29,17 +30,17 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MAIN"]) -async def get_all_base_models(request: Request): +async def get_all_base_models(request: Request, user: UserModel=None): function_models = [] openai_models = [] ollama_models = [] if request.app.state.config.ENABLE_OPENAI_API: - openai_models = await openai.get_all_models(request) + openai_models = await openai.get_all_models(request, user=user) openai_models = openai_models["data"] if request.app.state.config.ENABLE_OLLAMA_API: - ollama_models = await ollama.get_all_models(request) + ollama_models = await ollama.get_all_models(request, user=user) ollama_models = [ { "id": model["model"], @@ -58,8 +59,8 @@ async def get_all_base_models(request: Request): return models -async def get_all_models(request): - models = await get_all_base_models(request) +async def get_all_models(request, user: UserModel=None): + models = await get_all_base_models(request, user=user) # If there are no models, return an empty list if len(models) == 0: From 06062568c7f24a6c3a7e76c9070c11941b4018d5 Mon Sep 17 00:00:00 2001 From: Didier FOURNOUT Date: Thu, 13 Feb 2025 16:12:46 +0000 Subject: [PATCH 010/232] black formatting --- backend/open_webui/routers/ollama.py | 10 ++++++---- backend/open_webui/routers/openai.py | 4 ++-- backend/open_webui/utils/models.py | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py index e825848d47..a3d5064497 100644 --- a/backend/open_webui/routers/ollama.py +++ b/backend/open_webui/routers/ollama.py @@ -115,7 +115,7 @@ async def send_post_request( stream: bool = True, key: Optional[str] = None, content_type: Optional[str] = None, - user: UserModel = None + user: UserModel = None, ): r = None @@ -296,7 +296,7 @@ async def update_config( @cached(ttl=3) -async def get_all_models(request: Request, user: UserModel=None): +async def get_all_models(request: Request, user: UserModel = None): log.info("get_all_models()") if request.app.state.config.ENABLE_OLLAMA_API: request_tasks = [] @@ -317,7 +317,9 @@ async def get_all_models(request: Request, user: UserModel=None): key = api_config.get("key", None) if enable: - request_tasks.append(send_get_request(f"{url}/api/tags", key, user=user)) + request_tasks.append( + send_get_request(f"{url}/api/tags", key, user=user) + ) else: request_tasks.append(asyncio.ensure_future(asyncio.sleep(0, None))) @@ -531,7 +533,7 @@ async def get_ollama_loaded_models(request: Request, user=Depends(get_verified_u url, {} ), # Legacy support ).get("key", None), - user=user + user=user, ) for idx, url in enumerate(request.app.state.config.OLLAMA_BASE_URLS) ] diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py index f0d5d81dd6..1ef913df47 100644 --- a/backend/open_webui/routers/openai.py +++ b/backend/open_webui/routers/openai.py @@ -52,7 +52,7 @@ log.setLevel(SRC_LOG_LEVELS["OPENAI"]) ########################################## -async def send_get_request(url, key=None, user: UserModel=None): +async def send_get_request(url, key=None, user: UserModel = None): timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST) try: async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session: @@ -70,7 +70,7 @@ async def send_get_request(url, key=None, user: UserModel=None): if ENABLE_FORWARD_USER_INFO_HEADERS else {} ), - } + }, ) as response: return await response.json() except Exception as e: diff --git a/backend/open_webui/utils/models.py b/backend/open_webui/utils/models.py index 00f8fd6661..872049f0f8 100644 --- a/backend/open_webui/utils/models.py +++ b/backend/open_webui/utils/models.py @@ -30,7 +30,7 @@ log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MAIN"]) -async def get_all_base_models(request: Request, user: UserModel=None): +async def get_all_base_models(request: Request, user: UserModel = None): function_models = [] openai_models = [] ollama_models = [] @@ -59,7 +59,7 @@ async def get_all_base_models(request: Request, user: UserModel=None): return models -async def get_all_models(request, user: UserModel=None): +async def get_all_models(request, user: UserModel = None): models = await get_all_base_models(request, user=user) # If there are no models, return an empty list From 1a2d81bc01ecfbd0578902196834ab317f1e8d5a Mon Sep 17 00:00:00 2001 From: silentoplayz <50341825+silentoplayz@users.noreply.github.com> Date: Fri, 14 Feb 2025 12:48:33 -0500 Subject: [PATCH 011/232] Update main.py Change ASCII font for "Open WebUI" from "Standard" to "ANSI Shadow" --- backend/open_webui/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a363231512..71142978c9 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -348,12 +348,12 @@ class SPAStaticFiles(StaticFiles): print( rf""" - ___ __ __ _ _ _ ___ - / _ \ _ __ ___ _ __ \ \ / /__| |__ | | | |_ _| -| | | | '_ \ / _ \ '_ \ \ \ /\ / / _ \ '_ \| | | || | -| |_| | |_) | __/ | | | \ V V / __/ |_) | |_| || | - \___/| .__/ \___|_| |_| \_/\_/ \___|_.__/ \___/|___| - |_| + ██████╗ ██████╗ ███████╗███╗ ██╗ ██╗ ██╗███████╗██████╗ ██╗ ██╗██╗ +██╔═══██╗██╔══██╗██╔════╝████╗ ██║ ██║ ██║██╔════╝██╔══██╗██║ ██║██║ +██║ ██║██████╔╝█████╗ ██╔██╗ ██║ ██║ █╗ ██║█████╗ ██████╔╝██║ ██║██║ +██║ ██║██╔═══╝ ██╔══╝ ██║╚██╗██║ ██║███╗██║██╔══╝ ██╔══██╗██║ ██║██║ +╚██████╔╝██║ ███████╗██║ ╚████║ ╚███╔███╔╝███████╗██████╔╝╚██████╔╝██║ + ╚═════╝ ╚═╝ ╚══════╝╚═╝ ╚═══╝ ╚══╝╚══╝ ╚══════╝╚═════╝ ╚═════╝ ╚═╝ v{VERSION} - building the best open-source AI user interface. From aa2b764d743f348f6d380aa43fd2184e49ab6745 Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:32:45 -0600 Subject: [PATCH 012/232] Finalize incomplete merge to update playwright branch Introduced feature parity for trust_env --- backend/open_webui/retrieval/web/utils.py | 59 ++++++++++++++--------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 25ca5aef52..b9fbbee3a5 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -1,30 +1,33 @@ import asyncio -from datetime import datetime, time, timedelta +import logging import socket import ssl -import aiohttp -import asyncio import urllib.parse +import urllib.request +from collections import defaultdict +from datetime import datetime, time, timedelta +from typing import ( + Any, + AsyncIterator, + Dict, + Iterator, + List, + Optional, + Sequence, + Union +) +import aiohttp import certifi import validators -from collections import defaultdict -from typing import AsyncIterator, Dict, List, Optional, Union, Sequence, Iterator -from typing import Any, AsyncIterator, Dict, Iterator, List, Sequence, Union - - from langchain_community.document_loaders import ( - WebBaseLoader, - PlaywrightURLLoader + PlaywrightURLLoader, + WebBaseLoader ) from langchain_core.documents import Document - - from open_webui.constants import ERROR_MESSAGES from open_webui.config import ENABLE_RAG_LOCAL_WEB_FETCH, PLAYWRIGHT_WS_URI, RAG_WEB_LOADER from open_webui.env import SRC_LOG_LEVELS -import logging - log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -91,18 +94,20 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): """Load HTML pages safely with Playwright, supporting SSL verification, rate limiting, and remote browser connection. Attributes: - urls (List[str]): List of URLs to load. + web_paths (List[str]): List of URLs to load. verify_ssl (bool): If True, verify SSL certificates. requests_per_second (Optional[float]): Number of requests per second to limit to. continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. + trust_env (bool): If True, use proxy settings from environment variables. """ def __init__( self, - urls: List[str], + web_paths: List[str], verify_ssl: bool = True, + trust_env: bool = False, requests_per_second: Optional[float] = None, continue_on_failure: bool = True, headless: bool = True, @@ -111,9 +116,20 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): playwright_ws_url: Optional[str] = None ): """Initialize with additional safety parameters and remote browser support.""" + + proxy_server = proxy.get('server') if proxy else None + if trust_env and not proxy_server: + env_proxies = urllib.request.getproxies() + env_proxy_server = env_proxies.get('https') or env_proxies.get('http') + if env_proxy_server: + if proxy: + proxy['server'] = env_proxy_server + else: + proxy = { 'server': env_proxy_server } + # We'll set headless to False if using playwright_ws_url since it's handled by the remote browser super().__init__( - urls=urls, + urls=web_paths, continue_on_failure=continue_on_failure, headless=headless if playwright_ws_url is None else False, remove_selectors=remove_selectors, @@ -123,6 +139,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): self.requests_per_second = requests_per_second self.last_request_time = None self.playwright_ws_url = playwright_ws_url + self.trust_env = trust_env def lazy_load(self) -> Iterator[Document]: """Safely load URLs synchronously with support for remote browser.""" @@ -347,14 +364,12 @@ def get_web_loader( # Check if the URLs are valid safe_urls = safe_validate_urls([urls] if isinstance(urls, str) else urls) - web_loader_args = { - web_path=safe_urls, - "urls": safe_urls, + "web_paths": safe_urls, "verify_ssl": verify_ssl, "requests_per_second": requests_per_second, "continue_on_failure": True, - trust_env=trust_env + "trust_env": trust_env } if PLAYWRIGHT_WS_URI.value: @@ -364,6 +379,6 @@ def get_web_loader( WebLoaderClass = RAG_WEB_LOADERS[RAG_WEB_LOADER.value] web_loader = WebLoaderClass(**web_loader_args) - log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(urls)) + log.debug("Using RAG_WEB_LOADER %s for %s URLs", web_loader.__class__.__name__, len(safe_urls)) return web_loader \ No newline at end of file From b1bab2ece88618a34deb228b72c44b8981d7db8c Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:43:46 -0600 Subject: [PATCH 013/232] Remove duplicate loader.alazy_load line from merge --- backend/open_webui/routers/retrieval.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index e79e414b1c..84a49e4ca6 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1348,8 +1348,6 @@ async def process_web_search( requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, ) - docs = [doc async for doc in loader.alazy_load()] - # docs = loader.load() docs = await loader.aload() await run_in_threadpool( save_docs_to_vector_db, From 8e9b00a017ddd5995fe2d5f2dd4dd7dcf3955efb Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Fri, 14 Feb 2025 22:48:15 -0600 Subject: [PATCH 014/232] Fix docstring --- backend/open_webui/retrieval/web/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index b9fbbee3a5..0718b6b85c 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -96,11 +96,12 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): Attributes: web_paths (List[str]): List of URLs to load. verify_ssl (bool): If True, verify SSL certificates. + trust_env (bool): If True, use proxy settings from environment variables. requests_per_second (Optional[float]): Number of requests per second to limit to. continue_on_failure (bool): If True, continue loading other URLs on failure. headless (bool): If True, the browser will run in headless mode. + proxy (dict): Proxy override settings for the Playwright session. playwright_ws_url (Optional[str]): WebSocket endpoint URI for remote browser connection. - trust_env (bool): If True, use proxy settings from environment variables. """ def __init__( From fb12ee3f52079139917700e2f60a53b8b4eec94d Mon Sep 17 00:00:00 2001 From: i-infra <77526490+i-infra@users.noreply.github.com> Date: Sat, 15 Feb 2025 19:41:41 -0500 Subject: [PATCH 015/232] fix exception where openrouter doesn't populate - no longer index without fallback --- backend/open_webui/routers/tasks.py | 4 ++-- backend/open_webui/utils/chat.py | 4 ++-- backend/open_webui/utils/middleware.py | 2 +- backend/open_webui/utils/task.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/routers/tasks.py b/backend/open_webui/routers/tasks.py index 8b17c6c4b3..0328cefe04 100644 --- a/backend/open_webui/routers/tasks.py +++ b/backend/open_webui/routers/tasks.py @@ -208,7 +208,7 @@ async def generate_title( "stream": False, **( {"max_tokens": 1000} - if models[task_model_id]["owned_by"] == "ollama" + if models[task_model_id].get("owned_by") == "ollama" else { "max_completion_tokens": 1000, } @@ -571,7 +571,7 @@ async def generate_emoji( "stream": False, **( {"max_tokens": 4} - if models[task_model_id]["owned_by"] == "ollama" + if models[task_model_id].get("owned_by") == "ollama" else { "max_completion_tokens": 4, } diff --git a/backend/open_webui/utils/chat.py b/backend/open_webui/utils/chat.py index 569bcad854..335ff838f2 100644 --- a/backend/open_webui/utils/chat.py +++ b/backend/open_webui/utils/chat.py @@ -206,7 +206,7 @@ async def generate_chat_completion( except Exception as e: raise e - if model["owned_by"] == "arena": + if model.get("owned_by") == "arena": model_ids = model.get("info", {}).get("meta", {}).get("model_ids") filter_mode = model.get("info", {}).get("meta", {}).get("filter_mode") if model_ids and filter_mode == "exclude": @@ -259,7 +259,7 @@ async def generate_chat_completion( return await generate_function_chat_completion( request, form_data, user=user, models=models ) - if model["owned_by"] == "ollama": + if model.get("owned_by") == "ollama": # Using /ollama/api/chat endpoint form_data = convert_payload_openai_to_ollama(form_data) response = await generate_ollama_chat_completion( diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index 4e4ba8d307..9a62fdbc9f 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -795,7 +795,7 @@ async def process_chat_payload(request, form_data, metadata, user, model): # Workaround for Ollama 2.0+ system prompt issue # TODO: replace with add_or_update_system_message - if model["owned_by"] == "ollama": + if model.get("owned_by") == "ollama": form_data["messages"] = prepend_to_first_user_message_content( rag_template( request.app.state.config.RAG_TEMPLATE, context_string, prompt diff --git a/backend/open_webui/utils/task.py b/backend/open_webui/utils/task.py index 3d8c05d455..5663ce2acb 100644 --- a/backend/open_webui/utils/task.py +++ b/backend/open_webui/utils/task.py @@ -22,7 +22,7 @@ def get_task_model_id( # Set the task model task_model_id = default_model_id # Check if the user has a custom task model and use that model - if models[task_model_id]["owned_by"] == "ollama": + if models[task_model_id].get("owned_by") == "ollama": if task_model and task_model in models: task_model_id = task_model else: From 5e3742f89924068a8b21e4f69ba3c10336f40ff6 Mon Sep 17 00:00:00 2001 From: i-infra <77526490+i-infra@users.noreply.github.com> Date: Sat, 15 Feb 2025 19:42:58 -0500 Subject: [PATCH 016/232] and one last edit --- backend/open_webui/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/utils/models.py b/backend/open_webui/utils/models.py index 975f8cb095..a2c0eadcaf 100644 --- a/backend/open_webui/utils/models.py +++ b/backend/open_webui/utils/models.py @@ -142,7 +142,7 @@ async def get_all_models(request): custom_model.base_model_id == model["id"] or custom_model.base_model_id == model["id"].split(":")[0] ): - owned_by = model["owned_by"] + owned_by = model.get("owned_by", "unknown owner") if "pipe" in model: pipe = model["pipe"] break From ab94017e3a5c2c2f7582cf1f716387824861e09c Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Mon, 17 Feb 2025 21:27:05 +0200 Subject: [PATCH 017/232] Auto text direction (fix for RTL) in most cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit to test it use the prompt: "write please a sample headers (level 1,2,3), text (bold/italic), list, numbered list, quetes, table, code write each example in hebrew dont output markdown in code block" need to be fixed: * Redundant border properties in quotes: Removed border-left as border-inline-start properly handles both LTR/RTL and by this prevents double borders in RTL layouts * table header have `text-align: left` instead of start e.g. text-align and direction is not the save thing the text direction is ok. the `dir="auto"` is fixing the direction שמג the text-align by default is set by the direction, if it set manually is should be start/end to respect the direction attribute --- .../chat/Messages/Markdown/MarkdownTokens.svelte | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte index 8a3d22f735..32a4b9e87c 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte @@ -169,14 +169,14 @@ {:else if token.type === 'blockquote'} -
+
{:else if token.type === 'list'} {#if token.ordered}
    {#each token.items as item, itemIdx} -
  1. +
  2. {#if item?.task} {#each token.items as item, itemIdx} -
  3. +
  4. {#if item?.task} {:else if token.type === 'paragraph'} -

    +

    {:else if token.type === 'text'} {#if top} -

    +

    {#if token.tokens} {:else} From b73b8343d58e9a29c4b6c4f991cb4015e4c8d966 Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Mon, 17 Feb 2025 21:40:11 +0200 Subject: [PATCH 018/232] fix table header align on rtl --- src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte index 32a4b9e87c..8dab96cf2d 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte @@ -118,7 +118,7 @@ class="!px-3 !py-1.5 cursor-pointer border border-gray-50 dark:border-gray-850" style={token.align[headerIdx] ? '' : `text-align: ${token.align[headerIdx]}`} > -

    +
    Date: Mon, 17 Feb 2025 21:57:47 +0200 Subject: [PATCH 019/232] fix blockquote border line direction in RTL --- src/app.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app.css b/src/app.css index dadfda78f1..6e6d1d8b16 100644 --- a/src/app.css +++ b/src/app.css @@ -53,11 +53,11 @@ math { } .markdown-prose { - @apply prose dark:prose-invert prose-blockquote:border-gray-100 prose-blockquote:dark:border-gray-800 prose-blockquote:border-l-2 prose-blockquote:not-italic prose-blockquote:font-normal prose-headings:font-semibold prose-hr:my-4 prose-hr:border-gray-100 prose-hr:dark:border-gray-800 prose-p:my-0 prose-img:my-1 prose-headings:my-1 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-ul:-my-0 prose-ol:-my-0 prose-li:-my-0 whitespace-pre-line; + @apply prose dark:prose-invert prose-blockquote:border-s-gray-100 prose-blockquote:dark:border-gray-800 prose-blockquote:border-s-2 prose-blockquote:not-italic prose-blockquote:font-normal prose-headings:font-semibold prose-hr:my-4 prose-hr:border-gray-100 prose-hr:dark:border-gray-800 prose-p:my-0 prose-img:my-1 prose-headings:my-1 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-ul:-my-0 prose-ol:-my-0 prose-li:-my-0 whitespace-pre-line; } .markdown-prose-xs { - @apply text-xs prose dark:prose-invert prose-blockquote:border-gray-100 prose-blockquote:dark:border-gray-800 prose-blockquote:border-l-2 prose-blockquote:not-italic prose-blockquote:font-normal prose-headings:font-semibold prose-hr:my-0 prose-hr:border-gray-100 prose-hr:dark:border-gray-800 prose-p:my-0 prose-img:my-1 prose-headings:my-1 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-ul:-my-0 prose-ol:-my-0 prose-li:-my-0 whitespace-pre-line; + @apply text-xs prose dark:prose-invert prose-blockquote:border-s-gray-100 prose-blockquote:dark:border-gray-800 prose-blockquote:border-s-2 prose-blockquote:not-italic prose-blockquote:font-normal prose-headings:font-semibold prose-hr:my-0 prose-hr:border-gray-100 prose-hr:dark:border-gray-800 prose-p:my-0 prose-img:my-1 prose-headings:my-1 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-ul:-my-0 prose-ol:-my-0 prose-li:-my-0 whitespace-pre-line; } .markdown a { From 0888ddde7ccb201dae493c13378b7987ccbfa3fc Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Mon, 17 Feb 2025 22:03:03 +0200 Subject: [PATCH 020/232] Fix card title direction and alignment (for RTL) --- src/lib/components/layout/Sidebar/ChatItem.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar/ChatItem.svelte b/src/lib/components/layout/Sidebar/ChatItem.svelte index 1e92240937..7088fbd884 100644 --- a/src/lib/components/layout/Sidebar/ChatItem.svelte +++ b/src/lib/components/layout/Sidebar/ChatItem.svelte @@ -274,7 +274,7 @@ draggable="false" >
    -
    +
    {title}
    From d26569f63660629d9a596b058cdcb73fce90e079 Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Mon, 17 Feb 2025 22:42:37 +0200 Subject: [PATCH 021/232] Update ChatItem.svelte: Revert text-align for download button in rtl prevent button overriding because the menu button is in absolute position and it doesn't have start/end properties --- src/lib/components/layout/Sidebar/ChatItem.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/layout/Sidebar/ChatItem.svelte b/src/lib/components/layout/Sidebar/ChatItem.svelte index 2acf290450..c622b27326 100644 --- a/src/lib/components/layout/Sidebar/ChatItem.svelte +++ b/src/lib/components/layout/Sidebar/ChatItem.svelte @@ -274,7 +274,7 @@ draggable="false" >
    -
    +
    {title}
    From 8d149348a02d79a5f4f9aaea6fc44623d665f115 Mon Sep 17 00:00:00 2001 From: Elkana Bardugo Date: Mon, 17 Feb 2025 22:43:34 +0200 Subject: [PATCH 022/232] Update MarkdownTokens.svelte: Revert text-align for download button in rtl prevent button overriding because the download button is in absolute position and it doesn't have start/end properties --- src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte index e0aa4dd2ec..36cac4d17a 100644 --- a/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte +++ b/src/lib/components/chat/Messages/Markdown/MarkdownTokens.svelte @@ -118,7 +118,7 @@ class="px-3! py-1.5! cursor-pointer border border-gray-100 dark:border-gray-850" style={token.align[headerIdx] ? '' : `text-align: ${token.align[headerIdx]}`} > -
    +
    Date: Mon, 17 Feb 2025 16:25:50 -0800 Subject: [PATCH 023/232] enh: code execution settings --- backend/open_webui/config.py | 52 +++- backend/open_webui/main.py | 20 +- backend/open_webui/routers/configs.py | 38 ++- backend/open_webui/routers/utils.py | 52 +++- src/lib/apis/configs/index.ts | 8 +- src/lib/apis/utils/index.ts | 56 +++- src/lib/components/admin/Settings.svelte | 12 +- .../admin/Settings/CodeExecution.svelte | 257 ++++++++++++++++++ .../admin/Settings/CodeInterpreter.svelte | 166 ----------- .../components/admin/Settings/General.svelte | 4 +- .../components/chat/Messages/CodeBlock.svelte | 18 +- .../components/chat/Settings/Account.svelte | 2 +- src/lib/components/common/CodeEditor.svelte | 2 +- src/lib/components/layout/Navbar/Menu.svelte | 2 +- .../components/layout/Sidebar/ChatMenu.svelte | 2 +- 15 files changed, 478 insertions(+), 213 deletions(-) create mode 100644 src/lib/components/admin/Settings/CodeExecution.svelte delete mode 100644 src/lib/components/admin/Settings/CodeInterpreter.svelte diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index d62ac3114e..f34c78dd8a 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1377,6 +1377,39 @@ Responses from models: {{responses}}""" # Code Interpreter #################################### + +CODE_EXECUTION_ENGINE = PersistentConfig( + "CODE_EXECUTION_ENGINE", + "code_execution.engine", + os.environ.get("CODE_EXECUTION_ENGINE", "pyodide"), +) + +CODE_EXECUTION_JUPYTER_URL = PersistentConfig( + "CODE_EXECUTION_JUPYTER_URL", + "code_execution.jupyter.url", + os.environ.get("CODE_EXECUTION_JUPYTER_URL", ""), +) + +CODE_EXECUTION_JUPYTER_AUTH = PersistentConfig( + "CODE_EXECUTION_JUPYTER_AUTH", + "code_execution.jupyter.auth", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH", ""), +) + +CODE_EXECUTION_JUPYTER_AUTH_TOKEN = PersistentConfig( + "CODE_EXECUTION_JUPYTER_AUTH_TOKEN", + "code_execution.jupyter.auth_token", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH_TOKEN", ""), +) + + +CODE_EXECUTION_JUPYTER_AUTH_PASSWORD = PersistentConfig( + "CODE_EXECUTION_JUPYTER_AUTH_PASSWORD", + "code_execution.jupyter.auth_password", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH_PASSWORD", ""), +) + + ENABLE_CODE_INTERPRETER = PersistentConfig( "ENABLE_CODE_INTERPRETER", "code_interpreter.enable", @@ -1398,26 +1431,37 @@ CODE_INTERPRETER_PROMPT_TEMPLATE = PersistentConfig( CODE_INTERPRETER_JUPYTER_URL = PersistentConfig( "CODE_INTERPRETER_JUPYTER_URL", "code_interpreter.jupyter.url", - os.environ.get("CODE_INTERPRETER_JUPYTER_URL", ""), + os.environ.get( + "CODE_INTERPRETER_JUPYTER_URL", os.environ.get("CODE_EXECUTION_JUPYTER_URL", "") + ), ) CODE_INTERPRETER_JUPYTER_AUTH = PersistentConfig( "CODE_INTERPRETER_JUPYTER_AUTH", "code_interpreter.jupyter.auth", - os.environ.get("CODE_INTERPRETER_JUPYTER_AUTH", ""), + os.environ.get( + "CODE_INTERPRETER_JUPYTER_AUTH", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH", ""), + ), ) CODE_INTERPRETER_JUPYTER_AUTH_TOKEN = PersistentConfig( "CODE_INTERPRETER_JUPYTER_AUTH_TOKEN", "code_interpreter.jupyter.auth_token", - os.environ.get("CODE_INTERPRETER_JUPYTER_AUTH_TOKEN", ""), + os.environ.get( + "CODE_INTERPRETER_JUPYTER_AUTH_TOKEN", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH_TOKEN", ""), + ), ) CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD = PersistentConfig( "CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD", "code_interpreter.jupyter.auth_password", - os.environ.get("CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD", ""), + os.environ.get( + "CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD", + os.environ.get("CODE_EXECUTION_JUPYTER_AUTH_PASSWORD", ""), + ), ) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index a6fa6bd8c4..19ed898804 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -100,7 +100,12 @@ from open_webui.config import ( OPENAI_API_CONFIGS, # Direct Connections ENABLE_DIRECT_CONNECTIONS, - # Code Interpreter + # Code Execution + CODE_EXECUTION_ENGINE, + CODE_EXECUTION_JUPYTER_URL, + CODE_EXECUTION_JUPYTER_AUTH, + CODE_EXECUTION_JUPYTER_AUTH_TOKEN, + CODE_EXECUTION_JUPYTER_AUTH_PASSWORD, ENABLE_CODE_INTERPRETER, CODE_INTERPRETER_ENGINE, CODE_INTERPRETER_PROMPT_TEMPLATE, @@ -613,10 +618,18 @@ app.state.EMBEDDING_FUNCTION = get_embedding_function( ######################################## # -# CODE INTERPRETER +# CODE EXECUTION # ######################################## +app.state.config.CODE_EXECUTION_ENGINE = CODE_EXECUTION_ENGINE +app.state.config.CODE_EXECUTION_JUPYTER_URL = CODE_EXECUTION_JUPYTER_URL +app.state.config.CODE_EXECUTION_JUPYTER_AUTH = CODE_EXECUTION_JUPYTER_AUTH +app.state.config.CODE_EXECUTION_JUPYTER_AUTH_TOKEN = CODE_EXECUTION_JUPYTER_AUTH_TOKEN +app.state.config.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD = ( + CODE_EXECUTION_JUPYTER_AUTH_PASSWORD +) + app.state.config.ENABLE_CODE_INTERPRETER = ENABLE_CODE_INTERPRETER app.state.config.CODE_INTERPRETER_ENGINE = CODE_INTERPRETER_ENGINE app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE = CODE_INTERPRETER_PROMPT_TEMPLATE @@ -1120,6 +1133,9 @@ async def get_app_config(request: Request): { "default_models": app.state.config.DEFAULT_MODELS, "default_prompt_suggestions": app.state.config.DEFAULT_PROMPT_SUGGESTIONS, + "code": { + "engine": app.state.config.CODE_EXECUTION_ENGINE, + }, "audio": { "tts": { "engine": app.state.config.TTS_ENGINE, diff --git a/backend/open_webui/routers/configs.py b/backend/open_webui/routers/configs.py index 016075234a..d460ae670f 100644 --- a/backend/open_webui/routers/configs.py +++ b/backend/open_webui/routers/configs.py @@ -70,6 +70,11 @@ async def set_direct_connections_config( # CodeInterpreterConfig ############################ class CodeInterpreterConfigForm(BaseModel): + CODE_EXECUTION_ENGINE: str + CODE_EXECUTION_JUPYTER_URL: Optional[str] + CODE_EXECUTION_JUPYTER_AUTH: Optional[str] + CODE_EXECUTION_JUPYTER_AUTH_TOKEN: Optional[str] + CODE_EXECUTION_JUPYTER_AUTH_PASSWORD: Optional[str] ENABLE_CODE_INTERPRETER: bool CODE_INTERPRETER_ENGINE: str CODE_INTERPRETER_PROMPT_TEMPLATE: Optional[str] @@ -79,9 +84,14 @@ class CodeInterpreterConfigForm(BaseModel): CODE_INTERPRETER_JUPYTER_AUTH_PASSWORD: Optional[str] -@router.get("/code_interpreter", response_model=CodeInterpreterConfigForm) -async def get_code_interpreter_config(request: Request, user=Depends(get_admin_user)): +@router.get("/code_execution", response_model=CodeInterpreterConfigForm) +async def get_code_execution_config(request: Request, user=Depends(get_admin_user)): return { + "CODE_EXECUTION_ENGINE": request.app.state.config.CODE_EXECUTION_ENGINE, + "CODE_EXECUTION_JUPYTER_URL": request.app.state.config.CODE_EXECUTION_JUPYTER_URL, + "CODE_EXECUTION_JUPYTER_AUTH": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH, + "CODE_EXECUTION_JUPYTER_AUTH_TOKEN": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_TOKEN, + "CODE_EXECUTION_JUPYTER_AUTH_PASSWORD": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD, "ENABLE_CODE_INTERPRETER": request.app.state.config.ENABLE_CODE_INTERPRETER, "CODE_INTERPRETER_ENGINE": request.app.state.config.CODE_INTERPRETER_ENGINE, "CODE_INTERPRETER_PROMPT_TEMPLATE": request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE, @@ -92,10 +102,25 @@ async def get_code_interpreter_config(request: Request, user=Depends(get_admin_u } -@router.post("/code_interpreter", response_model=CodeInterpreterConfigForm) -async def set_code_interpreter_config( +@router.post("/code_execution", response_model=CodeInterpreterConfigForm) +async def set_code_execution_config( request: Request, form_data: CodeInterpreterConfigForm, user=Depends(get_admin_user) ): + + request.app.state.config.CODE_EXECUTION_ENGINE = form_data.CODE_EXECUTION_ENGINE + request.app.state.config.CODE_EXECUTION_JUPYTER_URL = ( + form_data.CODE_EXECUTION_JUPYTER_URL + ) + request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH = ( + form_data.CODE_EXECUTION_JUPYTER_AUTH + ) + request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_TOKEN = ( + form_data.CODE_EXECUTION_JUPYTER_AUTH_TOKEN + ) + request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD = ( + form_data.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD + ) + request.app.state.config.ENABLE_CODE_INTERPRETER = form_data.ENABLE_CODE_INTERPRETER request.app.state.config.CODE_INTERPRETER_ENGINE = form_data.CODE_INTERPRETER_ENGINE request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE = ( @@ -118,6 +143,11 @@ async def set_code_interpreter_config( ) return { + "CODE_EXECUTION_ENGINE": request.app.state.config.CODE_EXECUTION_ENGINE, + "CODE_EXECUTION_JUPYTER_URL": request.app.state.config.CODE_EXECUTION_JUPYTER_URL, + "CODE_EXECUTION_JUPYTER_AUTH": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH, + "CODE_EXECUTION_JUPYTER_AUTH_TOKEN": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_TOKEN, + "CODE_EXECUTION_JUPYTER_AUTH_PASSWORD": request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD, "ENABLE_CODE_INTERPRETER": request.app.state.config.ENABLE_CODE_INTERPRETER, "CODE_INTERPRETER_ENGINE": request.app.state.config.CODE_INTERPRETER_ENGINE, "CODE_INTERPRETER_PROMPT_TEMPLATE": request.app.state.config.CODE_INTERPRETER_PROMPT_TEMPLATE, diff --git a/backend/open_webui/routers/utils.py b/backend/open_webui/routers/utils.py index ea73e97596..61863bda58 100644 --- a/backend/open_webui/routers/utils.py +++ b/backend/open_webui/routers/utils.py @@ -4,45 +4,75 @@ import markdown from open_webui.models.chats import ChatTitleMessagesForm from open_webui.config import DATA_DIR, ENABLE_ADMIN_EXPORT from open_webui.constants import ERROR_MESSAGES -from fastapi import APIRouter, Depends, HTTPException, Response, status +from fastapi import APIRouter, Depends, HTTPException, Request, Response, status from pydantic import BaseModel from starlette.responses import FileResponse + + from open_webui.utils.misc import get_gravatar_url from open_webui.utils.pdf_generator import PDFGenerator -from open_webui.utils.auth import get_admin_user +from open_webui.utils.auth import get_admin_user, get_verified_user +from open_webui.utils.code_interpreter import execute_code_jupyter + router = APIRouter() @router.get("/gravatar") -async def get_gravatar( - email: str, -): +async def get_gravatar(email: str, user=Depends(get_verified_user)): return get_gravatar_url(email) -class CodeFormatRequest(BaseModel): +class CodeForm(BaseModel): code: str @router.post("/code/format") -async def format_code(request: CodeFormatRequest): +async def format_code(form_data: CodeForm, user=Depends(get_verified_user)): try: - formatted_code = black.format_str(request.code, mode=black.Mode()) + formatted_code = black.format_str(form_data.code, mode=black.Mode()) return {"code": formatted_code} except black.NothingChanged: - return {"code": request.code} + return {"code": form_data.code} except Exception as e: raise HTTPException(status_code=400, detail=str(e)) +@router.post("/code/execute") +async def execute_code( + request: Request, form_data: CodeForm, user=Depends(get_verified_user) +): + if request.app.state.config.CODE_EXECUTION_ENGINE == "jupyter": + output = await execute_code_jupyter( + request.app.state.config.CODE_EXECUTION_JUPYTER_URL, + form_data.code, + ( + request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_TOKEN + if request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH == "token" + else None + ), + ( + request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH_PASSWORD + if request.app.state.config.CODE_EXECUTION_JUPYTER_AUTH == "password" + else None + ), + ) + + return output + else: + raise HTTPException( + status_code=400, + detail="Code execution engine not supported", + ) + + class MarkdownForm(BaseModel): md: str @router.post("/markdown") async def get_html_from_markdown( - form_data: MarkdownForm, + form_data: MarkdownForm, user=Depends(get_verified_user) ): return {"html": markdown.markdown(form_data.md)} @@ -54,7 +84,7 @@ class ChatForm(BaseModel): @router.post("/pdf") async def download_chat_as_pdf( - form_data: ChatTitleMessagesForm, + form_data: ChatTitleMessagesForm, user=Depends(get_verified_user) ): try: pdf_bytes = PDFGenerator(form_data).generate_chat_pdf() diff --git a/src/lib/apis/configs/index.ts b/src/lib/apis/configs/index.ts index d7f02564ce..f7f02c7405 100644 --- a/src/lib/apis/configs/index.ts +++ b/src/lib/apis/configs/index.ts @@ -115,10 +115,10 @@ export const setDirectConnectionsConfig = async (token: string, config: object) return res; }; -export const getCodeInterpreterConfig = async (token: string) => { +export const getCodeExecutionConfig = async (token: string) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/configs/code_interpreter`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/configs/code_execution`, { method: 'GET', headers: { 'Content-Type': 'application/json', @@ -142,10 +142,10 @@ export const getCodeInterpreterConfig = async (token: string) => { return res; }; -export const setCodeInterpreterConfig = async (token: string, config: object) => { +export const setCodeExecutionConfig = async (token: string, config: object) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/configs/code_interpreter`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/configs/code_execution`, { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/lib/apis/utils/index.ts b/src/lib/apis/utils/index.ts index 40fdbfcfa2..64db561249 100644 --- a/src/lib/apis/utils/index.ts +++ b/src/lib/apis/utils/index.ts @@ -1,12 +1,13 @@ import { WEBUI_API_BASE_URL } from '$lib/constants'; -export const getGravatarUrl = async (email: string) => { +export const getGravatarUrl = async (token: string, email: string) => { let error = null; const res = await fetch(`${WEBUI_API_BASE_URL}/utils/gravatar?email=${email}`, { method: 'GET', headers: { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` } }) .then(async (res) => { @@ -22,13 +23,14 @@ export const getGravatarUrl = async (email: string) => { return res; }; -export const formatPythonCode = async (code: string) => { +export const executeCode = async (token: string, code: string) => { let error = null; - const res = await fetch(`${WEBUI_API_BASE_URL}/utils/code/format`, { + const res = await fetch(`${WEBUI_API_BASE_URL}/utils/code/execute`, { method: 'POST', headers: { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` }, body: JSON.stringify({ code: code @@ -55,13 +57,48 @@ export const formatPythonCode = async (code: string) => { return res; }; -export const downloadChatAsPDF = async (title: string, messages: object[]) => { +export const formatPythonCode = async (token: string, code: string) => { + let error = null; + + const res = await fetch(`${WEBUI_API_BASE_URL}/utils/code/format`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + code: code + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + + error = err; + if (err.detail) { + error = err.detail; + } + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const downloadChatAsPDF = async (token: string, title: string, messages: object[]) => { let error = null; const blob = await fetch(`${WEBUI_API_BASE_URL}/utils/pdf`, { method: 'POST', headers: { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` }, body: JSON.stringify({ title: title, @@ -81,13 +118,14 @@ export const downloadChatAsPDF = async (title: string, messages: object[]) => { return blob; }; -export const getHTMLFromMarkdown = async (md: string) => { +export const getHTMLFromMarkdown = async (token: string, md: string) => { let error = null; const res = await fetch(`${WEBUI_API_BASE_URL}/utils/markdown`, { method: 'POST', headers: { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` }, body: JSON.stringify({ md: md diff --git a/src/lib/components/admin/Settings.svelte b/src/lib/components/admin/Settings.svelte index 415e4377a7..60edbd25a0 100644 --- a/src/lib/components/admin/Settings.svelte +++ b/src/lib/components/admin/Settings.svelte @@ -19,7 +19,7 @@ import ChartBar from '../icons/ChartBar.svelte'; import DocumentChartBar from '../icons/DocumentChartBar.svelte'; import Evaluations from './Settings/Evaluations.svelte'; - import CodeInterpreter from './Settings/CodeInterpreter.svelte'; + import CodeExecution from './Settings/CodeExecution.svelte'; const i18n = getContext('i18n'); @@ -191,11 +191,11 @@