From 35ea29b184ca4cf5b96a27429dfc62521873a2d3 Mon Sep 17 00:00:00 2001 From: Jan Kessler Date: Wed, 2 Apr 2025 21:50:00 +0200 Subject: [PATCH 01/66] prepare websocket redis sentinel code for upcoming native support of sentinel in python-socketio --- backend/open_webui/socket/main.py | 14 +----- backend/open_webui/utils/redis.py | 73 ++++--------------------------- 2 files changed, 11 insertions(+), 76 deletions(-) diff --git a/backend/open_webui/socket/main.py b/backend/open_webui/socket/main.py index 83dd74fff1..c1ce42c795 100644 --- a/backend/open_webui/socket/main.py +++ b/backend/open_webui/socket/main.py @@ -9,9 +9,8 @@ from open_webui.models.users import Users, UserNameResponse from open_webui.models.channels import Channels from open_webui.models.chats import Chats from open_webui.utils.redis import ( - parse_redis_sentinel_url, get_sentinels_from_env, - AsyncRedisSentinelManager, + get_sentinel_url_from_env, ) from open_webui.env import ( @@ -38,16 +37,7 @@ log.setLevel(SRC_LOG_LEVELS["SOCKET"]) if WEBSOCKET_MANAGER == "redis": if WEBSOCKET_SENTINEL_HOSTS: - redis_config = parse_redis_sentinel_url(WEBSOCKET_REDIS_URL) - mgr = AsyncRedisSentinelManager( - WEBSOCKET_SENTINEL_HOSTS.split(","), - sentinel_port=int(WEBSOCKET_SENTINEL_PORT), - redis_port=redis_config["port"], - service=redis_config["service"], - db=redis_config["db"], - username=redis_config["username"], - password=redis_config["password"], - ) + mgr = socketio.AsyncRedisManager(get_sentinel_url_from_env(WEBSOCKET_REDIS_URL, WEBSOCKET_SENTINEL_HOSTS, WEBSOCKET_SENTINEL_PORT)) else: mgr = socketio.AsyncRedisManager(WEBSOCKET_REDIS_URL) sio = socketio.AsyncServer( diff --git a/backend/open_webui/utils/redis.py b/backend/open_webui/utils/redis.py index baccb16ad6..715ac0d9ba 100644 --- a/backend/open_webui/utils/redis.py +++ b/backend/open_webui/utils/redis.py @@ -4,7 +4,7 @@ from redis import asyncio as aioredis from urllib.parse import urlparse -def parse_redis_sentinel_url(redis_url): +def 
parse_redis_service_url(redis_url): parsed_url = urlparse(redis_url) if parsed_url.scheme != "redis": raise ValueError("Invalid Redis URL scheme. Must be 'redis'.") @@ -20,7 +20,7 @@ def parse_redis_sentinel_url(redis_url): def get_redis_connection(redis_url, redis_sentinels, decode_responses=True): if redis_sentinels: - redis_config = parse_redis_sentinel_url(redis_url) + redis_config = parse_redis_service_url(redis_url) sentinel = redis.sentinel.Sentinel( redis_sentinels, port=redis_config["port"], @@ -45,65 +45,10 @@ def get_sentinels_from_env(sentinel_hosts_env, sentinel_port_env): return [] -class AsyncRedisSentinelManager(socketio.AsyncRedisManager): - def __init__( - self, - sentinel_hosts, - sentinel_port=26379, - redis_port=6379, - service="mymaster", - db=0, - username=None, - password=None, - channel="socketio", - write_only=False, - logger=None, - redis_options=None, - ): - """ - Initialize the Redis Sentinel Manager. - This implementation mostly replicates the __init__ of AsyncRedisManager and - overrides _redis_connect() with a version that uses Redis Sentinel - - :param sentinel_hosts: List of Sentinel hosts - :param sentinel_port: Sentinel Port - :param redis_port: Redis Port (currently unsupported by aioredis!) - :param service: Master service name in Sentinel - :param db: Redis database to use - :param username: Redis username (if any) (currently unsupported by aioredis!) - :param password: Redis password (if any) - :param channel: The channel name on which the server sends and receives - notifications. Must be the same in all the servers. - :param write_only: If set to ``True``, only initialize to emit events. The - default of ``False`` initializes the class for emitting - and receiving. - :param redis_options: additional keyword arguments to be passed to - ``aioredis.from_url()``. 
- """ - self._sentinels = [(host, sentinel_port) for host in sentinel_hosts] - self._redis_port = redis_port - self._service = service - self._db = db - self._username = username - self._password = password - self._channel = channel - self.redis_options = redis_options or {} - - # connect and call grandparent constructor - self._redis_connect() - super(socketio.AsyncRedisManager, self).__init__( - channel=channel, write_only=write_only, logger=logger - ) - - def _redis_connect(self): - """Establish connections to Redis through Sentinel.""" - sentinel = aioredis.sentinel.Sentinel( - self._sentinels, - port=self._redis_port, - db=self._db, - password=self._password, - **self.redis_options, - ) - - self.redis = sentinel.master_for(self._service) - self.pubsub = self.redis.pubsub(ignore_subscribe_messages=True) +def get_sentinel_url_from_env(redis_url, sentinel_hosts_env, sentinel_port_env): + redis_config = parse_redis_service_url(redis_url) + username = redis_config["username"] or "" + password = redis_config["password"] or "" + auth_part = f"{username}:{password}" + hosts_part = ",".join(f"{host}:{sentinel_port_env}" for host in sentinel_hosts_env.split(",")) + return f"redis+sentinel://{auth_part}@{hosts_part}/{redis_config['db']}/{redis_config['service']}" From 257ca454569f1c4bd936a039c751f170ab3c6c38 Mon Sep 17 00:00:00 2001 From: Jan Kessler Date: Thu, 3 Apr 2025 08:24:24 +0200 Subject: [PATCH 02/66] leave out @ in redis+sentine url when no username/password is provided --- backend/open_webui/utils/redis.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/utils/redis.py b/backend/open_webui/utils/redis.py index 715ac0d9ba..24d3eefc7a 100644 --- a/backend/open_webui/utils/redis.py +++ b/backend/open_webui/utils/redis.py @@ -49,6 +49,8 @@ def get_sentinel_url_from_env(redis_url, sentinel_hosts_env, sentinel_port_env): redis_config = parse_redis_service_url(redis_url) username = redis_config["username"] or "" password = 
redis_config["password"] or "" - auth_part = f"{username}:{password}" + auth_part = "" + if username or password: + auth_part = f"{username}:{password}@" hosts_part = ",".join(f"{host}:{sentinel_port_env}" for host in sentinel_hosts_env.split(",")) - return f"redis+sentinel://{auth_part}@{hosts_part}/{redis_config['db']}/{redis_config['service']}" + return f"redis+sentinel://{auth_part}{hosts_part}/{redis_config['db']}/{redis_config['service']}" From 59dd79815651625e84097365f492223d077413fa Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Fri, 11 Apr 2025 14:46:02 -0700 Subject: [PATCH 03/66] refac/security: set ac to private by default --- src/lib/components/AddServerModal.svelte | 2 +- src/lib/components/layout/Sidebar/ChannelModal.svelte | 2 +- .../components/workspace/Knowledge/CreateKnowledgeBase.svelte | 2 +- src/lib/components/workspace/Prompts/PromptEditor.svelte | 2 +- src/lib/components/workspace/Tools/ToolkitEditor.svelte | 2 +- src/lib/components/workspace/common/AccessControl.svelte | 2 +- src/lib/components/workspace/common/AccessControlModal.svelte | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lib/components/AddServerModal.svelte b/src/lib/components/AddServerModal.svelte index 1ce7369e44..a5f0ca5c71 100644 --- a/src/lib/components/AddServerModal.svelte +++ b/src/lib/components/AddServerModal.svelte @@ -35,7 +35,7 @@ let auth_type = 'bearer'; let key = ''; - let accessControl = null; + let accessControl = {}; let enable = true; diff --git a/src/lib/components/layout/Sidebar/ChannelModal.svelte b/src/lib/components/layout/Sidebar/ChannelModal.svelte index 87492b84d0..8ae65d2207 100644 --- a/src/lib/components/layout/Sidebar/ChannelModal.svelte +++ b/src/lib/components/layout/Sidebar/ChannelModal.svelte @@ -19,7 +19,7 @@ export let edit = false; let name = ''; - let accessControl = null; + let accessControl = {}; let loading = false; diff --git a/src/lib/components/workspace/Knowledge/CreateKnowledgeBase.svelte 
b/src/lib/components/workspace/Knowledge/CreateKnowledgeBase.svelte index fefbbefcda..e7c1248f58 100644 --- a/src/lib/components/workspace/Knowledge/CreateKnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/CreateKnowledgeBase.svelte @@ -12,7 +12,7 @@ let name = ''; let description = ''; - let accessControl = null; + let accessControl = {}; const submitHandler = async () => { loading = true; diff --git a/src/lib/components/workspace/Prompts/PromptEditor.svelte b/src/lib/components/workspace/Prompts/PromptEditor.svelte index 4abe5c067e..6a29d03b23 100644 --- a/src/lib/components/workspace/Prompts/PromptEditor.svelte +++ b/src/lib/components/workspace/Prompts/PromptEditor.svelte @@ -21,7 +21,7 @@ let command = ''; let content = ''; - let accessControl = null; + let accessControl = {}; let showAccessControlModal = false; diff --git a/src/lib/components/workspace/Tools/ToolkitEditor.svelte b/src/lib/components/workspace/Tools/ToolkitEditor.svelte index 6057be6cb5..ecfb2144f5 100644 --- a/src/lib/components/workspace/Tools/ToolkitEditor.svelte +++ b/src/lib/components/workspace/Tools/ToolkitEditor.svelte @@ -30,7 +30,7 @@ description: '' }; export let content = ''; - export let accessControl = null; + export let accessControl = {}; let _content = ''; diff --git a/src/lib/components/workspace/common/AccessControl.svelte b/src/lib/components/workspace/common/AccessControl.svelte index 9c3e0dd8b2..78feb9facd 100644 --- a/src/lib/components/workspace/common/AccessControl.svelte +++ b/src/lib/components/workspace/common/AccessControl.svelte @@ -13,7 +13,7 @@ export let onChange: Function = () => {}; export let accessRoles = ['read']; - export let accessControl = null; + export let accessControl = {}; export let allowPublic = true; diff --git a/src/lib/components/workspace/common/AccessControlModal.svelte b/src/lib/components/workspace/common/AccessControlModal.svelte index d694082630..41f0083cfd 100644 --- 
a/src/lib/components/workspace/common/AccessControlModal.svelte +++ b/src/lib/components/workspace/common/AccessControlModal.svelte @@ -6,7 +6,7 @@ import AccessControl from './AccessControl.svelte'; export let show = false; - export let accessControl = null; + export let accessControl = {}; export let accessRoles = ['read']; export let allowPublic = true; From c846a550fa6ad2591c9ce471da96ee934ce11028 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Fri, 11 Apr 2025 15:19:47 -0700 Subject: [PATCH 04/66] refac --- backend/open_webui/utils/task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/open_webui/utils/task.py b/backend/open_webui/utils/task.py index 3a8b4b0a42..66bdb4b3e2 100644 --- a/backend/open_webui/utils/task.py +++ b/backend/open_webui/utils/task.py @@ -152,6 +152,8 @@ def rag_template(template: str, context: str, query: str): if template.strip() == "": template = DEFAULT_RAG_TEMPLATE + template = prompt_template(template) + if "[context]" not in template and "{{CONTEXT}}" not in template: log.debug( "WARNING: The RAG template does not contain the '[context]' or '{{CONTEXT}}' placeholder." 
From c5636ff68c4e9ca095cd6458cc740f4a3aa53831 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Fri, 11 Apr 2025 15:27:25 -0700 Subject: [PATCH 05/66] refac --- .../MessageInput/Commands/Knowledge.svelte | 10 +++++++++- .../components/chat/Messages/Citations.svelte | 14 +++++++++++--- .../chat/Messages/CitationsModal.svelte | 10 +++++++++- src/lib/components/common/FileItem.svelte | 18 +++++++++++------- .../workspace/Knowledge/KnowledgeBase.svelte | 10 +++++++++- 5 files changed, 49 insertions(+), 13 deletions(-) diff --git a/src/lib/components/chat/MessageInput/Commands/Knowledge.svelte b/src/lib/components/chat/MessageInput/Commands/Knowledge.svelte index 3ce444655d..bae077b9b8 100644 --- a/src/lib/components/chat/MessageInput/Commands/Knowledge.svelte +++ b/src/lib/components/chat/MessageInput/Commands/Knowledge.svelte @@ -154,6 +154,14 @@ keys: ['name', 'description'] }); }); + + const decodeString = (str: string) => { + try { + return decodeURIComponent(str); + } catch (e) { + return str; + } + }; {#if filteredItems.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} @@ -210,7 +218,7 @@ {/if}
- {decodeURIComponent(item?.name)} + {decodeString(item?.name)}
diff --git a/src/lib/components/chat/Messages/Citations.svelte b/src/lib/components/chat/Messages/Citations.svelte index 5c6ebbc106..8c2fbf799c 100644 --- a/src/lib/components/chat/Messages/Citations.svelte +++ b/src/lib/components/chat/Messages/Citations.svelte @@ -87,6 +87,14 @@ showRelevance = calculateShowRelevance(citations); showPercentage = shouldShowPercentage(citations); } + + const decodeString = (str: string) => { + try { + return decodeURIComponent(str); + } catch (e) { + return str; + } + }; - {decodeURIComponent(citation.source.name)} + {decodeString(citation.source.name)} {/each} @@ -157,7 +165,7 @@ {/if}
- {decodeURIComponent(citation.source.name)} + {decodeString(citation.source.name)}
{/each} @@ -194,7 +202,7 @@ {/if}
- {decodeURIComponent(citation.source.name)} + {decodeString(citation.source.name)}
{/each} diff --git a/src/lib/components/chat/Messages/CitationsModal.svelte b/src/lib/components/chat/Messages/CitationsModal.svelte index c77a8193b4..174d80c4fb 100644 --- a/src/lib/components/chat/Messages/CitationsModal.svelte +++ b/src/lib/components/chat/Messages/CitationsModal.svelte @@ -45,6 +45,14 @@ ); } } + + const decodeString = (str: string) => { + try { + return decodeURIComponent(str); + } catch (e) { + return str; + } + }; @@ -99,7 +107,7 @@ : `#`} target="_blank" > - {decodeURIComponent(document?.metadata?.name ?? document.source.name)} + {decodeString(document?.metadata?.name ?? document.source.name)} {#if document?.metadata?.page} diff --git a/src/lib/components/common/FileItem.svelte b/src/lib/components/common/FileItem.svelte index 772b078584..fda00046a7 100644 --- a/src/lib/components/common/FileItem.svelte +++ b/src/lib/components/common/FileItem.svelte @@ -28,6 +28,14 @@ import { deleteFileById } from '$lib/apis/files'; let showModal = false; + + const decodeString = (str: string) => { + try { + return decodeURIComponent(str); + } catch (e) { + return str; + } + }; {#if item} @@ -82,7 +90,7 @@ {#if !small}
- {decodeURIComponent(name)} + {decodeString(name)}
@@ -101,11 +109,7 @@
{:else} - +
{#if loading} @@ -113,7 +117,7 @@
{/if} -
{decodeURIComponent(name)}
+
{decodeString(name)}
{formatFileSize(size)}
diff --git a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte index c6f47e8def..dc0e354eca 100644 --- a/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte +++ b/src/lib/components/workspace/Knowledge/KnowledgeBase.svelte @@ -547,6 +547,14 @@ dropZone?.removeEventListener('drop', onDrop); dropZone?.removeEventListener('dragleave', onDragLeave); }); + + const decodeString = (str: string) => { + try { + return decodeURIComponent(str); + } catch (e) { + return str; + } + }; {#if dragged} @@ -698,7 +706,7 @@ href={selectedFile.id ? `/api/v1/files/${selectedFile.id}/content` : '#'} target="_blank" > - {decodeURIComponent(selectedFile?.meta?.name)} + {decodeString(selectedFile?.meta?.name)} From 30d02c638c8a51449da21c09c830245232e6c0c1 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Fri, 11 Apr 2025 15:34:21 -0700 Subject: [PATCH 06/66] refac: banners --- src/lib/components/chat/Chat.svelte | 50 +---- src/lib/components/chat/Navbar.svelte | 281 +++++++++++++++----------- 2 files changed, 169 insertions(+), 162 deletions(-) diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index cbd6a72c41..4f40c6b13e 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -1957,61 +1957,13 @@ timestamp: Date.now() } }} + {history} title={$chatTitle} bind:selectedModels shareEnabled={!!history.currentId} {initNewChat} /> - {#if !history.currentId && !$chatId && selectedModels.length <= 1 && ($banners.length > 0 || ($config?.license_metadata?.type ?? null) === 'trial' || (($config?.license_metadata?.seats ?? null) !== null && $config?.user_count > $config?.license_metadata?.seats))} -
-
- {#if ($config?.license_metadata?.type ?? null) === 'trial'} - - {/if} - - {#if ($config?.license_metadata?.seats ?? null) !== null && $config?.user_count > $config?.license_metadata?.seats} - - {/if} - - {#each $banners.filter( (b) => (b.dismissible ? !JSON.parse(localStorage.getItem('dismissedBannerIds') ?? '[]').includes(b.id) : true) ) as banner} - { - const bannerId = e.detail; - - localStorage.setItem( - 'dismissedBannerIds', - JSON.stringify( - [ - bannerId, - ...JSON.parse(localStorage.getItem('dismissedBannerIds') ?? '[]') - ].filter((id) => $banners.find((b) => b.id === id)) - ) - ); - }} - /> - {/each} -
-
- {/if} -
{#if $settings?.landingPageMode === 'chat' || createMessagesList(history, history.currentId).length > 0}
-
+ + {#if !history.currentId && !$chatId && ($banners.length > 0 || ($config?.license_metadata?.type ?? null) === 'trial' || (($config?.license_metadata?.seats ?? null) !== null && $config?.user_count > $config?.license_metadata?.seats))} +
+
+ {#if ($config?.license_metadata?.type ?? null) === 'trial'} + + {/if} + + {#if ($config?.license_metadata?.seats ?? null) !== null && $config?.user_count > $config?.license_metadata?.seats} + + {/if} + + {#each $banners.filter( (b) => (b.dismissible ? !JSON.parse(localStorage.getItem('dismissedBannerIds') ?? '[]').includes(b.id) : true) ) as banner} + { + const bannerId = e.detail; + + localStorage.setItem( + 'dismissedBannerIds', + JSON.stringify( + [ + bannerId, + ...JSON.parse(localStorage.getItem('dismissedBannerIds') ?? '[]') + ].filter((id) => $banners.find((b) => b.id === id)) + ) + ); + }} + /> + {/each} +
+
+ {/if} From 5eac5960efe332e113359df9516c5fa7f1eb0da0 Mon Sep 17 00:00:00 2001 From: tth37 Date: Sat, 12 Apr 2025 17:13:30 +0800 Subject: [PATCH 07/66] feat: Add frontend configuration for web loader --- backend/open_webui/config.py | 36 +- backend/open_webui/routers/retrieval.py | 150 ++++--- .../admin/Settings/WebSearch.svelte | 370 ++++++++++++------ 3 files changed, 361 insertions(+), 195 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 9f5395154c..635151c7e5 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2087,18 +2087,6 @@ SERPLY_API_KEY = PersistentConfig( os.getenv("SERPLY_API_KEY", ""), ) -TAVILY_API_KEY = PersistentConfig( - "TAVILY_API_KEY", - "rag.web.search.tavily_api_key", - os.getenv("TAVILY_API_KEY", ""), -) - -TAVILY_EXTRACT_DEPTH = PersistentConfig( - "TAVILY_EXTRACT_DEPTH", - "rag.web.search.tavily_extract_depth", - os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), -) - JINA_API_KEY = PersistentConfig( "JINA_API_KEY", "rag.web.search.jina_api_key", @@ -2193,28 +2181,40 @@ RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( PLAYWRIGHT_WS_URI = PersistentConfig( "PLAYWRIGHT_WS_URI", - "rag.web.loader.engine.playwright.ws.uri", - os.environ.get("PLAYWRIGHT_WS_URI", None), + "rag.web.loader.playwright_ws_uri", + os.environ.get("PLAYWRIGHT_WS_URI", ""), ) PLAYWRIGHT_TIMEOUT = PersistentConfig( "PLAYWRIGHT_TIMEOUT", - "rag.web.loader.engine.playwright.timeout", - int(os.environ.get("PLAYWRIGHT_TIMEOUT", "10")), + "rag.web.loader.playwright_timeout", + int(os.environ.get("PLAYWRIGHT_TIMEOUT", "10000")), ) FIRECRAWL_API_KEY = PersistentConfig( "FIRECRAWL_API_KEY", - "firecrawl.api_key", + "rag.web.loader.firecrawl_api_key", os.environ.get("FIRECRAWL_API_KEY", ""), ) FIRECRAWL_API_BASE_URL = PersistentConfig( "FIRECRAWL_API_BASE_URL", - "firecrawl.api_url", + "rag.web.loader.firecrawl_api_url", os.environ.get("FIRECRAWL_API_BASE_URL", "https://api.firecrawl.dev"), ) +TAVILY_API_KEY = 
PersistentConfig( + "TAVILY_API_KEY", + "rag.web.loader.tavily_api_key", + os.getenv("TAVILY_API_KEY", ""), +) + +TAVILY_EXTRACT_DEPTH = PersistentConfig( + "TAVILY_EXTRACT_DEPTH", + "rag.web.loader.tavily_extract_depth", + os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), +) + #################################### # Images #################################### diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 8e1708c65b..d00e303f1b 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -378,18 +378,9 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "max_size": request.app.state.config.FILE_MAX_SIZE, "max_count": request.app.state.config.FILE_MAX_COUNT, }, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - }, "web": { - "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, "search": { - "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH, - "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, - "onedrive": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, @@ -415,10 +406,26 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - 
"trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, }, + "loader": { + "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, + "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, + "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, + "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, + "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, + "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, + "tavily_api_key": request.app.state.config.TAVILY_API_KEY, + "tavily_extract_depth": request.app.state.config.TAVILY_EXTRACT_DEPTH, + "youtube": { + "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, + }, + }, }, } @@ -458,7 +465,6 @@ class YoutubeLoaderConfig(BaseModel): class WebSearchConfig(BaseModel): - enabled: bool engine: Optional[str] = None searxng_query_url: Optional[str] = None google_pse_api_key: Optional[str] = None @@ -485,14 +491,27 @@ class WebSearchConfig(BaseModel): sougou_api_sk: Optional[str] = None result_count: Optional[int] = None concurrent_requests: Optional[int] = None - trust_env: Optional[bool] = None domain_filter_list: Optional[List[str]] = [] +class WebLoaderConfig(BaseModel): + engine: Optional[str] = None + enable_ssl_verification: Optional[bool] = None + trust_env: Optional[bool] = None + bypass_embedding_and_retrieval: Optional[bool] = None + playwright_ws_uri: Optional[str] = None + playwright_timeout: Optional[int] = None + firecrawl_api_key: 
Optional[str] = None + firecrawl_api_base_url: Optional[str] = None + tavily_api_key: Optional[str] = None + tavily_extract_depth: Optional[str] = None + youtube: Optional[YoutubeLoaderConfig] = None + + class WebConfig(BaseModel): + ENABLE_RAG_WEB_SEARCH: Optional[bool] = None search: WebSearchConfig - ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None - BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None + loader: WebLoaderConfig class ConfigUpdateForm(BaseModel): @@ -504,7 +523,6 @@ class ConfigUpdateForm(BaseModel): file: Optional[FileConfig] = None content_extraction: Optional[ContentExtractionConfig] = None chunk: Optional[ChunkParamUpdateForm] = None - youtube: Optional[YoutubeLoaderConfig] = None web: Optional[WebConfig] = None @@ -576,24 +594,12 @@ async def update_rag_config( request.app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size request.app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap - if form_data.youtube is not None: - request.app.state.config.YOUTUBE_LOADER_LANGUAGE = form_data.youtube.language - request.app.state.config.YOUTUBE_LOADER_PROXY_URL = form_data.youtube.proxy_url - request.app.state.YOUTUBE_LOADER_TRANSLATION = form_data.youtube.translation - if form_data.web is not None: - request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( - # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False - form_data.web.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION + request.app.state.config.ENABLE_RAG_WEB_SEARCH = ( + form_data.web.ENABLE_RAG_WEB_SEARCH ) - request.app.state.config.ENABLE_RAG_WEB_SEARCH = form_data.web.search.enabled request.app.state.config.RAG_WEB_SEARCH_ENGINE = form_data.web.search.engine - - request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( - form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL - ) - request.app.state.config.SEARXNG_QUERY_URL = ( form_data.web.search.searxng_query_url ) @@ -628,10 +634,8 
@@ async def update_rag_config( request.app.state.config.SEARCHAPI_ENGINE = ( form_data.web.search.searchapi_engine ) - request.app.state.config.SERPAPI_API_KEY = form_data.web.search.serpapi_api_key request.app.state.config.SERPAPI_ENGINE = form_data.web.search.serpapi_engine - request.app.state.config.JINA_API_KEY = form_data.web.search.jina_api_key request.app.state.config.BING_SEARCH_V7_ENDPOINT = ( form_data.web.search.bing_search_v7_endpoint @@ -639,32 +643,59 @@ async def update_rag_config( request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( form_data.web.search.bing_search_v7_subscription_key ) - request.app.state.config.EXA_API_KEY = form_data.web.search.exa_api_key - request.app.state.config.PERPLEXITY_API_KEY = ( form_data.web.search.perplexity_api_key ) - request.app.state.config.SOUGOU_API_SID = ( - form_data.web.search.sougou_api_sid - ) - request.app.state.config.SOUGOU_API_SK = ( - form_data.web.search.sougou_api_sk - ) - + request.app.state.config.SOUGOU_API_SID = form_data.web.search.sougou_api_sid + request.app.state.config.SOUGOU_API_SK = form_data.web.search.sougou_api_sk request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = ( form_data.web.search.result_count ) request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.search.concurrent_requests ) - request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV = ( - form_data.web.search.trust_env - ) request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = ( form_data.web.search.domain_filter_list ) + request.app.state.config.RAG_WEB_LOADER_ENGINE = form_data.web.loader.engine + request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( + # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False + form_data.web.loader.enable_ssl_verification + ) + request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV = ( + form_data.web.loader.trust_env + ) + request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( + 
form_data.web.loader.bypass_embedding_and_retrieval + ) + request.app.state.config.PLAYWRIGHT_WS_URI = ( + form_data.web.loader.playwright_ws_uri + ) + request.app.state.config.PLAYWRIGHT_TIMEOUT = ( + form_data.web.loader.playwright_timeout + ) + request.app.state.config.FIRECRAWL_API_KEY = ( + form_data.web.loader.firecrawl_api_key + ) + request.app.state.config.FIRECRAWL_API_BASE_URL = ( + form_data.web.loader.firecrawl_api_base_url + ) + request.app.state.config.TAVILY_API_KEY = form_data.web.loader.tavily_api_key + request.app.state.config.TAVILY_EXTRACT_DEPTH = ( + form_data.web.loader.tavily_extract_depth + ) + request.app.state.config.YOUTUBE_LOADER_LANGUAGE = ( + form_data.web.loader.youtube.language + ) + request.app.state.config.YOUTUBE_LOADER_PROXY_URL = ( + form_data.web.loader.youtube.proxy_url + ) + request.app.state.YOUTUBE_LOADER_TRANSLATION = ( + form_data.web.loader.youtube.translation + ) + return { "status": True, "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, @@ -691,16 +722,9 @@ async def update_rag_config( "chunk_size": request.app.state.config.CHUNK_SIZE, "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, }, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, - }, "web": { - "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, "search": { - "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH, "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, @@ -713,11 +737,11 @@ async def 
update_rag_config( "serpstack_https": request.app.state.config.SERPSTACK_HTTPS, "serper_api_key": request.app.state.config.SERPER_API_KEY, "serply_api_key": request.app.state.config.SERPLY_API_KEY, - "serachapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, + "tavily_api_key": request.app.state.config.TAVILY_API_KEY, + "searchapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, "searchapi_engine": request.app.state.config.SEARCHAPI_ENGINE, "serpapi_api_key": request.app.state.config.SERPAPI_API_KEY, "serpapi_engine": request.app.state.config.SERPAPI_ENGINE, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, "jina_api_key": request.app.state.config.JINA_API_KEY, "bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT, "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, @@ -727,9 +751,25 @@ async def update_rag_config( "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, }, + "loader": { + "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, + "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, + "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, + "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, + "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, + "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, + "tavily_api_key": request.app.state.config.TAVILY_API_KEY, + "tavily_extract_depth": 
request.app.state.config.TAVILY_EXTRACT_DEPTH, + "youtube": { + "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, + }, + }, }, } diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 82e61bdc37..32dfe02fac 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -13,6 +13,11 @@ export let saveHandler: Function; let webConfig = null; + + let bypass_ssl_verification = null; + let tavily_api_key = null; + let youtube_language = null; + let webSearchEngines = [ 'searxng', 'google_pse', @@ -33,10 +38,7 @@ 'perplexity', 'sougou' ]; - - let youtubeLanguage = 'en'; - let youtubeTranslation = null; - let youtubeProxyUrl = ''; + let webLoaderEngines = ['safe_web', 'playwright', 'firecrawl', 'tavily']; const submitHandler = async () => { // Convert domain filter string to array before sending @@ -49,16 +51,20 @@ webConfig.search.domain_filter_list = []; } + // Set the enable_ssl_verification flag based on the switch state + webConfig.loader.enable_ssl_verification = !bypass_ssl_verification; + + // Set shared tavily_api_key + webConfig.search.tavily_api_key = tavily_api_key; + webConfig.loader.tavily_api_key = tavily_api_key; + webConfig.loader.youtube.language = youtube_language.split(',').map((lang) => lang.trim()); + const res = await updateRAGConfig(localStorage.token, { - web: webConfig, - youtube: { - language: youtubeLanguage.split(',').map((lang) => lang.trim()), - translation: youtubeTranslation, - proxy_url: youtubeProxyUrl - } + web: webConfig }); webConfig.search.domain_filter_list = webConfig.search.domain_filter_list.join(', '); + youtube_language = webConfig.loader.youtube.language.join(', '); }; onMount(async () => { @@ -70,10 +76,9 @@ if (webConfig?.search?.domain_filter_list) { 
webConfig.search.domain_filter_list = webConfig.search.domain_filter_list.join(', '); } - - youtubeLanguage = res.youtube.language.join(','); - youtubeTranslation = res.youtube.translation; - youtubeProxyUrl = res.youtube.proxy_url; + bypass_ssl_verification = !webConfig.loader.enable_ssl_verification; + tavily_api_key = webConfig.search.tavily_api_key || webConfig.loader.tavily_api_key; + youtube_language = webConfig.loader.youtube.language.join(', '); } }); @@ -95,10 +100,10 @@
- {$i18n.t('Web Search')} + {$i18n.t('Enable Web Search')}
- +
@@ -197,7 +202,6 @@ bind:value={webConfig.search.kagi_search_api_key} /> - . {:else if webConfig.search.engine === 'mojeek'}
@@ -333,7 +337,7 @@
@@ -405,135 +409,208 @@ /> - {:else if webConfig.search.engine === 'sougou'} -
-
-
- {$i18n.t('Sougou Search API sID')} -
- - + {:else if webConfig.search.engine === 'sougou'} +
+
+
+ {$i18n.t('Sougou Search API sID')}
+ +
-
-
-
- {$i18n.t('Sougou Search API SK')} -
- - +
+
+
+
+ {$i18n.t('Sougou Search API SK')}
+ +
+
{/if} {/if} - {#if webConfig.search.enabled} -
-
-
-
- {$i18n.t('Search Result Count')} -
- - +
+
+
+
+ {$i18n.t('Search Result Count')}
-
-
- {$i18n.t('Concurrent Requests')} -
+ +
- +
+
+ {$i18n.t('Concurrent Requests')}
+ +
- -
-
- {$i18n.t('Domain Filter List')} -
- - -
- {/if} - -
-
- - {$i18n.t('Bypass Embedding and Retrieval')} - -
-
- - - -
-
-
- {$i18n.t('Trust Proxy Environment')} +
+
+ {$i18n.t('Domain Filter List')}
-
- - - -
-
-
-
+ +
+
{$i18n.t('Loader')}

- {$i18n.t('Bypass SSL verification for Websites')} + {$i18n.t('Web Loader Engine')}
- +
+ {#if webConfig.loader.engine !== ''} + {#if webConfig.loader.engine === 'playwright'} +
+
+
+ {$i18n.t('Playwright WebSocket URL')} +
+ +
+
+ +
+
+
+ +
+
+ {$i18n.t('Playwright Timeout (ms)')} +
+ +
+
+ +
+
+
+
+ {:else if webConfig.loader.engine === 'firecrawl'} +
+
+
+ {$i18n.t('Firecrawl API Base URL')} +
+ +
+
+ +
+
+
+ +
+
+ {$i18n.t('Firecrawl API Key')} +
+ + +
+
+ {:else if webConfig.loader.engine === 'tavily'} +
+
+
+ {$i18n.t('Tavily Extract Depth')} +
+ +
+
+ +
+
+
+ + {#if webConfig.search.engine !== 'tavily'} +
+
+ {$i18n.t('Tavily API Key')} +
+ + +
+ {/if} +
+ {/if} + {/if} + +
+
{$i18n.t('Youtube Language')} @@ -543,7 +620,7 @@ class="flex-1 w-full rounded-lg text-sm bg-transparent outline-hidden" type="text" placeholder={$i18n.t('Enter language codes')} - bind:value={youtubeLanguage} + bind:value={youtube_language} autocomplete="off" />
@@ -555,14 +632,63 @@
+ +
+ +
+
+ {$i18n.t('Bypass SSL verification for Websites')} +
+
+ +
+
+ +
+
+ {$i18n.t('Trust Proxy Environment')} +
+
+ + + +
+
+ +
+
+ + {$i18n.t('Bypass Embedding and Retrieval')} + +
+
+ + + +
+
{/if} From c6755f9151fe10295e25bab3212f4756ac675458 Mon Sep 17 00:00:00 2001 From: Jan Kessler Date: Sat, 12 Apr 2025 18:48:07 +0200 Subject: [PATCH 08/66] bump python-socketio to 5.13.0 (to support Redis Sentinel natively) --- backend/requirements.txt | 2 +- pyproject.toml | 2 +- uv.lock | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index dd7c859329..02e0babae3 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -3,7 +3,7 @@ uvicorn[standard]==0.34.0 pydantic==2.10.6 python-multipart==0.0.20 -python-socketio==5.11.3 +python-socketio==5.13.0 python-jose==3.4.0 passlib[bcrypt]==1.7.4 diff --git a/pyproject.toml b/pyproject.toml index 2e8537a770..5be1baf649 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ "pydantic==2.10.6", "python-multipart==0.0.18", - "python-socketio==5.11.3", + "python-socketio==5.13.0", "python-jose==3.4.0", "passlib[bcrypt]==1.7.4", diff --git a/uv.lock b/uv.lock index ab2e57ae44..5578499595 100644 --- a/uv.lock +++ b/uv.lock @@ -2966,7 +2966,7 @@ requires-dist = [ { name = "python-jose", specifier = "==3.3.0" }, { name = "python-multipart", specifier = "==0.0.18" }, { name = "python-pptx", specifier = "==1.0.0" }, - { name = "python-socketio", specifier = "==5.11.3" }, + { name = "python-socketio", specifier = "==5.13.0" }, { name = "pytube", specifier = "==15.0.0" }, { name = "pyxlsb", specifier = "==1.0.10" }, { name = "qdrant-client", specifier = "~=1.12.0" }, @@ -4073,15 +4073,15 @@ wheels = [ [[package]] name = "python-socketio" -version = "5.11.3" +version = "5.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "bidict" }, { name = "python-engineio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1e/74/b1e8787cea757e1f533a7878e94f929679ef7e07a2aaf44de6b71065b1f2/python_socketio-5.11.3.tar.gz", hash = 
"sha256:194af8cdbb7b0768c2e807ba76c7abc288eb5bb85559b7cddee51a6bc7a65737", size = 117702 } +sdist = { url = "https://files.pythonhosted.org/packages/21/1a/396d50ccf06ee539fa758ce5623b59a9cb27637fc4b2dc07ed08bf495e77/python_socketio-5.13.0.tar.gz", hash = "sha256:ac4e19a0302ae812e23b712ec8b6427ca0521f7c582d6abb096e36e24a263029", size = 121125 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/59/5ee858d5736594d75385b9a8c0f65af6eca5da2b359ed3fb6a7486526399/python_socketio-5.11.3-py3-none-any.whl", hash = "sha256:2a923a831ff70664b7c502df093c423eb6aa93c1ce68b8319e840227a26d8b69", size = 76180 }, + { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800 }, ] [[package]] From bdef1001ac0ff4003dc7e1fdfce4ccf4b16a80ab Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 12 Apr 2025 15:10:43 -0700 Subject: [PATCH 09/66] refac --- backend/open_webui/models/memories.py | 5 +++-- backend/open_webui/routers/memories.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/models/memories.py b/backend/open_webui/models/memories.py index c8dae97267..8b10a77cf9 100644 --- a/backend/open_webui/models/memories.py +++ b/backend/open_webui/models/memories.py @@ -63,14 +63,15 @@ class MemoriesTable: else: return None - def update_memory_by_id( + def update_memory_by_id_and_user_id( self, id: str, + user_id: str, content: str, ) -> Optional[MemoryModel]: with get_db() as db: try: - db.query(Memory).filter_by(id=id).update( + db.query(Memory).filter_by(id=id, user_id=user_id).update( {"content": content, "updated_at": int(time.time())} ) db.commit() diff --git a/backend/open_webui/routers/memories.py b/backend/open_webui/routers/memories.py index e660ef852b..6d54c9c170 100644 --- a/backend/open_webui/routers/memories.py +++ 
b/backend/open_webui/routers/memories.py @@ -153,7 +153,9 @@ async def update_memory_by_id( form_data: MemoryUpdateModel, user=Depends(get_verified_user), ): - memory = Memories.update_memory_by_id(memory_id, form_data.content) + memory = Memories.update_memory_by_id_and_user_id( + memory_id, user.id, form_data.content + ) if memory is None: raise HTTPException(status_code=404, detail="Memory not found") From c3497da5dd5a61f0e75e2a0bfdcc70e9e0cdaa8a Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 12 Apr 2025 15:11:03 -0700 Subject: [PATCH 10/66] enh: only copy text message content --- src/lib/components/chat/Messages/ResponseMessage.svelte | 5 ++++- src/lib/utils/index.ts | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index f3c5c306e7..5d6a1c3a8a 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -24,7 +24,9 @@ getMessageContentParts, sanitizeResponseContent, createMessagesList, - formatDate + formatDate, + removeDetails, + removeAllDetails } from '$lib/utils'; import { WEBUI_BASE_URL } from '$lib/constants'; @@ -152,6 +154,7 @@ let showRateComment = false; const copyToClipboard = async (text) => { + text = removeAllDetails(text); const res = await _copyToClipboard(text); if (res) { toast.success($i18n.t('Copying to clipboard was successful!')); diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index bcf39f76dc..ffcd6e27a7 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -683,6 +683,11 @@ export const removeDetails = (content, types) => { return content; }; +export const removeAllDetails = (content) => { + content = content.replace(/<details[^>]*>.*?<\/details>/gis, ''); + return content; +}; + export const processDetails = (content) => { content = removeDetails(content, ['reasoning', 'code_interpreter']); From 
48a23ce3fe06ca62faa7c1a19c95229126ad2b91 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Sat, 12 Apr 2025 16:33:36 -0700 Subject: [PATCH 11/66] refac: web/rag config --- backend/open_webui/config.py | 120 +- backend/open_webui/main.py | 41 +- backend/open_webui/retrieval/web/utils.py | 48 +- backend/open_webui/routers/retrieval.py | 873 ++++++------ backend/start.sh | 4 +- backend/start_windows.bat | 4 +- docker-compose.playwright.yaml | 4 +- src/lib/apis/retrieval/index.ts | 33 +- .../admin/Settings/Documents.svelte | 1206 ++++++++--------- .../admin/Settings/WebSearch.svelte | 540 ++++---- src/lib/utils/rag/index.ts | 24 - 11 files changed, 1367 insertions(+), 1530 deletions(-) delete mode 100644 src/lib/utils/rag/index.ts diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 635151c7e5..e13ace6681 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -201,7 +201,10 @@ def save_config(config): T = TypeVar("T") -ENABLE_PERSISTENT_CONFIG = os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true" +ENABLE_PERSISTENT_CONFIG = ( + os.environ.get("ENABLE_PERSISTENT_CONFIG", "True").lower() == "true" +) + class PersistentConfig(Generic[T]): def __init__(self, env_name: str, config_path: str, env_value: T): @@ -612,10 +615,16 @@ def load_oauth_providers(): "scope": OAUTH_SCOPES.value, } - if OAUTH_CODE_CHALLENGE_METHOD.value and OAUTH_CODE_CHALLENGE_METHOD.value == "S256": + if ( + OAUTH_CODE_CHALLENGE_METHOD.value + and OAUTH_CODE_CHALLENGE_METHOD.value == "S256" + ): client_kwargs["code_challenge_method"] = "S256" elif OAUTH_CODE_CHALLENGE_METHOD.value: - raise Exception('Code challenge methods other than "%s" not supported. Given: "%s"' % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value)) + raise Exception( + 'Code challenge methods other than "%s" not supported. 
Given: "%s"' + % ("S256", OAUTH_CODE_CHALLENGE_METHOD.value) + ) client.register( name="oidc", @@ -1820,12 +1829,6 @@ RAG_FILE_MAX_SIZE = PersistentConfig( ), ) -ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( - "ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", - "rag.enable_web_loader_ssl_verification", - os.environ.get("ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true", -) - RAG_EMBEDDING_ENGINE = PersistentConfig( "RAG_EMBEDDING_ENGINE", "rag.embedding_engine", @@ -1990,16 +1993,20 @@ YOUTUBE_LOADER_PROXY_URL = PersistentConfig( ) -ENABLE_RAG_WEB_SEARCH = PersistentConfig( - "ENABLE_RAG_WEB_SEARCH", +#################################### +# Web Search (RAG) +#################################### + +ENABLE_WEB_SEARCH = PersistentConfig( + "ENABLE_WEB_SEARCH", "rag.web.search.enable", - os.getenv("ENABLE_RAG_WEB_SEARCH", "False").lower() == "true", + os.getenv("ENABLE_WEB_SEARCH", "False").lower() == "true", ) -RAG_WEB_SEARCH_ENGINE = PersistentConfig( - "RAG_WEB_SEARCH_ENGINE", +WEB_SEARCH_ENGINE = PersistentConfig( + "WEB_SEARCH_ENGINE", "rag.web.search.engine", - os.getenv("RAG_WEB_SEARCH_ENGINE", ""), + os.getenv("WEB_SEARCH_ENGINE", ""), ) BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig( @@ -2008,10 +2015,18 @@ BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = PersistentConfig( os.getenv("BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL", "False").lower() == "true", ) + +WEB_SEARCH_RESULT_COUNT = PersistentConfig( + "WEB_SEARCH_RESULT_COUNT", + "rag.web.search.result_count", + int(os.getenv("WEB_SEARCH_RESULT_COUNT", "3")), +) + + # You can provide a list of your own websites to filter after performing a web search. # This ensures the highest level of safety and reliability of the information sources. 
-RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( - "RAG_WEB_SEARCH_DOMAIN_FILTER_LIST", +WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( + "WEB_SEARCH_DOMAIN_FILTER_LIST", "rag.web.search.domain.filter_list", [ # "wikipedia.com", @@ -2020,6 +2035,30 @@ RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( ], ) +WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( + "WEB_SEARCH_CONCURRENT_REQUESTS", + "rag.web.search.concurrent_requests", + int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")), +) + +WEB_LOADER_ENGINE = PersistentConfig( + "WEB_LOADER_ENGINE", + "rag.web.loader.engine", + os.environ.get("WEB_LOADER_ENGINE", ""), +) + +ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( + "ENABLE_WEB_LOADER_SSL_VERIFICATION", + "rag.web.loader.ssl_verification", + os.environ.get("ENABLE_WEB_LOADER_SSL_VERIFICATION", "True").lower() == "true", +) + +WEB_SEARCH_TRUST_ENV = PersistentConfig( + "WEB_SEARCH_TRUST_ENV", + "rag.web.search.trust_env", + os.getenv("WEB_SEARCH_TRUST_ENV", "False").lower() == "true", +) + SEARXNG_QUERY_URL = PersistentConfig( "SEARXNG_QUERY_URL", @@ -2155,34 +2194,22 @@ SOUGOU_API_SK = PersistentConfig( os.getenv("SOUGOU_API_SK", ""), ) -RAG_WEB_SEARCH_RESULT_COUNT = PersistentConfig( - "RAG_WEB_SEARCH_RESULT_COUNT", - "rag.web.search.result_count", - int(os.getenv("RAG_WEB_SEARCH_RESULT_COUNT", "3")), +TAVILY_API_KEY = PersistentConfig( + "TAVILY_API_KEY", + "rag.web.search.tavily_api_key", + os.getenv("TAVILY_API_KEY", ""), ) -RAG_WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig( - "RAG_WEB_SEARCH_CONCURRENT_REQUESTS", - "rag.web.search.concurrent_requests", - int(os.getenv("RAG_WEB_SEARCH_CONCURRENT_REQUESTS", "10")), +TAVILY_EXTRACT_DEPTH = PersistentConfig( + "TAVILY_EXTRACT_DEPTH", + "rag.web.search.tavily_extract_depth", + os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), ) -RAG_WEB_LOADER_ENGINE = PersistentConfig( - "RAG_WEB_LOADER_ENGINE", - "rag.web.loader.engine", - os.environ.get("RAG_WEB_LOADER_ENGINE", "safe_web"), -) - 
-RAG_WEB_SEARCH_TRUST_ENV = PersistentConfig( - "RAG_WEB_SEARCH_TRUST_ENV", - "rag.web.search.trust_env", - os.getenv("RAG_WEB_SEARCH_TRUST_ENV", "False").lower() == "true", -) - -PLAYWRIGHT_WS_URI = PersistentConfig( - "PLAYWRIGHT_WS_URI", - "rag.web.loader.playwright_ws_uri", - os.environ.get("PLAYWRIGHT_WS_URI", ""), +PLAYWRIGHT_WS_URL = PersistentConfig( + "PLAYWRIGHT_WS_URL", + "rag.web.loader.PLAYWRIGHT_WS_URL", + os.environ.get("PLAYWRIGHT_WS_URL", ""), ) PLAYWRIGHT_TIMEOUT = PersistentConfig( @@ -2203,17 +2230,6 @@ FIRECRAWL_API_BASE_URL = PersistentConfig( os.environ.get("FIRECRAWL_API_BASE_URL", "https://api.firecrawl.dev"), ) -TAVILY_API_KEY = PersistentConfig( - "TAVILY_API_KEY", - "rag.web.loader.tavily_api_key", - os.getenv("TAVILY_API_KEY", ""), -) - -TAVILY_EXTRACT_DEPTH = PersistentConfig( - "TAVILY_EXTRACT_DEPTH", - "rag.web.loader.tavily_extract_depth", - os.getenv("TAVILY_EXTRACT_DEPTH", "basic"), -) #################################### # Images diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 8095affac3..e95de90a54 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -160,11 +160,11 @@ from open_webui.config import ( AUDIO_TTS_VOICE, AUDIO_TTS_AZURE_SPEECH_REGION, AUDIO_TTS_AZURE_SPEECH_OUTPUT_FORMAT, - PLAYWRIGHT_WS_URI, + PLAYWRIGHT_WS_URL, PLAYWRIGHT_TIMEOUT, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, - RAG_WEB_LOADER_ENGINE, + WEB_LOADER_ENGINE, WHISPER_MODEL, DEEPGRAM_API_KEY, WHISPER_MODEL_AUTO_UPDATE, @@ -205,12 +205,13 @@ from open_webui.config import ( YOUTUBE_LOADER_LANGUAGE, YOUTUBE_LOADER_PROXY_URL, # Retrieval (Web Search) - RAG_WEB_SEARCH_ENGINE, + ENABLE_WEB_SEARCH, + WEB_SEARCH_ENGINE, BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - RAG_WEB_SEARCH_RESULT_COUNT, - RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - RAG_WEB_SEARCH_TRUST_ENV, - RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + WEB_SEARCH_RESULT_COUNT, + WEB_SEARCH_CONCURRENT_REQUESTS, + WEB_SEARCH_TRUST_ENV, + WEB_SEARCH_DOMAIN_FILTER_LIST, 
JINA_API_KEY, SEARCHAPI_API_KEY, SEARCHAPI_ENGINE, @@ -240,8 +241,7 @@ from open_webui.config import ( ONEDRIVE_CLIENT_ID, ENABLE_RAG_HYBRID_SEARCH, ENABLE_RAG_LOCAL_WEB_FETCH, - ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - ENABLE_RAG_WEB_SEARCH, + ENABLE_WEB_LOADER_SSL_VERIFICATION, ENABLE_GOOGLE_DRIVE_INTEGRATION, ENABLE_ONEDRIVE_INTEGRATION, UPLOAD_DIR, @@ -594,9 +594,7 @@ app.state.config.FILE_MAX_COUNT = RAG_FILE_MAX_COUNT app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = BYPASS_EMBEDDING_AND_RETRIEVAL app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH -app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( - ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION -) +app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ENABLE_WEB_LOADER_SSL_VERIFICATION app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL @@ -629,12 +627,16 @@ app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE app.state.config.YOUTUBE_LOADER_PROXY_URL = YOUTUBE_LOADER_PROXY_URL -app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH -app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE +app.state.config.ENABLE_WEB_SEARCH = ENABLE_WEB_SEARCH +app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE +app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST +app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT +app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS +app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE +app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) -app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ENABLE_GOOGLE_DRIVE_INTEGRATION 
app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ENABLE_ONEDRIVE_INTEGRATION @@ -662,11 +664,8 @@ app.state.config.PERPLEXITY_API_KEY = PERPLEXITY_API_KEY app.state.config.SOUGOU_API_SID = SOUGOU_API_SID app.state.config.SOUGOU_API_SK = SOUGOU_API_SK -app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT -app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS -app.state.config.RAG_WEB_LOADER_ENGINE = RAG_WEB_LOADER_ENGINE -app.state.config.RAG_WEB_SEARCH_TRUST_ENV = RAG_WEB_SEARCH_TRUST_ENV -app.state.config.PLAYWRIGHT_WS_URI = PLAYWRIGHT_WS_URI + +app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT app.state.config.FIRECRAWL_API_BASE_URL = FIRECRAWL_API_BASE_URL app.state.config.FIRECRAWL_API_KEY = FIRECRAWL_API_KEY @@ -1261,7 +1260,7 @@ async def get_app_config(request: Request): { "enable_direct_connections": app.state.config.ENABLE_DIRECT_CONNECTIONS, "enable_channels": app.state.config.ENABLE_CHANNELS, - "enable_web_search": app.state.config.ENABLE_RAG_WEB_SEARCH, + "enable_web_search": app.state.config.ENABLE_WEB_SEARCH, "enable_code_execution": app.state.config.ENABLE_CODE_EXECUTION, "enable_code_interpreter": app.state.config.ENABLE_CODE_INTERPRETER, "enable_image_generation": app.state.config.ENABLE_IMAGE_GENERATION, diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 942cb8483f..718cfe52fa 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -28,9 +28,9 @@ from open_webui.retrieval.loaders.tavily import TavilyLoader from open_webui.constants import ERROR_MESSAGES from open_webui.config import ( ENABLE_RAG_LOCAL_WEB_FETCH, - PLAYWRIGHT_WS_URI, + PLAYWRIGHT_WS_URL, PLAYWRIGHT_TIMEOUT, - RAG_WEB_LOADER_ENGINE, + WEB_LOADER_ENGINE, FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, TAVILY_API_KEY, @@ -584,13 +584,6 @@ class 
SafeWebBaseLoader(WebBaseLoader): return [document async for document in self.alazy_load()] -RAG_WEB_LOADER_ENGINES = defaultdict(lambda: SafeWebBaseLoader) -RAG_WEB_LOADER_ENGINES["playwright"] = SafePlaywrightURLLoader -RAG_WEB_LOADER_ENGINES["safe_web"] = SafeWebBaseLoader -RAG_WEB_LOADER_ENGINES["firecrawl"] = SafeFireCrawlLoader -RAG_WEB_LOADER_ENGINES["tavily"] = SafeTavilyLoader - - def get_web_loader( urls: Union[str, Sequence[str]], verify_ssl: bool = True, @@ -608,27 +601,36 @@ def get_web_loader( "trust_env": trust_env, } - if RAG_WEB_LOADER_ENGINE.value == "playwright": + if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web": + WebLoaderClass = SafeWebBaseLoader + if WEB_LOADER_ENGINE.value == "playwright": + WebLoaderClass = SafePlaywrightURLLoader web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value * 1000 - if PLAYWRIGHT_WS_URI.value: - web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URI.value + if PLAYWRIGHT_WS_URL.value: + web_loader_args["playwright_ws_url"] = PLAYWRIGHT_WS_URL.value - if RAG_WEB_LOADER_ENGINE.value == "firecrawl": + if WEB_LOADER_ENGINE.value == "firecrawl": + WebLoaderClass = SafeFireCrawlLoader web_loader_args["api_key"] = FIRECRAWL_API_KEY.value web_loader_args["api_url"] = FIRECRAWL_API_BASE_URL.value - if RAG_WEB_LOADER_ENGINE.value == "tavily": + if WEB_LOADER_ENGINE.value == "tavily": + WebLoaderClass = SafeTavilyLoader web_loader_args["api_key"] = TAVILY_API_KEY.value web_loader_args["extract_depth"] = TAVILY_EXTRACT_DEPTH.value - # Create the appropriate WebLoader based on the configuration - WebLoaderClass = RAG_WEB_LOADER_ENGINES[RAG_WEB_LOADER_ENGINE.value] - web_loader = WebLoaderClass(**web_loader_args) + if WebLoaderClass: + web_loader = WebLoaderClass(**web_loader_args) - log.debug( - "Using RAG_WEB_LOADER_ENGINE %s for %s URLs", - web_loader.__class__.__name__, - len(safe_urls), - ) + log.debug( + "Using WEB_LOADER_ENGINE %s for %s URLs", + web_loader.__class__.__name__, + 
len(safe_urls), + ) - return web_loader + return web_loader + else: + raise ValueError( + f"Invalid WEB_LOADER_ENGINE: {WEB_LOADER_ENGINE.value}. " + "Please set it to 'safe_web', 'playwright', 'firecrawl', or 'tavily'." + ) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index d00e303f1b..accb21d32e 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -352,482 +352,432 @@ async def update_reranking_config( async def get_rag_config(request: Request, user=Depends(get_admin_user)): return { "status": True, - "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, - "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + # RAG settings + "TEMPLATE": request.app.state.config.RAG_TEMPLATE, + "TOP_K": request.app.state.config.TOP_K, "BYPASS_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL, - "enable_google_drive_integration": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, - "enable_onedrive_integration": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, - "content_extraction": { - "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, - "tika_server_url": request.app.state.config.TIKA_SERVER_URL, - "docling_server_url": request.app.state.config.DOCLING_SERVER_URL, - "document_intelligence_config": { - "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, - "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, - }, - "mistral_ocr_config": { - "api_key": request.app.state.config.MISTRAL_OCR_API_KEY, - }, - }, - "chunk": { - "text_splitter": request.app.state.config.TEXT_SPLITTER, - "chunk_size": request.app.state.config.CHUNK_SIZE, - "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, - }, - "file": { - "max_size": request.app.state.config.FILE_MAX_SIZE, - "max_count": request.app.state.config.FILE_MAX_COUNT, - }, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + 
# Hybrid search settings + "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, + "TOP_K_RERANKER": request.app.state.config.TOP_K_RERANKER, + "RELEVANCE_THRESHOLD": request.app.state.config.RELEVANCE_THRESHOLD, + # Content extraction settings + "CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, + "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, + "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, + "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, + # Chunking settings + "TEXT_SPLITTER": request.app.state.config.TEXT_SPLITTER, + "CHUNK_SIZE": request.app.state.config.CHUNK_SIZE, + "CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP, + # File upload settings + "FILE_MAX_SIZE": request.app.state.config.FILE_MAX_SIZE, + "FILE_MAX_COUNT": request.app.state.config.FILE_MAX_COUNT, + # Integration settings + "ENABLE_GOOGLE_DRIVE_INTEGRATION": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, + "ENABLE_ONEDRIVE_INTEGRATION": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, + # Web search settings "web": { - "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, - "search": { - "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, - "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, - "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, - "google_pse_engine_id": request.app.state.config.GOOGLE_PSE_ENGINE_ID, - "brave_search_api_key": request.app.state.config.BRAVE_SEARCH_API_KEY, - "kagi_search_api_key": request.app.state.config.KAGI_SEARCH_API_KEY, - "mojeek_search_api_key": request.app.state.config.MOJEEK_SEARCH_API_KEY, - "bocha_search_api_key": 
request.app.state.config.BOCHA_SEARCH_API_KEY, - "serpstack_api_key": request.app.state.config.SERPSTACK_API_KEY, - "serpstack_https": request.app.state.config.SERPSTACK_HTTPS, - "serper_api_key": request.app.state.config.SERPER_API_KEY, - "serply_api_key": request.app.state.config.SERPLY_API_KEY, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "searchapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, - "searchapi_engine": request.app.state.config.SEARCHAPI_ENGINE, - "serpapi_api_key": request.app.state.config.SERPAPI_API_KEY, - "serpapi_engine": request.app.state.config.SERPAPI_ENGINE, - "jina_api_key": request.app.state.config.JINA_API_KEY, - "bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT, - "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, - "exa_api_key": request.app.state.config.EXA_API_KEY, - "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, - "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, - "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, - "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, - }, - "loader": { - "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, - "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, - "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, - "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, - "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, - "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, - "tavily_api_key": 
request.app.state.config.TAVILY_API_KEY, - "tavily_extract_depth": request.app.state.config.TAVILY_EXTRACT_DEPTH, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - "translation": request.app.state.YOUTUBE_LOADER_TRANSLATION, - }, - }, + "ENABLE_WEB_SEARCH": request.app.state.config.ENABLE_WEB_SEARCH, + "WEB_SEARCH_ENGINE": request.app.state.config.WEB_SEARCH_ENGINE, + "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, + "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, + "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, + "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, + "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, + "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, + "KAGI_SEARCH_API_KEY": request.app.state.config.KAGI_SEARCH_API_KEY, + "MOJEEK_SEARCH_API_KEY": request.app.state.config.MOJEEK_SEARCH_API_KEY, + "BOCHA_SEARCH_API_KEY": request.app.state.config.BOCHA_SEARCH_API_KEY, + "SERPSTACK_API_KEY": request.app.state.config.SERPSTACK_API_KEY, + "SERPSTACK_HTTPS": request.app.state.config.SERPSTACK_HTTPS, + "SERPER_API_KEY": request.app.state.config.SERPER_API_KEY, + "SERPLY_API_KEY": request.app.state.config.SERPLY_API_KEY, + "TAVILY_API_KEY": request.app.state.config.TAVILY_API_KEY, + "SEARCHAPI_API_KEY": request.app.state.config.SEARCHAPI_API_KEY, + "SEARCHAPI_ENGINE": request.app.state.config.SEARCHAPI_ENGINE, + "SERPAPI_API_KEY": request.app.state.config.SERPAPI_API_KEY, + "SERPAPI_ENGINE": request.app.state.config.SERPAPI_ENGINE, + 
"JINA_API_KEY": request.app.state.config.JINA_API_KEY, + "BING_SEARCH_V7_ENDPOINT": request.app.state.config.BING_SEARCH_V7_ENDPOINT, + "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, + "EXA_API_KEY": request.app.state.config.EXA_API_KEY, + "PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, + "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, + "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, + "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, + "FIRECRAWL_API_BASE_URL": request.app.state.config.FIRECRAWL_API_BASE_URL, + "TAVILY_EXTRACT_DEPTH": request.app.state.config.TAVILY_EXTRACT_DEPTH, + "YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION, }, } -class FileConfig(BaseModel): - max_size: Optional[int] = None - max_count: Optional[int] = None - - -class DocumentIntelligenceConfigForm(BaseModel): - endpoint: str - key: str - - -class MistralOCRConfigForm(BaseModel): - api_key: str - - -class ContentExtractionConfig(BaseModel): - engine: str = "" - tika_server_url: Optional[str] = None - docling_server_url: Optional[str] = None - document_intelligence_config: Optional[DocumentIntelligenceConfigForm] = None - mistral_ocr_config: Optional[MistralOCRConfigForm] = None - - -class ChunkParamUpdateForm(BaseModel): - text_splitter: Optional[str] = None - chunk_size: int - chunk_overlap: int - - -class YoutubeLoaderConfig(BaseModel): - language: list[str] - translation: Optional[str] = None - proxy_url: 
str = "" - - -class WebSearchConfig(BaseModel): - engine: Optional[str] = None - searxng_query_url: Optional[str] = None - google_pse_api_key: Optional[str] = None - google_pse_engine_id: Optional[str] = None - brave_search_api_key: Optional[str] = None - kagi_search_api_key: Optional[str] = None - mojeek_search_api_key: Optional[str] = None - bocha_search_api_key: Optional[str] = None - serpstack_api_key: Optional[str] = None - serpstack_https: Optional[bool] = None - serper_api_key: Optional[str] = None - serply_api_key: Optional[str] = None - tavily_api_key: Optional[str] = None - searchapi_api_key: Optional[str] = None - searchapi_engine: Optional[str] = None - serpapi_api_key: Optional[str] = None - serpapi_engine: Optional[str] = None - jina_api_key: Optional[str] = None - bing_search_v7_endpoint: Optional[str] = None - bing_search_v7_subscription_key: Optional[str] = None - exa_api_key: Optional[str] = None - perplexity_api_key: Optional[str] = None - sougou_api_sid: Optional[str] = None - sougou_api_sk: Optional[str] = None - result_count: Optional[int] = None - concurrent_requests: Optional[int] = None - domain_filter_list: Optional[List[str]] = [] - - -class WebLoaderConfig(BaseModel): - engine: Optional[str] = None - enable_ssl_verification: Optional[bool] = None - trust_env: Optional[bool] = None - bypass_embedding_and_retrieval: Optional[bool] = None - playwright_ws_uri: Optional[str] = None - playwright_timeout: Optional[int] = None - firecrawl_api_key: Optional[str] = None - firecrawl_api_base_url: Optional[str] = None - tavily_api_key: Optional[str] = None - tavily_extract_depth: Optional[str] = None - youtube: Optional[YoutubeLoaderConfig] = None - - class WebConfig(BaseModel): - ENABLE_RAG_WEB_SEARCH: Optional[bool] = None - search: WebSearchConfig - loader: WebLoaderConfig + ENABLE_WEB_SEARCH: Optional[bool] = None + WEB_SEARCH_ENGINE: Optional[str] = None + WEB_SEARCH_TRUST_ENV: Optional[bool] = None + WEB_SEARCH_RESULT_COUNT: Optional[int] = 
None + WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None + WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] + BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None + SEARXNG_QUERY_URL: Optional[str] = None + GOOGLE_PSE_API_KEY: Optional[str] = None + GOOGLE_PSE_ENGINE_ID: Optional[str] = None + BRAVE_SEARCH_API_KEY: Optional[str] = None + KAGI_SEARCH_API_KEY: Optional[str] = None + MOJEEK_SEARCH_API_KEY: Optional[str] = None + BOCHA_SEARCH_API_KEY: Optional[str] = None + SERPSTACK_API_KEY: Optional[str] = None + SERPSTACK_HTTPS: Optional[bool] = None + SERPER_API_KEY: Optional[str] = None + SERPLY_API_KEY: Optional[str] = None + TAVILY_API_KEY: Optional[str] = None + SEARCHAPI_API_KEY: Optional[str] = None + SEARCHAPI_ENGINE: Optional[str] = None + SERPAPI_API_KEY: Optional[str] = None + SERPAPI_ENGINE: Optional[str] = None + JINA_API_KEY: Optional[str] = None + BING_SEARCH_V7_ENDPOINT: Optional[str] = None + BING_SEARCH_V7_SUBSCRIPTION_KEY: Optional[str] = None + EXA_API_KEY: Optional[str] = None + PERPLEXITY_API_KEY: Optional[str] = None + SOUGOU_API_SID: Optional[str] = None + SOUGOU_API_SK: Optional[str] = None + WEB_LOADER_ENGINE: Optional[str] = None + ENABLE_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None + PLAYWRIGHT_WS_URL: Optional[str] = None + PLAYWRIGHT_TIMEOUT: Optional[int] = None + FIRECRAWL_API_KEY: Optional[str] = None + FIRECRAWL_API_BASE_URL: Optional[str] = None + TAVILY_EXTRACT_DEPTH: Optional[str] = None + YOUTUBE_LOADER_LANGUAGE: Optional[List[str]] = None + YOUTUBE_LOADER_PROXY_URL: Optional[str] = None + YOUTUBE_LOADER_TRANSLATION: Optional[str] = None -class ConfigUpdateForm(BaseModel): - RAG_FULL_CONTEXT: Optional[bool] = None +class ConfigForm(BaseModel): + # RAG settings + TEMPLATE: Optional[str] = None + TOP_K: Optional[int] = None BYPASS_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None - pdf_extract_images: Optional[bool] = None - enable_google_drive_integration: Optional[bool] = None - 
enable_onedrive_integration: Optional[bool] = None - file: Optional[FileConfig] = None - content_extraction: Optional[ContentExtractionConfig] = None - chunk: Optional[ChunkParamUpdateForm] = None + RAG_FULL_CONTEXT: Optional[bool] = None + + # Hybrid search settings + ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None + TOP_K_RERANKER: Optional[int] = None + RELEVANCE_THRESHOLD: Optional[float] = None + + # Content extraction settings + CONTENT_EXTRACTION_ENGINE: Optional[str] = None + PDF_EXTRACT_IMAGES: Optional[bool] = None + TIKA_SERVER_URL: Optional[str] = None + DOCLING_SERVER_URL: Optional[str] = None + DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None + DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None + MISTRAL_OCR_API_KEY: Optional[str] = None + + # Chunking settings + TEXT_SPLITTER: Optional[str] = None + CHUNK_SIZE: Optional[int] = None + CHUNK_OVERLAP: Optional[int] = None + + # File upload settings + FILE_MAX_SIZE: Optional[int] = None + FILE_MAX_COUNT: Optional[int] = None + + # Integration settings + ENABLE_GOOGLE_DRIVE_INTEGRATION: Optional[bool] = None + ENABLE_ONEDRIVE_INTEGRATION: Optional[bool] = None + + # Web search settings web: Optional[WebConfig] = None @router.post("/config/update") async def update_rag_config( - request: Request, form_data: ConfigUpdateForm, user=Depends(get_admin_user) + request: Request, form_data: ConfigForm, user=Depends(get_admin_user) ): - request.app.state.config.PDF_EXTRACT_IMAGES = ( - form_data.pdf_extract_images - if form_data.pdf_extract_images is not None - else request.app.state.config.PDF_EXTRACT_IMAGES + # RAG settings + request.app.state.config.RAG_TEMPLATE = ( + form_data.TEMPLATE + if form_data.TEMPLATE is not None + else request.app.state.config.RAG_TEMPLATE + ) + request.app.state.config.TOP_K = ( + form_data.TOP_K + if form_data.TOP_K is not None + else request.app.state.config.TOP_K + ) + request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = ( + form_data.BYPASS_EMBEDDING_AND_RETRIEVAL + if 
form_data.BYPASS_EMBEDDING_AND_RETRIEVAL is not None + else request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL ) - request.app.state.config.RAG_FULL_CONTEXT = ( form_data.RAG_FULL_CONTEXT if form_data.RAG_FULL_CONTEXT is not None else request.app.state.config.RAG_FULL_CONTEXT ) - request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = ( - form_data.BYPASS_EMBEDDING_AND_RETRIEVAL - if form_data.BYPASS_EMBEDDING_AND_RETRIEVAL is not None - else request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + # Hybrid search settings + request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( + form_data.ENABLE_RAG_HYBRID_SEARCH + if form_data.ENABLE_RAG_HYBRID_SEARCH is not None + else request.app.state.config.ENABLE_RAG_HYBRID_SEARCH + ) + # Free up memory if hybrid search is disabled + if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: + request.app.state.rf = None + + request.app.state.config.TOP_K_RERANKER = ( + form_data.TOP_K_RERANKER + if form_data.TOP_K_RERANKER is not None + else request.app.state.config.TOP_K_RERANKER + ) + request.app.state.config.RELEVANCE_THRESHOLD = ( + form_data.RELEVANCE_THRESHOLD + if form_data.RELEVANCE_THRESHOLD is not None + else request.app.state.config.RELEVANCE_THRESHOLD ) + # Content extraction settings + request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( + form_data.CONTENT_EXTRACTION_ENGINE + if form_data.CONTENT_EXTRACTION_ENGINE is not None + else request.app.state.config.CONTENT_EXTRACTION_ENGINE + ) + request.app.state.config.PDF_EXTRACT_IMAGES = ( + form_data.PDF_EXTRACT_IMAGES + if form_data.PDF_EXTRACT_IMAGES is not None + else request.app.state.config.PDF_EXTRACT_IMAGES + ) + request.app.state.config.TIKA_SERVER_URL = ( + form_data.TIKA_SERVER_URL + if form_data.TIKA_SERVER_URL is not None + else request.app.state.config.TIKA_SERVER_URL + ) + request.app.state.config.DOCLING_SERVER_URL = ( + form_data.DOCLING_SERVER_URL + if form_data.DOCLING_SERVER_URL is not None + else 
request.app.state.config.DOCLING_SERVER_URL + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( + form_data.DOCUMENT_INTELLIGENCE_ENDPOINT + if form_data.DOCUMENT_INTELLIGENCE_ENDPOINT is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT + ) + request.app.state.config.DOCUMENT_INTELLIGENCE_KEY = ( + form_data.DOCUMENT_INTELLIGENCE_KEY + if form_data.DOCUMENT_INTELLIGENCE_KEY is not None + else request.app.state.config.DOCUMENT_INTELLIGENCE_KEY + ) + request.app.state.config.MISTRAL_OCR_API_KEY = ( + form_data.MISTRAL_OCR_API_KEY + if form_data.MISTRAL_OCR_API_KEY is not None + else request.app.state.config.MISTRAL_OCR_API_KEY + ) + + # Chunking settings + request.app.state.config.TEXT_SPLITTER = ( + form_data.TEXT_SPLITTER + if form_data.TEXT_SPLITTER is not None + else request.app.state.config.TEXT_SPLITTER + ) + request.app.state.config.CHUNK_SIZE = ( + form_data.CHUNK_SIZE + if form_data.CHUNK_SIZE is not None + else request.app.state.config.CHUNK_SIZE + ) + request.app.state.config.CHUNK_OVERLAP = ( + form_data.CHUNK_OVERLAP + if form_data.CHUNK_OVERLAP is not None + else request.app.state.config.CHUNK_OVERLAP + ) + + # File upload settings + request.app.state.config.FILE_MAX_SIZE = ( + form_data.FILE_MAX_SIZE + if form_data.FILE_MAX_SIZE is not None + else request.app.state.config.FILE_MAX_SIZE + ) + request.app.state.config.FILE_MAX_COUNT = ( + form_data.FILE_MAX_COUNT + if form_data.FILE_MAX_COUNT is not None + else request.app.state.config.FILE_MAX_COUNT + ) + + # Integration settings request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION = ( form_data.enable_google_drive_integration if form_data.enable_google_drive_integration is not None else request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION ) - request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION = ( - form_data.enable_onedrive_integration - if form_data.enable_onedrive_integration is not None + form_data.ENABLE_ONEDRIVE_INTEGRATION + if 
form_data.ENABLE_ONEDRIVE_INTEGRATION is not None else request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION ) - if form_data.file is not None: - request.app.state.config.FILE_MAX_SIZE = form_data.file.max_size - request.app.state.config.FILE_MAX_COUNT = form_data.file.max_count - - if form_data.content_extraction is not None: - log.info( - f"Updating content extraction: {request.app.state.config.CONTENT_EXTRACTION_ENGINE} to {form_data.content_extraction.engine}" - ) - request.app.state.config.CONTENT_EXTRACTION_ENGINE = ( - form_data.content_extraction.engine - ) - request.app.state.config.TIKA_SERVER_URL = ( - form_data.content_extraction.tika_server_url - ) - request.app.state.config.DOCLING_SERVER_URL = ( - form_data.content_extraction.docling_server_url - ) - if form_data.content_extraction.document_intelligence_config is not None: - request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = ( - form_data.content_extraction.document_intelligence_config.endpoint - ) - request.app.state.config.DOCUMENT_INTELLIGENCE_KEY = ( - form_data.content_extraction.document_intelligence_config.key - ) - if form_data.content_extraction.mistral_ocr_config is not None: - request.app.state.config.MISTRAL_OCR_API_KEY = ( - form_data.content_extraction.mistral_ocr_config.api_key - ) - - if form_data.chunk is not None: - request.app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter - request.app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size - request.app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap - if form_data.web is not None: - request.app.state.config.ENABLE_RAG_WEB_SEARCH = ( - form_data.web.ENABLE_RAG_WEB_SEARCH + # Web search settings + request.app.state.config.ENABLE_WEB_SEARCH = form_data.web.ENABLE_WEB_SEARCH + request.app.state.config.WEB_SEARCH_ENGINE = form_data.web.WEB_SEARCH_ENGINE + request.app.state.config.WEB_SEARCH_TRUST_ENV = ( + form_data.web.WEB_SEARCH_TRUST_ENV ) - - request.app.state.config.RAG_WEB_SEARCH_ENGINE = 
form_data.web.search.engine - request.app.state.config.SEARXNG_QUERY_URL = ( - form_data.web.search.searxng_query_url + request.app.state.config.WEB_SEARCH_RESULT_COUNT = ( + form_data.web.WEB_SEARCH_RESULT_COUNT ) - request.app.state.config.GOOGLE_PSE_API_KEY = ( - form_data.web.search.google_pse_api_key + request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = ( + form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS ) - request.app.state.config.GOOGLE_PSE_ENGINE_ID = ( - form_data.web.search.google_pse_engine_id - ) - request.app.state.config.BRAVE_SEARCH_API_KEY = ( - form_data.web.search.brave_search_api_key - ) - request.app.state.config.KAGI_SEARCH_API_KEY = ( - form_data.web.search.kagi_search_api_key - ) - request.app.state.config.MOJEEK_SEARCH_API_KEY = ( - form_data.web.search.mojeek_search_api_key - ) - request.app.state.config.BOCHA_SEARCH_API_KEY = ( - form_data.web.search.bocha_search_api_key - ) - request.app.state.config.SERPSTACK_API_KEY = ( - form_data.web.search.serpstack_api_key - ) - request.app.state.config.SERPSTACK_HTTPS = form_data.web.search.serpstack_https - request.app.state.config.SERPER_API_KEY = form_data.web.search.serper_api_key - request.app.state.config.SERPLY_API_KEY = form_data.web.search.serply_api_key - request.app.state.config.TAVILY_API_KEY = form_data.web.search.tavily_api_key - request.app.state.config.SEARCHAPI_API_KEY = ( - form_data.web.search.searchapi_api_key - ) - request.app.state.config.SEARCHAPI_ENGINE = ( - form_data.web.search.searchapi_engine - ) - request.app.state.config.SERPAPI_API_KEY = form_data.web.search.serpapi_api_key - request.app.state.config.SERPAPI_ENGINE = form_data.web.search.serpapi_engine - request.app.state.config.JINA_API_KEY = form_data.web.search.jina_api_key - request.app.state.config.BING_SEARCH_V7_ENDPOINT = ( - form_data.web.search.bing_search_v7_endpoint - ) - request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( - form_data.web.search.bing_search_v7_subscription_key - ) - 
request.app.state.config.EXA_API_KEY = form_data.web.search.exa_api_key - request.app.state.config.PERPLEXITY_API_KEY = ( - form_data.web.search.perplexity_api_key - ) - request.app.state.config.SOUGOU_API_SID = form_data.web.search.sougou_api_sid - request.app.state.config.SOUGOU_API_SK = form_data.web.search.sougou_api_sk - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = ( - form_data.web.search.result_count - ) - request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = ( - form_data.web.search.concurrent_requests - ) - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = ( - form_data.web.search.domain_filter_list - ) - - request.app.state.config.RAG_WEB_LOADER_ENGINE = form_data.web.loader.engine - request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = ( - # Note: When UI "Bypass SSL verification for Websites"=True then ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION=False - form_data.web.loader.enable_ssl_verification - ) - request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV = ( - form_data.web.loader.trust_env + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = ( + form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST ) request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( - form_data.web.loader.bypass_embedding_and_retrieval + form_data.web.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL ) - request.app.state.config.PLAYWRIGHT_WS_URI = ( - form_data.web.loader.playwright_ws_uri + request.app.state.config.SEARXNG_QUERY_URL = form_data.web.SEARXNG_QUERY_URL + request.app.state.config.GOOGLE_PSE_API_KEY = form_data.web.GOOGLE_PSE_API_KEY + request.app.state.config.GOOGLE_PSE_ENGINE_ID = ( + form_data.web.GOOGLE_PSE_ENGINE_ID ) - request.app.state.config.PLAYWRIGHT_TIMEOUT = ( - form_data.web.loader.playwright_timeout + request.app.state.config.BRAVE_SEARCH_API_KEY = ( + form_data.web.BRAVE_SEARCH_API_KEY ) - request.app.state.config.FIRECRAWL_API_KEY = ( - form_data.web.loader.firecrawl_api_key + 
request.app.state.config.KAGI_SEARCH_API_KEY = form_data.web.KAGI_SEARCH_API_KEY + request.app.state.config.MOJEEK_SEARCH_API_KEY = ( + form_data.web.MOJEEK_SEARCH_API_KEY ) + request.app.state.config.BOCHA_SEARCH_API_KEY = ( + form_data.web.BOCHA_SEARCH_API_KEY + ) + request.app.state.config.SERPSTACK_API_KEY = form_data.web.SERPSTACK_API_KEY + request.app.state.config.SERPSTACK_HTTPS = form_data.web.SERPSTACK_HTTPS + request.app.state.config.SERPER_API_KEY = form_data.web.SERPER_API_KEY + request.app.state.config.SERPLY_API_KEY = form_data.web.SERPLY_API_KEY + request.app.state.config.TAVILY_API_KEY = form_data.web.TAVILY_API_KEY + request.app.state.config.SEARCHAPI_API_KEY = form_data.web.SEARCHAPI_API_KEY + request.app.state.config.SEARCHAPI_ENGINE = form_data.web.SEARCHAPI_ENGINE + request.app.state.config.SERPAPI_API_KEY = form_data.web.SERPAPI_API_KEY + request.app.state.config.SERPAPI_ENGINE = form_data.web.SERPAPI_ENGINE + request.app.state.config.JINA_API_KEY = form_data.web.JINA_API_KEY + request.app.state.config.BING_SEARCH_V7_ENDPOINT = ( + form_data.web.BING_SEARCH_V7_ENDPOINT + ) + request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY = ( + form_data.web.BING_SEARCH_V7_SUBSCRIPTION_KEY + ) + request.app.state.config.EXA_API_KEY = form_data.web.EXA_API_KEY + request.app.state.config.PERPLEXITY_API_KEY = form_data.web.PERPLEXITY_API_KEY + request.app.state.config.SOUGOU_API_SID = form_data.web.SOUGOU_API_SID + request.app.state.config.SOUGOU_API_SK = form_data.web.SOUGOU_API_SK + + # Web loader settings + request.app.state.config.WEB_LOADER_ENGINE = form_data.web.WEB_LOADER_ENGINE + request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ( + form_data.web.ENABLE_WEB_LOADER_SSL_VERIFICATION + ) + request.app.state.config.PLAYWRIGHT_WS_URL = form_data.web.PLAYWRIGHT_WS_URL + request.app.state.config.PLAYWRIGHT_TIMEOUT = form_data.web.PLAYWRIGHT_TIMEOUT + request.app.state.config.FIRECRAWL_API_KEY = form_data.web.FIRECRAWL_API_KEY 
request.app.state.config.FIRECRAWL_API_BASE_URL = ( - form_data.web.loader.firecrawl_api_base_url + form_data.web.FIRECRAWL_API_BASE_URL ) - request.app.state.config.TAVILY_API_KEY = form_data.web.loader.tavily_api_key request.app.state.config.TAVILY_EXTRACT_DEPTH = ( - form_data.web.loader.tavily_extract_depth + form_data.web.TAVILY_EXTRACT_DEPTH ) request.app.state.config.YOUTUBE_LOADER_LANGUAGE = ( - form_data.web.loader.youtube.language + form_data.web.YOUTUBE_LOADER_LANGUAGE ) request.app.state.config.YOUTUBE_LOADER_PROXY_URL = ( - form_data.web.loader.youtube.proxy_url + form_data.web.YOUTUBE_LOADER_PROXY_URL ) - request.app.state.YOUTUBE_LOADER_TRANSLATION = ( - form_data.web.loader.youtube.translation + request.app.state.config.YOUTUBE_LOADER_TRANSLATION = ( + form_data.web.YOUTUBE_LOADER_TRANSLATION ) return { "status": True, - "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, - "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, "BYPASS_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL, - "file": { - "max_size": request.app.state.config.FILE_MAX_SIZE, - "max_count": request.app.state.config.FILE_MAX_COUNT, - }, - "content_extraction": { - "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, - "tika_server_url": request.app.state.config.TIKA_SERVER_URL, - "docling_server_url": request.app.state.config.DOCLING_SERVER_URL, - "document_intelligence_config": { - "endpoint": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, - "key": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, - }, - "mistral_ocr_config": { - "api_key": request.app.state.config.MISTRAL_OCR_API_KEY, - }, - }, - "chunk": { - "text_splitter": request.app.state.config.TEXT_SPLITTER, - "chunk_size": request.app.state.config.CHUNK_SIZE, - "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, - }, + "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + # Content extraction settings + 
"CONTENT_EXTRACTION_ENGINE": request.app.state.config.CONTENT_EXTRACTION_ENGINE, + "PDF_EXTRACT_IMAGES": request.app.state.config.PDF_EXTRACT_IMAGES, + "TIKA_SERVER_URL": request.app.state.config.TIKA_SERVER_URL, + "DOCLING_SERVER_URL": request.app.state.config.DOCLING_SERVER_URL, + "DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT, + "DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY, + "MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY, + # Chunking settings + "TEXT_SPLITTER": request.app.state.config.TEXT_SPLITTER, + "CHUNK_SIZE": request.app.state.config.CHUNK_SIZE, + "CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP, + # File upload settings + "FILE_MAX_SIZE": request.app.state.config.FILE_MAX_SIZE, + "FILE_MAX_COUNT": request.app.state.config.FILE_MAX_COUNT, + # Integration settings + "ENABLE_GOOGLE_DRIVE_INTEGRATION": request.app.state.config.ENABLE_GOOGLE_DRIVE_INTEGRATION, + "ENABLE_ONEDRIVE_INTEGRATION": request.app.state.config.ENABLE_ONEDRIVE_INTEGRATION, + # Web search settings "web": { - "ENABLE_RAG_WEB_SEARCH": request.app.state.config.ENABLE_RAG_WEB_SEARCH, - "search": { - "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, - "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, - "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, - "google_pse_engine_id": request.app.state.config.GOOGLE_PSE_ENGINE_ID, - "brave_search_api_key": request.app.state.config.BRAVE_SEARCH_API_KEY, - "kagi_search_api_key": request.app.state.config.KAGI_SEARCH_API_KEY, - "mojeek_search_api_key": request.app.state.config.MOJEEK_SEARCH_API_KEY, - "bocha_search_api_key": request.app.state.config.BOCHA_SEARCH_API_KEY, - "serpstack_api_key": request.app.state.config.SERPSTACK_API_KEY, - "serpstack_https": request.app.state.config.SERPSTACK_HTTPS, - "serper_api_key": request.app.state.config.SERPER_API_KEY, - "serply_api_key": 
request.app.state.config.SERPLY_API_KEY, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "searchapi_api_key": request.app.state.config.SEARCHAPI_API_KEY, - "searchapi_engine": request.app.state.config.SEARCHAPI_ENGINE, - "serpapi_api_key": request.app.state.config.SERPAPI_API_KEY, - "serpapi_engine": request.app.state.config.SERPAPI_ENGINE, - "jina_api_key": request.app.state.config.JINA_API_KEY, - "bing_search_v7_endpoint": request.app.state.config.BING_SEARCH_V7_ENDPOINT, - "bing_search_v7_subscription_key": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, - "exa_api_key": request.app.state.config.EXA_API_KEY, - "perplexity_api_key": request.app.state.config.PERPLEXITY_API_KEY, - "sougou_api_sid": request.app.state.config.SOUGOU_API_SID, - "sougou_api_sk": request.app.state.config.SOUGOU_API_SK, - "result_count": request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - "concurrent_requests": request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - "domain_filter_list": request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, - }, - "loader": { - "engine": request.app.state.config.RAG_WEB_LOADER_ENGINE, - "enable_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - "trust_env": request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, - "bypass_embedding_and_retrieval": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, - "playwright_ws_uri": request.app.state.config.PLAYWRIGHT_WS_URI, - "playwright_timeout": request.app.state.config.PLAYWRIGHT_TIMEOUT, - "firecrawl_api_key": request.app.state.config.FIRECRAWL_API_KEY, - "firecrawl_api_base_url": request.app.state.config.FIRECRAWL_API_BASE_URL, - "tavily_api_key": request.app.state.config.TAVILY_API_KEY, - "tavily_extract_depth": request.app.state.config.TAVILY_EXTRACT_DEPTH, - "youtube": { - "language": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, - "proxy_url": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, - "translation": 
request.app.state.YOUTUBE_LOADER_TRANSLATION, - }, - }, + "ENABLE_WEB_SEARCH": request.app.state.config.ENABLE_WEB_SEARCH, + "WEB_SEARCH_ENGINE": request.app.state.config.WEB_SEARCH_ENGINE, + "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, + "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, + "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, + "SEARXNG_QUERY_URL": request.app.state.config.SEARXNG_QUERY_URL, + "GOOGLE_PSE_API_KEY": request.app.state.config.GOOGLE_PSE_API_KEY, + "GOOGLE_PSE_ENGINE_ID": request.app.state.config.GOOGLE_PSE_ENGINE_ID, + "BRAVE_SEARCH_API_KEY": request.app.state.config.BRAVE_SEARCH_API_KEY, + "KAGI_SEARCH_API_KEY": request.app.state.config.KAGI_SEARCH_API_KEY, + "MOJEEK_SEARCH_API_KEY": request.app.state.config.MOJEEK_SEARCH_API_KEY, + "BOCHA_SEARCH_API_KEY": request.app.state.config.BOCHA_SEARCH_API_KEY, + "SERPSTACK_API_KEY": request.app.state.config.SERPSTACK_API_KEY, + "SERPSTACK_HTTPS": request.app.state.config.SERPSTACK_HTTPS, + "SERPER_API_KEY": request.app.state.config.SERPER_API_KEY, + "SERPLY_API_KEY": request.app.state.config.SERPLY_API_KEY, + "TAVILY_API_KEY": request.app.state.config.TAVILY_API_KEY, + "SEARCHAPI_API_KEY": request.app.state.config.SEARCHAPI_API_KEY, + "SEARCHAPI_ENGINE": request.app.state.config.SEARCHAPI_ENGINE, + "SERPAPI_API_KEY": request.app.state.config.SERPAPI_API_KEY, + "SERPAPI_ENGINE": request.app.state.config.SERPAPI_ENGINE, + "JINA_API_KEY": request.app.state.config.JINA_API_KEY, + "BING_SEARCH_V7_ENDPOINT": request.app.state.config.BING_SEARCH_V7_ENDPOINT, + "BING_SEARCH_V7_SUBSCRIPTION_KEY": request.app.state.config.BING_SEARCH_V7_SUBSCRIPTION_KEY, + "EXA_API_KEY": request.app.state.config.EXA_API_KEY, + 
"PERPLEXITY_API_KEY": request.app.state.config.PERPLEXITY_API_KEY, + "SOUGOU_API_SID": request.app.state.config.SOUGOU_API_SID, + "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, + "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, + "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, + "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, + "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, + "FIRECRAWL_API_BASE_URL": request.app.state.config.FIRECRAWL_API_BASE_URL, + "TAVILY_EXTRACT_DEPTH": request.app.state.config.TAVILY_EXTRACT_DEPTH, + "YOUTUBE_LOADER_LANGUAGE": request.app.state.config.YOUTUBE_LOADER_LANGUAGE, + "YOUTUBE_LOADER_PROXY_URL": request.app.state.config.YOUTUBE_LOADER_PROXY_URL, + "YOUTUBE_LOADER_TRANSLATION": request.app.state.YOUTUBE_LOADER_TRANSLATION, }, } -@router.get("/template") -async def get_rag_template(request: Request, user=Depends(get_verified_user)): - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - } - - -@router.get("/query/settings") -async def get_query_settings(request: Request, user=Depends(get_admin_user)): - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - "k": request.app.state.config.TOP_K, - "k_reranker": request.app.state.config.TOP_K_RERANKER, - "r": request.app.state.config.RELEVANCE_THRESHOLD, - "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, - } - - -class QuerySettingsForm(BaseModel): - k: Optional[int] = None - k_reranker: Optional[int] = None - r: Optional[float] = None - template: Optional[str] = None - hybrid: Optional[bool] = None - - -@router.post("/query/settings/update") -async def update_query_settings( - request: Request, form_data: QuerySettingsForm, user=Depends(get_admin_user) -): - request.app.state.config.RAG_TEMPLATE = form_data.template - 
request.app.state.config.TOP_K = form_data.k if form_data.k else 4 - request.app.state.config.TOP_K_RERANKER = form_data.k_reranker or 4 - request.app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0 - - request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( - form_data.hybrid if form_data.hybrid else False - ) - - if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: - request.app.state.rf = None - - return { - "status": True, - "template": request.app.state.config.RAG_TEMPLATE, - "k": request.app.state.config.TOP_K, - "k_reranker": request.app.state.config.TOP_K_RERANKER, - "r": request.app.state.config.RELEVANCE_THRESHOLD, - "hybrid": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, - } - - #################################### # # Document process and retrieval @@ -1268,8 +1218,8 @@ def process_web( loader = get_web_loader( form_data.url, - verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, + verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, ) docs = loader.load() content = " ".join([doc.page_content for doc in docs]) @@ -1333,8 +1283,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_searxng( request.app.state.config.SEARXNG_QUERY_URL, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SEARXNG_QUERY_URL found in environment variables") @@ -1347,8 +1297,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.GOOGLE_PSE_API_KEY, request.app.state.config.GOOGLE_PSE_ENGINE_ID, query, - 
request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception( @@ -1359,8 +1309,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_brave( request.app.state.config.BRAVE_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables") @@ -1369,8 +1319,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_kagi( request.app.state.config.KAGI_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No KAGI_SEARCH_API_KEY found in environment variables") @@ -1379,8 +1329,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_mojeek( request.app.state.config.MOJEEK_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No MOJEEK_SEARCH_API_KEY found in environment variables") @@ -1389,8 +1339,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_bocha( request.app.state.config.BOCHA_SEARCH_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - 
request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No BOCHA_SEARCH_API_KEY found in environment variables") @@ -1399,8 +1349,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serpstack( request.app.state.config.SERPSTACK_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, https_enabled=request.app.state.config.SERPSTACK_HTTPS, ) else: @@ -1410,8 +1360,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serper( request.app.state.config.SERPER_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPER_API_KEY found in environment variables") @@ -1420,24 +1370,24 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_serply( request.app.state.config.SERPLY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPLY_API_KEY found in environment variables") elif engine == "duckduckgo": return search_duckduckgo( query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "tavily": if 
request.app.state.config.TAVILY_API_KEY: return search_tavily( request.app.state.config.TAVILY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No TAVILY_API_KEY found in environment variables") @@ -1447,8 +1397,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.SEARCHAPI_API_KEY, request.app.state.config.SEARCHAPI_ENGINE, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SEARCHAPI_API_KEY found in environment variables") @@ -1458,8 +1408,8 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.SERPAPI_API_KEY, request.app.state.config.SERPAPI_ENGINE, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: raise Exception("No SERPAPI_API_KEY found in environment variables") @@ -1467,7 +1417,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: return search_jina( request.app.state.config.JINA_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, ) elif engine == "bing": return search_bing( @@ -1475,34 +1425,39 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: request.app.state.config.BING_SEARCH_V7_ENDPOINT, str(DEFAULT_LOCALE), query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - 
request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "exa": return search_exa( request.app.state.config.EXA_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) elif engine == "perplexity": return search_perplexity( request.app.state.config.PERPLEXITY_API_KEY, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) - elif engine == 'sougou': - if request.app.state.config.SOUGOU_API_SID and request.app.state.config.SOUGOU_API_SK: + elif engine == "sougou": + if ( + request.app.state.config.SOUGOU_API_SID + and request.app.state.config.SOUGOU_API_SK + ): return search_sougou( request.app.state.config.SOUGOU_API_SID, request.app.state.config.SOUGOU_API_SK, query, - request.app.state.config.RAG_WEB_SEARCH_RESULT_COUNT, - request.app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST, + request.app.state.config.WEB_SEARCH_RESULT_COUNT, + request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, ) else: - raise Exception("No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables") + raise Exception( + "No SOUGOU_API_SID or SOUGOU_API_SK found in environment variables" + ) else: raise Exception("No search engine API key found in environment variables") @@ -1513,10 +1468,10 @@ async def process_web_search( ): try: logging.info( - f"trying to web search with {request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query}" + f"trying to web search with {request.app.state.config.WEB_SEARCH_ENGINE, form_data.query}" ) web_results = search_web( - request, 
request.app.state.config.RAG_WEB_SEARCH_ENGINE, form_data.query + request, request.app.state.config.WEB_SEARCH_ENGINE, form_data.query ) except Exception as e: log.exception(e) @@ -1532,9 +1487,9 @@ async def process_web_search( urls = [result.link for result in web_results] loader = get_web_loader( urls, - verify_ssl=request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS, - trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV, + verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, ) docs = await loader.aload() urls = [ diff --git a/backend/start.sh b/backend/start.sh index b9a30fd3da..4588e4c348 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -4,8 +4,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" || exit # Add conditional Playwright browser installation -if [[ "${RAG_WEB_LOADER_ENGINE,,}" == "playwright" ]]; then - if [[ -z "${PLAYWRIGHT_WS_URI}" ]]; then +if [[ "${WEB_LOADER_ENGINE,,}" == "playwright" ]]; then + if [[ -z "${PLAYWRIGHT_WS_URL}" ]]; then echo "Installing Playwright browsers..." playwright install chromium playwright install-deps chromium diff --git a/backend/start_windows.bat b/backend/start_windows.bat index 661ecc494e..8d9aae3ac6 100644 --- a/backend/start_windows.bat +++ b/backend/start_windows.bat @@ -7,8 +7,8 @@ SET "SCRIPT_DIR=%~dp0" cd /d "%SCRIPT_DIR%" || exit /b :: Add conditional Playwright browser installation -IF /I "%RAG_WEB_LOADER_ENGINE%" == "playwright" ( - IF "%PLAYWRIGHT_WS_URI%" == "" ( +IF /I "%WEB_LOADER_ENGINE%" == "playwright" ( + IF "%PLAYWRIGHT_WS_URL%" == "" ( echo Installing Playwright browsers... 
playwright install chromium playwright install-deps chromium diff --git a/docker-compose.playwright.yaml b/docker-compose.playwright.yaml index fe570bed01..fa2b49ff9a 100644 --- a/docker-compose.playwright.yaml +++ b/docker-compose.playwright.yaml @@ -6,5 +6,5 @@ services: open-webui: environment: - - 'RAG_WEB_LOADER_ENGINE=playwright' - - 'PLAYWRIGHT_WS_URI=ws://playwright:3000' \ No newline at end of file + - 'WEB_LOADER_ENGINE=playwright' + - 'PLAYWRIGHT_WS_URL=ws://playwright:3000' diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 31317fe0b9..f4b937b68f 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -50,9 +50,9 @@ type YoutubeConfigForm = { }; type RAGConfigForm = { - pdf_extract_images?: boolean; - enable_google_drive_integration?: boolean; - enable_onedrive_integration?: boolean; + PDF_EXTRACT_IMAGES?: boolean; + ENABLE_GOOGLE_DRIVE_INTEGRATION?: boolean; + ENABLE_ONEDRIVE_INTEGRATION?: boolean; chunk?: ChunkConfigForm; content_extraction?: ContentExtractConfigForm; web_loader_ssl_verification?: boolean; @@ -89,33 +89,6 @@ export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => return res; }; -export const getRAGTemplate = async (token: string) => { - let error = null; - - const res = await fetch(`${RETRIEVAL_API_BASE_URL}/template`, { - method: 'GET', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}` - } - }) - .then(async (res) => { - if (!res.ok) throw await res.json(); - return res.json(); - }) - .catch((err) => { - console.log(err); - error = err.detail; - return null; - }); - - if (error) { - throw error; - } - - return res?.template ?? 
''; -}; - export const getQuerySettings = async (token: string) => { let error = null; diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 97c62c6a41..2047a07e76 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -17,8 +17,8 @@ updateRAGConfig } from '$lib/apis/retrieval'; - import { reindexKnowledgeFiles} from '$lib/apis/knowledge'; - import { deleteAllFiles } from '$lib/apis/files'; + import { reindexKnowledgeFiles } from '$lib/apis/knowledge'; + import { deleteAllFiles } from '$lib/apis/files'; import ResetUploadDirConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; import ResetVectorDBConfirmDialog from '$lib/components/common/ConfirmDialog.svelte'; @@ -27,6 +27,7 @@ import Tooltip from '$lib/components/common/Tooltip.svelte'; import Switch from '$lib/components/common/Switch.svelte'; import Textarea from '$lib/components/common/Textarea.svelte'; + import Spinner from '$lib/components/common/Spinner.svelte'; const i18n = getContext('i18n'); @@ -42,31 +43,6 @@ let embeddingBatchSize = 1; let rerankingModel = ''; - let fileMaxSize = null; - let fileMaxCount = null; - - let contentExtractionEngine = 'default'; - let tikaServerUrl = ''; - let showTikaServerUrl = false; - let doclingServerUrl = ''; - let showDoclingServerUrl = false; - let documentIntelligenceEndpoint = ''; - let documentIntelligenceKey = ''; - let showDocumentIntelligenceConfig = false; - let mistralApiKey = ''; - let showMistralOcrConfig = false; - - let textSplitter = ''; - let chunkSize = 0; - let chunkOverlap = 0; - let pdfExtractImages = true; - - let RAG_FULL_CONTEXT = false; - let BYPASS_EMBEDDING_AND_RETRIEVAL = false; - - let enableGoogleDriveIntegration = false; - let enableOneDriveIntegration = false; - let OpenAIUrl = ''; let OpenAIKey = ''; @@ -81,6 +57,8 @@ hybrid: false }; + let RAGConfig = null; + const 
embeddingModelUpdateHandler = async () => { if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) { toast.error( @@ -175,65 +153,40 @@ }; const submitHandler = async () => { - if (contentExtractionEngine === 'tika' && tikaServerUrl === '') { + if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika' && RAGConfig.TIKA_SERVER_URL === '') { toast.error($i18n.t('Tika Server URL required.')); return; } - if (contentExtractionEngine === 'docling' && doclingServerUrl === '') { + if (RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling' && RAGConfig.DOCLING_SERVER_URL === '') { toast.error($i18n.t('Docling Server URL required.')); return; } + if ( - contentExtractionEngine === 'document_intelligence' && - (documentIntelligenceEndpoint === '' || documentIntelligenceKey === '') + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence' && + (RAGConfig.DOCUMENT_INTELLIGENCE_ENDPOINT === '' || + RAGConfig.DOCUMENT_INTELLIGENCE_KEY === '') ) { toast.error($i18n.t('Document Intelligence endpoint and key required.')); return; } - if (contentExtractionEngine === 'mistral_ocr' && mistralApiKey === '') { + if ( + RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr' && + RAGConfig.MISTRAL_OCR_API_KEY === '' + ) { toast.error($i18n.t('Mistral OCR API Key required.')); return; } - if (!BYPASS_EMBEDDING_AND_RETRIEVAL) { + if (!RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL) { await embeddingModelUpdateHandler(); - if (querySettings.hybrid) { + if (RAGConfig.ENABLE_RAG_HYBRID_SEARCH) { await rerankingModelUpdateHandler(); } } - const res = await updateRAGConfig(localStorage.token, { - pdf_extract_images: pdfExtractImages, - enable_google_drive_integration: enableGoogleDriveIntegration, - enable_onedrive_integration: enableOneDriveIntegration, - file: { - max_size: fileMaxSize === '' ? null : fileMaxSize, - max_count: fileMaxCount === '' ? 
null : fileMaxCount - }, - RAG_FULL_CONTEXT: RAG_FULL_CONTEXT, - BYPASS_EMBEDDING_AND_RETRIEVAL: BYPASS_EMBEDDING_AND_RETRIEVAL, - chunk: { - text_splitter: textSplitter, - chunk_overlap: chunkOverlap, - chunk_size: chunkSize - }, - content_extraction: { - engine: contentExtractionEngine, - tika_server_url: tikaServerUrl, - docling_server_url: doclingServerUrl, - document_intelligence_config: { - key: documentIntelligenceKey, - endpoint: documentIntelligenceEndpoint - }, - mistral_ocr_config: { - api_key: mistralApiKey - } - } - }); - - await updateQuerySettings(localStorage.token, querySettings); - + const res = await updateRAGConfig(localStorage.token, RAGConfig); dispatch('save'); }; @@ -261,46 +214,11 @@ } }; - const toggleHybridSearch = async () => { - querySettings = await updateQuerySettings(localStorage.token, querySettings); - }; - onMount(async () => { await setEmbeddingConfig(); await setRerankingConfig(); - querySettings = await getQuerySettings(localStorage.token); - - const res = await getRAGConfig(localStorage.token); - - if (res) { - pdfExtractImages = res.pdf_extract_images; - - textSplitter = res.chunk.text_splitter; - chunkSize = res.chunk.chunk_size; - chunkOverlap = res.chunk.chunk_overlap; - - RAG_FULL_CONTEXT = res.RAG_FULL_CONTEXT; - BYPASS_EMBEDDING_AND_RETRIEVAL = res.BYPASS_EMBEDDING_AND_RETRIEVAL; - - contentExtractionEngine = res.content_extraction.engine; - tikaServerUrl = res.content_extraction.tika_server_url; - doclingServerUrl = res.content_extraction.docling_server_url; - - showTikaServerUrl = contentExtractionEngine === 'tika'; - showDoclingServerUrl = contentExtractionEngine === 'docling'; - documentIntelligenceEndpoint = res.content_extraction.document_intelligence_config.endpoint; - documentIntelligenceKey = res.content_extraction.document_intelligence_config.key; - showDocumentIntelligenceConfig = contentExtractionEngine === 'document_intelligence'; - mistralApiKey = res.content_extraction.mistral_ocr_config.api_key; - 
showMistralOcrConfig = contentExtractionEngine === 'mistral_ocr'; - - fileMaxSize = res?.file.max_size ?? ''; - fileMaxCount = res?.file.max_count ?? ''; - - enableGoogleDriveIntegration = res.enable_google_drive_integration; - enableOneDriveIntegration = res.enable_onedrive_integration; - } + RAGConfig = await getRAGConfig(localStorage.token); }); @@ -332,7 +250,6 @@ }} /> - { @@ -353,339 +270,93 @@ submitHandler(); }} > -
-
-
-
{$i18n.t('General')}
- -
- -
-
-
- {$i18n.t('Content Extraction Engine')} -
-
- -
-
- {#if contentExtractionEngine === 'tika'} -
-
- -
-
- {:else if contentExtractionEngine === 'docling'} -
- -
- {:else if contentExtractionEngine === 'document_intelligence'} -
- - -
- {:else if contentExtractionEngine === 'mistral_ocr'} -
- -
- {/if} -
- - {#if contentExtractionEngine === ''} -
-
- {$i18n.t('PDF Extract Images (OCR)')} -
-
- -
-
- {/if} - -
-
- - {$i18n.t('Bypass Embedding and Retrieval')} - -
-
- - - -
-
- - {#if !BYPASS_EMBEDDING_AND_RETRIEVAL} -
-
{$i18n.t('Text Splitter')}
-
- -
-
- -
-
-
-
- {$i18n.t('Chunk Size')} -
-
- -
-
- -
-
- {$i18n.t('Chunk Overlap')} -
- -
- -
-
-
-
- {/if} -
- - {#if !BYPASS_EMBEDDING_AND_RETRIEVAL} + {#if RAGConfig} +
+
-
{$i18n.t('Embedding')}
+
{$i18n.t('General')}

-
+
-
- {$i18n.t('Embedding Model Engine')} +
+ {$i18n.t('Content Extraction Engine')}
-
+
- {#if embeddingEngine === 'openai'} -
- - - + {#if RAGConfig.CONTENT_EXTRACTION_ENGINE === ''} +
+
+
+ {$i18n.t('PDF Extract Images (OCR)')} +
+
+ +
+
- {:else if embeddingEngine === 'ollama'} + {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'tika'} +
+
+ +
+
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'docling'} +
+ +
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
- +
+ {:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'mistral_ocr'} +
+
{/if}
-
-
{$i18n.t('Embedding Model')}
- -
- {#if embeddingEngine === 'ollama'} -
-
- -
-
- {:else} -
-
- -
- - {#if embeddingEngine === ''} - - {/if} -
- {/if} -
- -
- {$i18n.t( - 'Warning: If you update or change your embedding model, you will need to re-import all documents.' - )} -
-
- - {#if embeddingEngine === 'ollama' || embeddingEngine === 'openai'} -
-
{$i18n.t('Embedding Batch Size')}
- -
- -
-
- {/if} -
- -
-
{$i18n.t('Retrieval')}
- -
-
-
{$i18n.t('Full Context Mode')}
+
+ + {$i18n.t('Bypass Embedding and Retrieval')} + +
- +
- {#if !RAG_FULL_CONTEXT} + {#if !RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL}
-
{$i18n.t('Hybrid Search')}
+
{$i18n.t('Text Splitter')}
- { - toggleHybridSearch(); - }} - /> +
- {#if querySettings.hybrid === true} -
-
{$i18n.t('Reranking Model')}
+
+
+
+
+ {$i18n.t('Chunk Size')} +
+
+ +
+
-
+
+
+ {$i18n.t('Chunk Overlap')} +
+ +
+ +
+
+
+
+ {/if} +
+ + {#if !RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL} +
+
{$i18n.t('Embedding')}
+ +
+ +
+
+
+ {$i18n.t('Embedding Model Engine')} +
+
+ +
+
+ + {#if embeddingEngine === 'openai'} +
+ + + +
+ {:else if embeddingEngine === 'ollama'} +
+ + + +
+ {/if} +
+ +
+
{$i18n.t('Embedding Model')}
+ +
+ {#if embeddingEngine === 'ollama'}
+
+
+ {:else} +
+
+
- + {/if} + + {/if}
-
+ {/if}
- {/if} -
-
{$i18n.t('Top K')}
-
- +
+ {$i18n.t( + 'Warning: If you update or change your embedding model, you will need to re-import all documents.' + )}
- {#if querySettings.hybrid === true} -
-
{$i18n.t('Top K Reranker')}
+ {#if embeddingEngine === 'ollama' || embeddingEngine === 'openai'} +
+
+ {$i18n.t('Embedding Batch Size')} +
+ +
+ +
+
+ {/if} +
+ +
+
{$i18n.t('Retrieval')}
+ +
+ +
+
{$i18n.t('Full Context Mode')}
+
+ + + +
+
+ + {#if !RAGConfig.RAG_FULL_CONTEXT} +
+
{$i18n.t('Hybrid Search')}
+
+ { + submitHandler(); + }} + /> +
+
+ + {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true} +
+
{$i18n.t('Reranking Model')}
+ +
+
+
+ +
+ +
+
+
+ {/if} + +
+
{$i18n.t('Top K')}
- {/if} - {#if querySettings.hybrid === true} -
-
-
{$i18n.t('Minimum Score')}
+ {#if RAGConfig.ENABLE_RAG_HYBRID_SEARCH === true} +
+
{$i18n.t('Top K Reranker')}
-
- {$i18n.t( - 'Note: If you set a minimum score, the search will only return documents with a score greater than or equal to the minimum score.' - )} -
-
- {/if} - {/if} + {/if} -
-
{$i18n.t('RAG Template')}
-
- -