From f97f21bf3ac53fd144610d194908ab7b0fc2f7f6 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Mon, 18 Aug 2025 20:06:36 +0400 Subject: [PATCH] refac/fix: rename WEB_SEARCH_CONCURRENT_REQUESTS to WEB_LOADER_CONCURRENT_REQUESTS --- backend/open_webui/config.py | 8 ++++ backend/open_webui/main.py | 4 ++ .../open_webui/retrieval/web/duckduckgo.py | 8 +++- backend/open_webui/routers/retrieval.py | 11 +++++- .../admin/Settings/WebSearch.svelte | 39 ++++++++++++------- 5 files changed, 54 insertions(+), 16 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index bc4ac98eb4..c49f48dc10 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2616,6 +2616,14 @@ WEB_LOADER_ENGINE = PersistentConfig( os.environ.get("WEB_LOADER_ENGINE", ""), ) + +WEB_LOADER_CONCURRENT_REQUESTS = PersistentConfig( + "WEB_LOADER_CONCURRENT_REQUESTS", + "rag.web.loader.concurrent_requests", + int(os.getenv("WEB_LOADER_CONCURRENT_REQUESTS", "10")), +) + + ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( "ENABLE_WEB_LOADER_SSL_VERIFICATION", "rag.web.loader.ssl_verification", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 0f8db6d628..e04ff6c308 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -185,6 +185,7 @@ from open_webui.config import ( FIRECRAWL_API_BASE_URL, FIRECRAWL_API_KEY, WEB_LOADER_ENGINE, + WEB_LOADER_CONCURRENT_REQUESTS, WHISPER_MODEL, WHISPER_VAD_FILTER, WHISPER_LANGUAGE, @@ -855,7 +856,10 @@ app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS + app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE +app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = WEB_LOADER_CONCURRENT_REQUESTS + app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL diff --git a/backend/open_webui/retrieval/web/duckduckgo.py b/backend/open_webui/retrieval/web/duckduckgo.py index a32fc358ed..e4cf9d00ec 100644 --- a/backend/open_webui/retrieval/web/duckduckgo.py +++ b/backend/open_webui/retrieval/web/duckduckgo.py @@ -11,7 +11,10 @@ log.setLevel(SRC_LOG_LEVELS["RAG"]) def search_duckduckgo( - query: str, count: int, filter_list: Optional[list[str]] = None + query: str, + count: int, + filter_list: Optional[list[str]] = None, + concurrent_requests: Optional[int] = None, ) -> list[SearchResult]: """ Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects. @@ -25,6 +28,9 @@ def search_duckduckgo( # Use the DDGS context manager to create a DDGS object search_results = [] with DDGS() as ddgs: + if concurrent_requests: + ddgs.threads = concurrent_requests + # Use the ddgs.text() method to perform the search try: search_results = ddgs.text( diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index c02b48e487..4a0d327c0b 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -449,6 +449,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, @@ -504,6 +505,7 @@ class WebConfig(BaseModel): WEB_SEARCH_TRUST_ENV: Optional[bool] = None WEB_SEARCH_RESULT_COUNT: Optional[int] = None WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None + WEB_LOADER_CONCURRENT_REQUESTS: Optional[int] = None WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None @@ -916,6 +918,9 @@ async def update_rag_config( request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS ) + request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = ( + form_data.web.WEB_LOADER_CONCURRENT_REQUESTS + ) request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = ( form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST ) @@ -1067,6 +1072,7 @@ async def update_rag_config( "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, @@ -1624,7 +1630,7 @@ def process_web( loader = get_web_loader( form_data.url, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, ) docs = loader.load() content = " ".join([doc.page_content for doc in docs]) @@ -1798,6 +1804,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: query, request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + concurrent_requests=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, ) elif engine == "tavily": if request.app.state.config.TAVILY_API_KEY: @@ -1971,7 +1978,7 @@ async def process_web_search( loader = get_web_loader( urls, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, ) docs = await loader.aload() diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 599878345b..50513b5ab6 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -555,6 +555,19 @@ /> + {:else if webConfig.WEB_SEARCH_ENGINE === 'ddgs' || webConfig.WEB_SEARCH_ENGINE === 'duckduckgo'} +
+
+ {$i18n.t('Concurrent Requests')} +
+ + +
{:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
@@ -604,19 +617,6 @@ required />
- -
-
- {$i18n.t('Concurrent Requests')} -
- - -
@@ -853,6 +853,19 @@ {/if} +
+
+ {$i18n.t('Concurrent Requests')} +
+ + +
+
{$i18n.t('Youtube Language')}