From 2fc2b4cf619040e47c019fbc88b62a8f6c7e5bde Mon Sep 17 00:00:00 2001 From: Boris Bocquet Date: Sun, 7 Dec 2025 19:31:52 +0100 Subject: [PATCH] SafeWebBaseLoader requests now use a timeout in seconds provided by PersistentConfig SAFE_WEBLOADER_TIMEOUT (float) --- backend/open_webui/config.py | 7 +++++++ backend/open_webui/main.py | 2 ++ backend/open_webui/retrieval/web/utils.py | 4 ++++ backend/open_webui/routers/retrieval.py | 6 ++++++ .../components/admin/Settings/WebSearch.svelte | 16 ++++++++++++++++ 5 files changed, 35 insertions(+) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 41e88df5d2..d714d3164a 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -3007,6 +3007,13 @@ WEB_SEARCH_TRUST_ENV = PersistentConfig( os.getenv("WEB_SEARCH_TRUST_ENV", "False").lower() == "true", ) +SAFE_WEBLOADER_TIMEOUT = PersistentConfig( + "SAFE_WEBLOADER_TIMEOUT", + "rag.web.loader.safe_webloader_timeout", + float( + os.environ.get("SAFE_WEBLOADER_TIMEOUT", "10") + ), # timeout in seconds (see requests.get) +) OLLAMA_CLOUD_WEB_SEARCH_API_KEY = PersistentConfig( "OLLAMA_CLOUD_WEB_SEARCH_API_KEY", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 21a1aee043..5e849ce17d 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -208,6 +208,7 @@ from open_webui.config import ( FIRECRAWL_API_KEY, WEB_LOADER_ENGINE, WEB_LOADER_CONCURRENT_REQUESTS, + SAFE_WEBLOADER_TIMEOUT, WHISPER_MODEL, WHISPER_VAD_FILTER, WHISPER_LANGUAGE, @@ -967,6 +968,7 @@ app.state.config.EXTERNAL_WEB_SEARCH_API_KEY = EXTERNAL_WEB_SEARCH_API_KEY app.state.config.EXTERNAL_WEB_LOADER_URL = EXTERNAL_WEB_LOADER_URL app.state.config.EXTERNAL_WEB_LOADER_API_KEY = EXTERNAL_WEB_LOADER_API_KEY +app.state.config.SAFE_WEBLOADER_TIMEOUT = SAFE_WEBLOADER_TIMEOUT app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index bdbde0b3a9..db95fd3501 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -40,6 +40,7 @@ from open_webui.config import ( EXTERNAL_WEB_LOADER_URL, EXTERNAL_WEB_LOADER_API_KEY, WEB_FETCH_FILTER_LIST, + SAFE_WEBLOADER_TIMEOUT, ) from open_webui.env import SRC_LOG_LEVELS from open_webui.utils.misc import is_string_allowed @@ -674,6 +675,9 @@ def get_web_loader( if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web": WebLoaderClass = SafeWebBaseLoader + timeout = SAFE_WEBLOADER_TIMEOUT.value + requests_kwargs = {"timeout": timeout} + web_loader_args["requests_kwargs"] = requests_kwargs if WEB_LOADER_ENGINE.value == "playwright": WebLoaderClass = SafePlaywrightURLLoader web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index b7ed993895..8e1aab4a00 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -537,6 +537,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "SAFE_WEBLOADER_TIMEOUT": request.app.state.config.SAFE_WEBLOADER_TIMEOUT, "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, @@ -595,6 +596,7 @@ class WebConfig(BaseModel): SOUGOU_API_SK: Optional[str] = None WEB_LOADER_ENGINE: Optional[str] = None ENABLE_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None + SAFE_WEBLOADER_TIMEOUT: Optional[float] = None PLAYWRIGHT_WS_URL: Optional[str] = None PLAYWRIGHT_TIMEOUT: Optional[int] = None FIRECRAWL_API_KEY: Optional[str] = None @@ -1074,6 +1076,9 @@ async def update_rag_config( request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = ( form_data.web.ENABLE_WEB_LOADER_SSL_VERIFICATION ) + request.app.state.config.SAFE_WEBLOADER_TIMEOUT = ( + form_data.web.SAFE_WEBLOADER_TIMEOUT + ) request.app.state.config.PLAYWRIGHT_WS_URL = form_data.web.PLAYWRIGHT_WS_URL request.app.state.config.PLAYWRIGHT_TIMEOUT = form_data.web.PLAYWRIGHT_TIMEOUT request.app.state.config.FIRECRAWL_API_KEY = form_data.web.FIRECRAWL_API_KEY @@ -1207,6 +1212,7 @@ async def update_rag_config( "SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK, "WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE, "ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, + "SAFE_WEBLOADER_TIMEOUT": request.app.state.config.SAFE_WEBLOADER_TIMEOUT, "PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL, "PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT, "FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY, diff --git a/src/lib/components/admin/Settings/WebSearch.svelte b/src/lib/components/admin/Settings/WebSearch.svelte index 17191ac216..40eae51c06 100644 --- a/src/lib/components/admin/Settings/WebSearch.svelte +++ b/src/lib/components/admin/Settings/WebSearch.svelte @@ -775,6 +775,22 @@ +
+
+ {$i18n.t('Safe WebLoader Timeout (s)')} +
+ +
+
+ +
+
+
{:else if webConfig.WEB_LOADER_ENGINE === 'playwright'}