SafeWebBaseLoader requests now use a timeout in seconds provided by PersistentConfig SAFE_WEBLOADER_TIMEOUT (float)

This commit is contained in:
Boris Bocquet 2025-12-07 19:31:52 +01:00
parent 6f1486ffd0
commit 2fc2b4cf61
5 changed files with 35 additions and 0 deletions

View file

@ -3007,6 +3007,13 @@ WEB_SEARCH_TRUST_ENV = PersistentConfig(
os.getenv("WEB_SEARCH_TRUST_ENV", "False").lower() == "true",
)
# Request timeout, in seconds, for the safe web loader (see requests.get).
# Read from the SAFE_WEBLOADER_TIMEOUT environment variable and parsed as a
# float; falls back to "10" when the variable is not set.
SAFE_WEBLOADER_TIMEOUT = PersistentConfig(
    "SAFE_WEBLOADER_TIMEOUT",
    "rag.web.loader.safe_webloader_timeout",
    float(os.getenv("SAFE_WEBLOADER_TIMEOUT", "10")),
)
OLLAMA_CLOUD_WEB_SEARCH_API_KEY = PersistentConfig(
"OLLAMA_CLOUD_WEB_SEARCH_API_KEY",

View file

@ -208,6 +208,7 @@ from open_webui.config import (
FIRECRAWL_API_KEY,
WEB_LOADER_ENGINE,
WEB_LOADER_CONCURRENT_REQUESTS,
SAFE_WEBLOADER_TIMEOUT,
WHISPER_MODEL,
WHISPER_VAD_FILTER,
WHISPER_LANGUAGE,
@ -967,6 +968,7 @@ app.state.config.EXTERNAL_WEB_SEARCH_API_KEY = EXTERNAL_WEB_SEARCH_API_KEY
app.state.config.EXTERNAL_WEB_LOADER_URL = EXTERNAL_WEB_LOADER_URL
app.state.config.EXTERNAL_WEB_LOADER_API_KEY = EXTERNAL_WEB_LOADER_API_KEY
app.state.config.SAFE_WEBLOADER_TIMEOUT = SAFE_WEBLOADER_TIMEOUT
app.state.config.PLAYWRIGHT_WS_URL = PLAYWRIGHT_WS_URL
app.state.config.PLAYWRIGHT_TIMEOUT = PLAYWRIGHT_TIMEOUT

View file

@ -40,6 +40,7 @@ from open_webui.config import (
EXTERNAL_WEB_LOADER_URL,
EXTERNAL_WEB_LOADER_API_KEY,
WEB_FETCH_FILTER_LIST,
SAFE_WEBLOADER_TIMEOUT,
)
from open_webui.env import SRC_LOG_LEVELS
from open_webui.utils.misc import is_string_allowed
@ -674,6 +675,9 @@ def get_web_loader(
if WEB_LOADER_ENGINE.value == "" or WEB_LOADER_ENGINE.value == "safe_web":
WebLoaderClass = SafeWebBaseLoader
timeout = SAFE_WEBLOADER_TIMEOUT.value
requests_kwargs = {"timeout": timeout}
web_loader_args["requests_kwargs"] = requests_kwargs
if WEB_LOADER_ENGINE.value == "playwright":
WebLoaderClass = SafePlaywrightURLLoader
web_loader_args["playwright_timeout"] = PLAYWRIGHT_TIMEOUT.value

View file

@ -537,6 +537,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK,
"WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE,
"ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
"SAFE_WEBLOADER_TIMEOUT": request.app.state.config.SAFE_WEBLOADER_TIMEOUT,
"PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL,
"PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT,
"FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY,
@ -595,6 +596,7 @@ class WebConfig(BaseModel):
SOUGOU_API_SK: Optional[str] = None
WEB_LOADER_ENGINE: Optional[str] = None
ENABLE_WEB_LOADER_SSL_VERIFICATION: Optional[bool] = None
SAFE_WEBLOADER_TIMEOUT: Optional[float] = None
PLAYWRIGHT_WS_URL: Optional[str] = None
PLAYWRIGHT_TIMEOUT: Optional[int] = None
FIRECRAWL_API_KEY: Optional[str] = None
@ -1074,6 +1076,9 @@ async def update_rag_config(
request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION = (
form_data.web.ENABLE_WEB_LOADER_SSL_VERIFICATION
)
request.app.state.config.SAFE_WEBLOADER_TIMEOUT = (
form_data.web.SAFE_WEBLOADER_TIMEOUT
)
request.app.state.config.PLAYWRIGHT_WS_URL = form_data.web.PLAYWRIGHT_WS_URL
request.app.state.config.PLAYWRIGHT_TIMEOUT = form_data.web.PLAYWRIGHT_TIMEOUT
request.app.state.config.FIRECRAWL_API_KEY = form_data.web.FIRECRAWL_API_KEY
@ -1207,6 +1212,7 @@ async def update_rag_config(
"SOUGOU_API_SK": request.app.state.config.SOUGOU_API_SK,
"WEB_LOADER_ENGINE": request.app.state.config.WEB_LOADER_ENGINE,
"ENABLE_WEB_LOADER_SSL_VERIFICATION": request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
"SAFE_WEBLOADER_TIMEOUT": request.app.state.config.SAFE_WEBLOADER_TIMEOUT,
"PLAYWRIGHT_WS_URL": request.app.state.config.PLAYWRIGHT_WS_URL,
"PLAYWRIGHT_TIMEOUT": request.app.state.config.PLAYWRIGHT_TIMEOUT,
"FIRECRAWL_API_KEY": request.app.state.config.FIRECRAWL_API_KEY,

View file

@ -775,6 +775,22 @@
<Switch bind:state={webConfig.ENABLE_WEB_LOADER_SSL_VERIFICATION} />
</div>
</div>
<div class="mt-2">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Safe WebLoader Timeout (s)')}
</div>
<div class="flex w-full">
<div class="flex-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
placeholder={$i18n.t('Enter Safe Webloader Timeout')}
bind:value={webConfig.SAFE_WEBLOADER_TIMEOUT}
autocomplete="off"
/>
</div>
</div>
</div>
{:else if webConfig.WEB_LOADER_ENGINE === 'playwright'}
<div class="mb-2.5 flex w-full flex-col">
<div>