refac/fix: rename WEB_SEARCH_CONCURRENT_REQUESTS to WEB_LOADER_CONCURRENT_REQUESTS

This commit is contained in:
Timothy Jaeryang Baek 2025-08-18 20:06:36 +04:00
parent a651598d19
commit f97f21bf3a
5 changed files with 54 additions and 16 deletions

View file

@ -2616,6 +2616,14 @@ WEB_LOADER_ENGINE = PersistentConfig(
os.environ.get("WEB_LOADER_ENGINE", ""), os.environ.get("WEB_LOADER_ENGINE", ""),
) )
# Concurrency setting for the web loader, kept separate from
# WEB_SEARCH_CONCURRENT_REQUESTS (which applies to the search engine call).
# The initial value comes from the WEB_LOADER_CONCURRENT_REQUESTS environment
# variable, falling back to "10"; it is parsed with int(), so a non-numeric
# value raises ValueError when this module is evaluated.
# The setting is registered under the "rag.web.loader.concurrent_requests"
# config path.
WEB_LOADER_CONCURRENT_REQUESTS = PersistentConfig(
"WEB_LOADER_CONCURRENT_REQUESTS",
"rag.web.loader.concurrent_requests",
int(os.getenv("WEB_LOADER_CONCURRENT_REQUESTS", "10")),
)
ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig( ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
"ENABLE_WEB_LOADER_SSL_VERIFICATION", "ENABLE_WEB_LOADER_SSL_VERIFICATION",
"rag.web.loader.ssl_verification", "rag.web.loader.ssl_verification",

View file

@ -185,6 +185,7 @@ from open_webui.config import (
FIRECRAWL_API_BASE_URL, FIRECRAWL_API_BASE_URL,
FIRECRAWL_API_KEY, FIRECRAWL_API_KEY,
WEB_LOADER_ENGINE, WEB_LOADER_ENGINE,
WEB_LOADER_CONCURRENT_REQUESTS,
WHISPER_MODEL, WHISPER_MODEL,
WHISPER_VAD_FILTER, WHISPER_VAD_FILTER,
WHISPER_LANGUAGE, WHISPER_LANGUAGE,
@ -855,7 +856,10 @@ app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE
app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST
app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT
app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS
app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE
app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = WEB_LOADER_CONCURRENT_REQUESTS
app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV
app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = ( app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL

View file

@ -11,7 +11,10 @@ log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_duckduckgo( def search_duckduckgo(
query: str, count: int, filter_list: Optional[list[str]] = None query: str,
count: int,
filter_list: Optional[list[str]] = None,
concurrent_requests: Optional[int] = None,
) -> list[SearchResult]: ) -> list[SearchResult]:
""" """
Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects. Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects.
@ -25,6 +28,9 @@ def search_duckduckgo(
# Use the DDGS context manager to create a DDGS object # Use the DDGS context manager to create a DDGS object
search_results = [] search_results = []
with DDGS() as ddgs: with DDGS() as ddgs:
if concurrent_requests:
ddgs.threads = concurrent_requests
# Use the ddgs.text() method to perform the search # Use the ddgs.text() method to perform the search
try: try:
search_results = ddgs.text( search_results = ddgs.text(

View file

@ -449,6 +449,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
"WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV,
"WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT,
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
"WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
"BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
@ -504,6 +505,7 @@ class WebConfig(BaseModel):
WEB_SEARCH_TRUST_ENV: Optional[bool] = None WEB_SEARCH_TRUST_ENV: Optional[bool] = None
WEB_SEARCH_RESULT_COUNT: Optional[int] = None WEB_SEARCH_RESULT_COUNT: Optional[int] = None
WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None
WEB_LOADER_CONCURRENT_REQUESTS: Optional[int] = None
WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = []
BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None
BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None
@ -916,6 +918,9 @@ async def update_rag_config(
request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = ( request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = (
form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS
) )
request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = (
form_data.web.WEB_LOADER_CONCURRENT_REQUESTS
)
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = ( request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = (
form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST
) )
@ -1067,6 +1072,7 @@ async def update_rag_config(
"WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV,
"WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT,
"WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
"WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
"WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
"BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL,
"BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER,
@ -1624,7 +1630,7 @@ def process_web(
loader = get_web_loader( loader = get_web_loader(
form_data.url, form_data.url,
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
) )
docs = loader.load() docs = loader.load()
content = " ".join([doc.page_content for doc in docs]) content = " ".join([doc.page_content for doc in docs])
@ -1798,6 +1804,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]:
query, query,
request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_RESULT_COUNT,
request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST,
concurrent_requests=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS,
) )
elif engine == "tavily": elif engine == "tavily":
if request.app.state.config.TAVILY_API_KEY: if request.app.state.config.TAVILY_API_KEY:
@ -1971,7 +1978,7 @@ async def process_web_search(
loader = get_web_loader( loader = get_web_loader(
urls, urls,
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION,
requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS,
trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV,
) )
docs = await loader.aload() docs = await loader.aload()

View file

@ -555,6 +555,19 @@
/> />
</div> </div>
</div> </div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'ddgs' || webConfig.WEB_SEARCH_ENGINE === 'duckduckgo'}
<!--
	Concurrent-request setting for the DuckDuckGo ("ddgs" / "duckduckgo") search engine.
	The input is numeric so the bound value is sent as a number rather than free
	text; the backend declares WEB_SEARCH_CONCURRENT_REQUESTS as an optional integer.
-->
<div class="w-full mb-2.5">
	<div class=" self-center text-xs font-medium mb-1">
		{$i18n.t('Concurrent Requests')}
	</div>

	<input
		class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
		type="number"
		placeholder={$i18n.t('Concurrent Requests')}
		bind:value={webConfig.WEB_SEARCH_CONCURRENT_REQUESTS}
		required
	/>
</div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'external'} {:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
<div class="mb-2.5 flex w-full flex-col"> <div class="mb-2.5 flex w-full flex-col">
<div> <div>
@ -604,19 +617,6 @@
required required
/> />
</div> </div>
<div class="w-full">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Concurrent Requests')}
</div>
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
placeholder={$i18n.t('Concurrent Requests')}
bind:value={webConfig.WEB_SEARCH_CONCURRENT_REQUESTS}
required
/>
</div>
</div> </div>
</div> </div>
@ -853,6 +853,19 @@
</div> </div>
{/if} {/if}
<!--
	Concurrent-request setting for the web loader.
	The input is numeric so the bound value is sent as a number rather than free
	text; the backend declares WEB_LOADER_CONCURRENT_REQUESTS as an optional integer.
-->
<div class="mb-2.5 w-full">
	<div class=" self-center text-xs font-medium mb-1">
		{$i18n.t('Concurrent Requests')}
	</div>

	<input
		class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
		type="number"
		placeholder={$i18n.t('Concurrent Requests')}
		bind:value={webConfig.WEB_LOADER_CONCURRENT_REQUESTS}
		required
	/>
</div>
<div class=" mb-2.5 flex w-full justify-between"> <div class=" mb-2.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium"> <div class=" self-center text-xs font-medium">
{$i18n.t('Youtube Language')} {$i18n.t('Youtube Language')}