feat: Apply WEB_SEARCH_CONCURRENT_REQUESTS to all search engines using semaphore (#20070)

* sequential

* zero default

* fix
This commit is contained in:
Classic298 2025-12-21 13:18:00 +01:00 committed by GitHub
parent ae203d8952
commit 4fd790f7dd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 49 additions and 24 deletions

View file

@ -2994,7 +2994,7 @@ WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig(
WEB_SEARCH_CONCURRENT_REQUESTS = PersistentConfig(
"WEB_SEARCH_CONCURRENT_REQUESTS",
"rag.web.search.concurrent_requests",
int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "10")),
int(os.getenv("WEB_SEARCH_CONCURRENT_REQUESTS", "0")),
)

View file

@ -2103,16 +2103,38 @@ async def process_web_search(
f"trying to web search with {request.app.state.config.WEB_SEARCH_ENGINE, form_data.queries}"
)
search_tasks = [
run_in_threadpool(
search_web,
request,
request.app.state.config.WEB_SEARCH_ENGINE,
query,
user,
)
for query in form_data.queries
]
# Use a semaphore to limit concurrent search requests based on WEB_SEARCH_CONCURRENT_REQUESTS.
# The semaphore is created here, per call, so the limit applies to the queries of this call only.
# 0 or None = unlimited (previous behavior); a positive number = limited concurrency.
# Set to 1 for sequential execution (rate-limited APIs like Brave free tier).
concurrent_limit = request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS
if concurrent_limit:
# Limited concurrency with semaphore
semaphore = asyncio.Semaphore(concurrent_limit)
async def search_with_limit(query):
async with semaphore:
return await run_in_threadpool(
search_web,
request,
request.app.state.config.WEB_SEARCH_ENGINE,
query,
user,
)
search_tasks = [search_with_limit(query) for query in form_data.queries]
else:
# Unlimited parallel execution (previous behavior)
search_tasks = [
run_in_threadpool(
search_web,
request,
request.app.state.config.WEB_SEARCH_ENGINE,
query,
user,
)
for query in form_data.queries
]
search_results = await asyncio.gather(*search_tasks)

View file

@ -630,19 +630,6 @@
/>
</div>
</div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'ddgs' || webConfig.WEB_SEARCH_ENGINE === 'duckduckgo'}
<div class="w-full mb-2.5">
<div class=" self-center text-xs font-medium mb-1">
{$i18n.t('Concurrent Requests')}
</div>
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
placeholder={$i18n.t('Concurrent Requests')}
bind:value={webConfig.WEB_SEARCH_CONCURRENT_REQUESTS}
required
/>
</div>
{:else if webConfig.WEB_SEARCH_ENGINE === 'external'}
<div class="mb-2.5 flex w-full flex-col">
<div>
@ -692,6 +679,22 @@
required
/>
</div>
<div class="w-full">
<div class=" self-center text-xs font-medium mb-1">
<Tooltip content={$i18n.t('Limit concurrent search queries. 0 = unlimited (default). Set to 1 for sequential execution (recommended for APIs with strict rate limits like Brave free tier).')} placement="top-start">
{$i18n.t('Concurrent Requests')}
</Tooltip>
</div>
<input
class="w-full rounded-lg py-2 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
placeholder={$i18n.t('Concurrent Requests')}
bind:value={webConfig.WEB_SEARCH_CONCURRENT_REQUESTS}
type="number"
min="0"
/>
</div>
</div>
</div>