From 292cb62d4af2ed34e11c866c8d45e1a25ef2057d Mon Sep 17 00:00:00 2001
From: _00_ <131402327+rgaricano@users.noreply.github.com>
Date: Sat, 30 Aug 2025 01:48:31 +0200
Subject: [PATCH 1/2] FIX: Hybrid Search lexical-semantic tags
Fix the swapped "lexical" and "semantic" labels shown for Hybrid Search.
While reviewing the Documents settings I noticed that the two labels were inverted; the intended mapping is:
BM25 weight=1 --> lexical
BM25 weight=0 --> semantic
---
src/lib/components/admin/Settings/Documents.svelte | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte
index d3a244fa45..087a9bb950 100644
--- a/src/lib/components/admin/Settings/Documents.svelte
+++ b/src/lib/components/admin/Settings/Documents.svelte
@@ -1104,10 +1104,10 @@
- {$i18n.t('lexical')}
+ {$i18n.t('semantic')}
- {$i18n.t('semantic')}
+ {$i18n.t('lexical')}
From 647e38f701ff93bb40ed71ba445a8ba903518306 Mon Sep 17 00:00:00 2001
From: _00_ <131402327+rgaricano@users.noreply.github.com>
Date: Sat, 30 Aug 2025 10:45:35 +0200
Subject: [PATCH 2/2] Revert bypass hybrid search when BM25_weight=0
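Revert commit https://github.com/open-webui/open-webui/commit/74b1c801,
which only built the BM25 retriever and fetched the full collections when hybrid_bm25_weight > 0.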
---
backend/open_webui/retrieval/utils.py | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 100c92c6c0..856527083c 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -128,8 +128,6 @@ def query_doc_with_hybrid_search(
log.warning(f"query_doc_with_hybrid_search:no_docs {collection_name}")
return {"documents": [], "metadatas": [], "distances": []}
- # BM_25 required only if weight is greater than 0
- if hybrid_bm25_weight > 0:
log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
bm25_retriever = BM25Retriever.from_texts(
texts=collection_result.documents[0],
@@ -343,8 +341,7 @@ def query_collection_with_hybrid_search(
# Fetch collection data once per collection sequentially
# Avoid fetching the same data multiple times later
collection_results = {}
- # Only retrieve entire collection if bm_25 calculation is required
- if hybrid_bm25_weight > 0:
+
for collection_name in collection_names:
try:
log.debug(
@@ -356,9 +353,7 @@ def query_collection_with_hybrid_search(
except Exception as e:
log.exception(f"Failed to fetch collection {collection_name}: {e}")
collection_results[collection_name] = None
- else:
- for collection_name in collection_names:
- collection_results[collection_name] = []
+
log.info(
f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..."
)