From 292cb62d4af2ed34e11c866c8d45e1a25ef2057d Mon Sep 17 00:00:00 2001 From: _00_ <131402327+rgaricano@users.noreply.github.com> Date: Sat, 30 Aug 2025 01:48:31 +0200 Subject: [PATCH 1/2] FIX: Hybrid Search lexical-semantic tags Fix the placement of the lexical/semantic labels in Hybrid Search. While reviewing, I noticed that the lexical and semantic labels are inverted. The correct mapping is: BM25 weight=1 --> lexical; BM25 weight=0 --> semantic. --- src/lib/components/admin/Settings/Documents.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index d3a244fa45..087a9bb950 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -1104,10 +1104,10 @@
- {$i18n.t('lexical')} + {$i18n.t('semantic')}
- {$i18n.t('semantic')} + {$i18n.t('lexical')}
From 647e38f701ff93bb40ed71ba445a8ba903518306 Mon Sep 17 00:00:00 2001 From: _00_ <131402327+rgaricano@users.noreply.github.com> Date: Sat, 30 Aug 2025 10:45:35 +0200 Subject: [PATCH 2/2] Revert bypass hybrid search when BM25_weight=0 Revert commit https://github.com/open-webui/open-webui/commit/74b1c801, which skipped the BM25 retrieval when hybrid_bm25_weight was 0. --- backend/open_webui/retrieval/utils.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 100c92c6c0..856527083c 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -128,8 +128,6 @@ def query_doc_with_hybrid_search( log.warning(f"query_doc_with_hybrid_search:no_docs {collection_name}") return {"documents": [], "metadatas": [], "distances": []} - # BM_25 required only if weight is greater than 0 - if hybrid_bm25_weight > 0: log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") bm25_retriever = BM25Retriever.from_texts( texts=collection_result.documents[0], @@ -343,8 +341,7 @@ def query_collection_with_hybrid_search( # Fetch collection data once per collection sequentially # Avoid fetching the same data multiple times later collection_results = {} - # Only retrieve entire collection if bm_25 calculation is required - if hybrid_bm25_weight > 0: + for collection_name in collection_names: try: log.debug( @@ -356,9 +353,7 @@ def query_collection_with_hybrid_search( except Exception as e: log.exception(f"Failed to fetch collection {collection_name}: {e}") collection_results[collection_name] = None - else: - for collection_name in collection_names: - collection_results[collection_name] = [] + log.info( f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..." )