From 74b1c80132797afb18bc03c285b0d736d6b313f9 Mon Sep 17 00:00:00 2001 From: expruc Date: Tue, 12 Aug 2025 15:53:39 +0300 Subject: [PATCH] disable collection retrieval and BM25 calculation if the BM25 weight is 0 or less --- backend/open_webui/retrieval/utils.py | 42 +++++++++++++++------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 539adda329..7e13cbf164 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -124,12 +124,14 @@ def query_doc_with_hybrid_search( hybrid_bm25_weight: float, ) -> dict: try: - log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") - bm25_retriever = BM25Retriever.from_texts( - texts=collection_result.documents[0], - metadatas=collection_result.metadatas[0], - ) - bm25_retriever.k = k + # The BM25 retriever is only needed when its weight is greater than 0 + if hybrid_bm25_weight > 0: + log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") + bm25_retriever = BM25Retriever.from_texts( + texts=collection_result.documents[0], + metadatas=collection_result.metadatas[0], + ) + bm25_retriever.k = k vector_search_retriever = VectorSearchRetriever( collection_name=collection_name, @@ -337,18 +339,22 @@ def query_collection_with_hybrid_search( # Fetch collection data once per collection sequentially # Avoid fetching the same data multiple times later collection_results = {} - for collection_name in collection_names: - try: - log.debug( - f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}" - ) - collection_results[collection_name] = VECTOR_DB_CLIENT.get( - collection_name=collection_name - ) - except Exception as e: - log.exception(f"Failed to fetch collection {collection_name}: {e}") - collection_results[collection_name] = None - + # Only retrieve the entire collection if the BM25 calculation is required + if hybrid_bm25_weight > 0: + for collection_name in 
collection_names: + try: + log.debug( + f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}" + ) + collection_results[collection_name] = VECTOR_DB_CLIENT.get( + collection_name=collection_name + ) + except Exception as e: + log.exception(f"Failed to fetch collection {collection_name}: {e}") + collection_results[collection_name] = None + else: + for collection_name in collection_names: + collection_results[collection_name] = [] log.info( f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..." )