diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index 539adda329..7e13cbf164 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -124,12 +124,14 @@ def query_doc_with_hybrid_search( hybrid_bm25_weight: float, ) -> dict: try: - log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") - bm25_retriever = BM25Retriever.from_texts( - texts=collection_result.documents[0], - metadatas=collection_result.metadatas[0], - ) - bm25_retriever.k = k + # BM_25 required only if weight is greater than 0 + if hybrid_bm25_weight > 0: + log.debug(f"query_doc_with_hybrid_search:doc {collection_name}") + bm25_retriever = BM25Retriever.from_texts( + texts=collection_result.documents[0], + metadatas=collection_result.metadatas[0], + ) + bm25_retriever.k = k vector_search_retriever = VectorSearchRetriever( collection_name=collection_name, @@ -337,18 +339,22 @@ def query_collection_with_hybrid_search( # Fetch collection data once per collection sequentially # Avoid fetching the same data multiple times later collection_results = {} - for collection_name in collection_names: - try: - log.debug( - f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}" - ) - collection_results[collection_name] = VECTOR_DB_CLIENT.get( - collection_name=collection_name - ) - except Exception as e: - log.exception(f"Failed to fetch collection {collection_name}: {e}") - collection_results[collection_name] = None - + # Only retrieve entire collection if bm_25 calculation is required + if hybrid_bm25_weight > 0: + for collection_name in collection_names: + try: + log.debug( + f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}" + ) + collection_results[collection_name] = VECTOR_DB_CLIENT.get( + collection_name=collection_name + ) + except Exception as e: + log.exception(f"Failed to fetch collection {collection_name}: {e}") + collection_results[collection_name] = None + else: + for collection_name in collection_names: + collection_results[collection_name] = [] log.info( f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..." )