From 4a7e1b93e5f9b2a0474706721e8a57f2b2ee16f5 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 25 Sep 2025 15:54:58 +0000
Subject: [PATCH] Fix: Prevent RAG queries when all files are in full context

This commit fixes an issue where Retrieval-Augmented Generation (RAG)
queries were still being generated even when all attached files were set
to 'full context' mode. This was inefficient, as the full content of the
files was already available to the model.

The `chat_completion_files_handler` in
`backend/open_webui/utils/middleware.py` has been updated to:

- Check if all attached files have the `context: 'full'` property.
- Skip the `generate_queries` step if all files are in full context mode.
- Pass a `full_context=True` flag to the `get_sources_from_items`
  function to ensure it fetches the entire document content instead of
  performing a vector search.

This change ensures that RAG queries are only generated when necessary,
improving the efficiency of the system.
---
 backend/open_webui/utils/middleware.py | 74 ++++++++++++++-------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 97f19dcded..eb10af76ef 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -631,48 +631,53 @@ async def chat_completion_files_handler(
     sources = []
 
     if files := body.get("metadata", {}).get("files", None):
+        # Check if all files are in full context mode
+        all_full_context = all(item.get("context") == "full" for item in files)
+
         queries = []
-        try:
-            queries_response = await generate_queries(
-                request,
-                {
-                    "model": body["model"],
-                    "messages": body["messages"],
-                    "type": "retrieval",
-                },
-                user,
-            )
-            queries_response = queries_response["choices"][0]["message"]["content"]
-
+        if not all_full_context:
             try:
-                bracket_start = queries_response.find("{")
-                bracket_end = queries_response.rfind("}") + 1
+                queries_response = await generate_queries(
+                    request,
+                    {
+                        "model": body["model"],
+                        "messages": body["messages"],
+                        "type": "retrieval",
+                    },
+                    user,
+                )
+                queries_response = queries_response["choices"][0]["message"]["content"]
 
-                if bracket_start == -1 or bracket_end == -1:
-                    raise Exception("No JSON object found in the response")
+                try:
+                    bracket_start = queries_response.find("{")
+                    bracket_end = queries_response.rfind("}") + 1
 
-                queries_response = queries_response[bracket_start:bracket_end]
-                queries_response = json.loads(queries_response)
-            except Exception as e:
-                queries_response = {"queries": [queries_response]}
+                    if bracket_start == -1 or bracket_end == -1:
+                        raise Exception("No JSON object found in the response")
 
-            queries = queries_response.get("queries", [])
-        except:
-            pass
+                    queries_response = queries_response[bracket_start:bracket_end]
+                    queries_response = json.loads(queries_response)
+                except Exception as e:
+                    queries_response = {"queries": [queries_response]}
+
+                queries = queries_response.get("queries", [])
+            except:
+                pass
 
     if len(queries) == 0:
         queries = [get_last_user_message(body["messages"])]
 
-    await __event_emitter__(
-        {
-            "type": "status",
-            "data": {
-                "action": "queries_generated",
-                "queries": queries,
-                "done": False,
-            },
-        }
-    )
+    if not all_full_context:
+        await __event_emitter__(
+            {
+                "type": "status",
+                "data": {
+                    "action": "queries_generated",
+                    "queries": queries,
+                    "done": False,
+                },
+            }
+        )
 
     try:
         # Offload get_sources_from_items to a separate thread
@@ -701,7 +706,8 @@ async def chat_completion_files_handler(
                     r=request.app.state.config.RELEVANCE_THRESHOLD,
                     hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT,
                     hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
-                    full_context=request.app.state.config.RAG_FULL_CONTEXT,
+                    full_context=all_full_context
+                    or request.app.state.config.RAG_FULL_CONTEXT,
                     user=user,
                 ),
             )