From e146a5f613f987712db390df3f777122c7c242a3 Mon Sep 17 00:00:00 2001 From: i1046 Date: Fri, 28 Nov 2025 12:10:12 +0100 Subject: [PATCH 1/3] adding rag threshold logic --- backend/open_webui/config.py | 6 ++++ backend/open_webui/main.py | 2 ++ backend/open_webui/retrieval/utils.py | 6 ++++ backend/open_webui/routers/retrieval.py | 39 ++++++++++++++++++++++++- 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 5a9844c067..7b8054d56e 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -2625,6 +2625,12 @@ RAG_FULL_CONTEXT = PersistentConfig( os.getenv("RAG_FULL_CONTEXT", "False").lower() == "true", ) +RAG_TOKEN_THRESHOLD = PersistentConfig( + "RAG_TOKEN_THRESHOLD", + "rag.token_threshold", + int(os.environ.get("RAG_TOKEN_THRESHOLD", "0")), +) + RAG_FILE_MAX_COUNT = PersistentConfig( "RAG_FILE_MAX_COUNT", "rag.file.max_count", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index af8e670a53..4d6f9c3866 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -218,6 +218,7 @@ from open_webui.config import ( RAG_TEMPLATE, DEFAULT_RAG_TEMPLATE, RAG_FULL_CONTEXT, + RAG_TOKEN_THRESHOLD, BYPASS_EMBEDDING_AND_RETRIEVAL, RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, @@ -840,6 +841,7 @@ app.state.config.FILE_IMAGE_COMPRESSION_HEIGHT = FILE_IMAGE_COMPRESSION_HEIGHT app.state.config.RAG_FULL_CONTEXT = RAG_FULL_CONTEXT +app.state.config.RAG_TOKEN_THRESHOLD = RAG_TOKEN_THRESHOLD app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL = BYPASS_EMBEDDING_AND_RETRIEVAL app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH app.state.config.ENABLE_RAG_HYBRID_SEARCH_ENRICHED_TEXTS = ( diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py index b041a00471..f099a062fd 100644 --- a/backend/open_webui/retrieval/utils.py +++ b/backend/open_webui/retrieval/utils.py @@ -1025,9 +1025,15 @@ async def 
get_sources_from_items( "metadatas": [[{"url": item.get("url"), "name": item.get("url")}]], } elif item.get("type") == "file": + file_bypassed_rag = False + if item.get("id"): + file_object = Files.get_file_by_id(item.get("id")) + if file_object and file_object.meta: + file_bypassed_rag = file_object.meta.get("bypass_rag", False) if ( item.get("context") == "full" or request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + or file_bypassed_rag ): if item.get("file", {}).get("data", {}).get("content", ""): # Manual Full Mode Toggle diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 6080337250..97fb3aef93 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -440,6 +440,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "TOP_K": request.app.state.config.TOP_K, "BYPASS_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL, "RAG_FULL_CONTEXT": request.app.state.config.RAG_FULL_CONTEXT, + "RAG_TOKEN_THRESHOLD": request.app.state.config.RAG_TOKEN_THRESHOLD, # Hybrid search settings "ENABLE_RAG_HYBRID_SEARCH": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH, "ENABLE_RAG_HYBRID_SEARCH_ENRICHED_TEXTS": request.app.state.config.ENABLE_RAG_HYBRID_SEARCH_ENRICHED_TEXTS, @@ -614,6 +615,7 @@ class ConfigForm(BaseModel): TOP_K: Optional[int] = None BYPASS_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None RAG_FULL_CONTEXT: Optional[bool] = None + RAG_TOKEN_THRESHOLD: Optional[int] = None # Hybrid search settings ENABLE_RAG_HYBRID_SEARCH: Optional[bool] = None @@ -707,6 +709,11 @@ async def update_rag_config( if form_data.RAG_FULL_CONTEXT is not None else request.app.state.config.RAG_FULL_CONTEXT ) + request.app.state.config.RAG_TOKEN_THRESHOLD = ( + form_data.RAG_TOKEN_THRESHOLD + if form_data.RAG_TOKEN_THRESHOLD is not None + else request.app.state.config.RAG_TOKEN_THRESHOLD + ) # Hybrid search settings 
request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( @@ -1591,7 +1598,37 @@ def process_file( hash = calculate_sha256_string(text_content) Files.update_file_hash_by_id(file.id, hash) - if request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: + should_bypass = request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + token_count = 0 + + if not should_bypass and request.app.state.config.RAG_TOKEN_THRESHOLD > 0: + try: + encoding = tiktoken.get_encoding( + str(request.app.state.config.TIKTOKEN_ENCODING_NAME) + ) + token_count = len(encoding.encode(text_content)) + + if token_count <= request.app.state.config.RAG_TOKEN_THRESHOLD: + should_bypass = True + log.info( + f"File '{file.filename}': {token_count} tokens " + f"(<= {request.app.state.config.RAG_TOKEN_THRESHOLD}), bypassing RAG" + ) + else: + log.info( + f"File '{file.filename}': {token_count} tokens " + f"(> {request.app.state.config.RAG_TOKEN_THRESHOLD}), using RAG" + ) + except Exception as e: + log.warning(f"Error counting tokens: {e}") + + if should_bypass: + Files.update_file_data_by_id(file.id, {"status": "completed"}) + + Files.update_file_metadata_by_id( + file.id, + {"bypass_rag": True} + ) Files.update_file_data_by_id(file.id, {"status": "completed"}) return { "status": True, From 86a9d4fad66c6c6f6ae0452792ff03d7585a0231 Mon Sep 17 00:00:00 2001 From: i1046 Date: Fri, 28 Nov 2025 12:19:20 +0100 Subject: [PATCH 2/3] front + remove duplicate --- backend/open_webui/routers/retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 97fb3aef93..1e9d949c28 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1629,7 +1629,7 @@ def process_file( file.id, {"bypass_rag": True} ) - Files.update_file_data_by_id(file.id, {"status": "completed"}) + return { "status": True, "collection_name": None, From 44637399c0143fb2a02c65eaede9f418d7ed5c68 Mon Sep 17 
00:00:00 2001 From: i1046 Date: Fri, 28 Nov 2025 12:19:38 +0100 Subject: [PATCH 3/3] front --- .../components/admin/Settings/Documents.svelte | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 548583ee8a..7ac86fca25 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -705,6 +705,22 @@ {#if !RAGConfig.BYPASS_EMBEDDING_AND_RETRIEVAL} +
+
+ {$i18n.t('Token Threshold for RAG')} +
+
+ +
+
+
{$i18n.t('Text Splitter')}
@@ -712,6 +728,7 @@ class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 text-xs bg-transparent outline-hidden text-right" bind:value={RAGConfig.TEXT_SPLITTER} > +