From 39fe385017a190b773522fbfd5cd11fb0618ae24 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Wed, 20 Aug 2025 13:30:45 +0200 Subject: [PATCH 1/6] Correctly unloads embedding/reranker models --- backend/open_webui/main.py | 18 +++++----- backend/open_webui/routers/retrieval.py | 45 ++++++++++++++++--------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index d5b89c8d50..43ab357538 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,14 +924,16 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - - app.state.rf = get_rf( - app.state.config.RAG_RERANKING_ENGINE, - app.state.config.RAG_RERANKING_MODEL, - app.state.config.RAG_EXTERNAL_RERANKER_URL, - app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, - RAG_RERANKING_MODEL_AUTO_UPDATE, - ) + if ENABLE_RAG_HYBRID_SEARCH and not BYPASS_EMBEDDING_AND_RETRIEVAL: + app.state.rf = get_rf( + app.state.config.RAG_RERANKING_ENGINE, + app.state.config.RAG_RERANKING_MODEL, + app.state.config.RAG_EXTERNAL_RERANKER_URL, + app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, + RAG_RERANKING_MODEL_AUTO_UPDATE, + ) + else: + app.state.rf = None except Exception as e: log.error(f"Error updating models: {e}") pass diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 4a0d327c0b..a6a0f05da0 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -321,6 +321,14 @@ async def update_embedding_config( form_data.embedding_batch_size ) + # unloads current embedding model and clears VRAM cache + request.app.state.ef = None + request.app.state.EMBEDDING_FUNCTION = None + import gc + gc.collect() + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.ef = get_ef( request.app.state.config.RAG_EMBEDDING_ENGINE, request.app.state.config.RAG_EMBEDDING_MODEL, @@ -653,9 +661,6 @@ async def 
update_rag_config( if form_data.ENABLE_RAG_HYBRID_SEARCH is not None else request.app.state.config.ENABLE_RAG_HYBRID_SEARCH ) - # Free up memory if hybrid search is disabled - if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: - request.app.state.rf = None request.app.state.config.TOP_K_RERANKER = ( form_data.TOP_K_RERANKER @@ -838,19 +843,29 @@ async def update_rag_config( ) try: - request.app.state.rf = get_rf( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.config.RAG_EXTERNAL_RERANKER_URL, - request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, - True, - ) + # Unloading the reranker and clear VRAM memory. + if request.app.state.rf != None: + request.app.state.rf = None + request.app.state.RERANKING_FUNCTION = None + import gc + gc.collect() + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.BYPASS_EMBEDDING_AND_RETRIEVAL: + request.app.state.rf = get_rf( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.config.RAG_EXTERNAL_RERANKER_URL, + request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, + True, + ) - request.app.state.RERANKING_FUNCTION = get_reranking_function( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.rf, - ) + request.app.state.RERANKING_FUNCTION = get_reranking_function( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.rf, + ) except Exception as e: log.error(f"Error loading reranking model: {e}") request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = False From cd02ff2e079eb4840417c63b362dddc03cf96612 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Wed, 20 Aug 2025 14:07:13 +0200 Subject: [PATCH 2/6] Fix if checks --- backend/open_webui/main.py | 2 +- 
backend/open_webui/routers/retrieval.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 43ab357538..461600351a 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,7 +924,7 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - if ENABLE_RAG_HYBRID_SEARCH and not BYPASS_EMBEDDING_AND_RETRIEVAL: + if app.state.config.ENABLE_RAG_HYBRID_SEARCH and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: app.state.rf = get_rf( app.state.config.RAG_RERANKING_ENGINE, app.state.config.RAG_RERANKING_MODEL, diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index a6a0f05da0..abc1f53965 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -852,7 +852,7 @@ async def update_rag_config( import torch if torch.cuda.is_available(): torch.cuda.empty_cache() - if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.BYPASS_EMBEDDING_AND_RETRIEVAL: + if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL, From 6663fc3a6c69db90c7c49632b3654800fb24f5ef Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 10:49:03 +0200 Subject: [PATCH 3/6] Unloads only if internal models are used. 
--- backend/open_webui/routers/retrieval.py | 35 ++++++++++++------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index abc1f53965..b863e84385 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -4,7 +4,7 @@ import mimetypes import os import shutil import asyncio - +import torch import uuid from datetime import datetime @@ -281,6 +281,14 @@ async def update_embedding_config( log.info( f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) + if request.app.state.config.RAG_EMBEDDING_ENGINE == '': + # unloads current internal embedding model and clears VRAM cache + request.app.state.ef = None + request.app.state.EMBEDDING_FUNCTION = None + import gc + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model @@ -321,14 +329,6 @@ async def update_embedding_config( form_data.embedding_batch_size ) - # unloads current embedding model and clears VRAM cache - request.app.state.ef = None - request.app.state.EMBEDDING_FUNCTION = None - import gc - gc.collect() - import torch - if torch.cuda.is_available(): - torch.cuda.empty_cache() request.app.state.ef = get_ef( request.app.state.config.RAG_EMBEDDING_ENGINE, request.app.state.config.RAG_EMBEDDING_MODEL, @@ -814,6 +814,14 @@ async def update_rag_config( ) # Reranking settings + if request.app.state.config.RAG_RERANKING_ENGINE == '': + # Unloading the internal reranker and clear VRAM memory + request.app.state.rf = None + request.app.state.RERANKING_FUNCTION = None + import gc + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = ( form_data.RAG_RERANKING_ENGINE if 
form_data.RAG_RERANKING_ENGINE is not None @@ -843,15 +851,6 @@ async def update_rag_config( ) try: - # Unloading the reranker and clear VRAM memory. - if request.app.state.rf != None: - request.app.state.rf = None - request.app.state.RERANKING_FUNCTION = None - import gc - gc.collect() - import torch - if torch.cuda.is_available(): - torch.cuda.empty_cache() if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, From b3de3295d650fe92bad74cb792ba802653ebb807 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:19:24 +0200 Subject: [PATCH 4/6] Change torch import to conditional import --- backend/open_webui/routers/retrieval.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index b863e84385..51a81b1fd7 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -4,7 +4,6 @@ import mimetypes import os import shutil import asyncio -import torch import uuid from datetime import datetime @@ -287,8 +286,10 @@ async def update_embedding_config( request.app.state.EMBEDDING_FUNCTION = None import gc gc.collect() - if torch.cuda.is_available(): - torch.cuda.empty_cache() + if DEVICE_TYPE == 'cuda': + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model @@ -820,8 +821,10 @@ async def update_rag_config( request.app.state.RERANKING_FUNCTION = None import gc gc.collect() - if torch.cuda.is_available(): - torch.cuda.empty_cache() + if DEVICE_TYPE == 'cuda': + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = (
form_data.RAG_RERANKING_ENGINE if form_data.RAG_RERANKING_ENGINE is not None From c821c3ecb06e19872bea5378bbcee6deeed066bb Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:40:56 +0200 Subject: [PATCH 5/6] Formatting --- backend/open_webui/main.py | 5 ++++- backend/open_webui/routers/retrieval.py | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 461600351a..e82347088f 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,7 +924,10 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - if app.state.config.ENABLE_RAG_HYBRID_SEARCH and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: + if ( + app.state.config.ENABLE_RAG_HYBRID_SEARCH + and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + ): app.state.rf = get_rf( app.state.config.RAG_RERANKING_ENGINE, app.state.config.RAG_RERANKING_MODEL, diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 51a81b1fd7..d75677cb43 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -280,14 +280,16 @@ async def update_embedding_config( log.info( f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) - if request.app.state.config.RAG_EMBEDDING_ENGINE == '': + if request.app.state.config.RAG_EMBEDDING_ENGINE == "": # unloads current internal embedding model and clears VRAM cache request.app.state.ef = None request.app.state.EMBEDDING_FUNCTION = None import gc + gc.collect() - if DEVICE_TYPE == 'cuda': + if DEVICE_TYPE == "cuda": import torch + if torch.cuda.is_available(): torch.cuda.empty_cache() try: @@ -815,14 +817,16 @@ async def update_rag_config( ) # Reranking settings - if request.app.state.config.RAG_RERANKING_ENGINE == '': + if request.app.state.config.RAG_RERANKING_ENGINE == "": # Unloading the 
internal reranker and clear VRAM memory request.app.state.rf = None request.app.state.RERANKING_FUNCTION = None import gc + gc.collect() - if DEVICE_TYPE == 'cuda': + if DEVICE_TYPE == "cuda": import torch + if torch.cuda.is_available(): torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = ( @@ -854,7 +858,10 @@ async def update_rag_config( ) try: - if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: + if ( + request.app.state.config.ENABLE_RAG_HYBRID_SEARCH + and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + ): request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL, From f2e78d79407004dfe5bc40a6ef3e71bf4088d273 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:42:03 +0200 Subject: [PATCH 6/6] More formatting --- backend/open_webui/routers/retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index d75677cb43..c990c94b61 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -861,7 +861,7 @@ async def update_rag_config( if ( request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL - ): + ): request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL,