From 39fe385017a190b773522fbfd5cd11fb0618ae24 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Wed, 20 Aug 2025 13:30:45 +0200 Subject: [PATCH 1/6] Correctly unloads embedding/reranker models --- backend/open_webui/main.py | 18 +++++----- backend/open_webui/routers/retrieval.py | 45 ++++++++++++++++--------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index d5b89c8d50..43ab357538 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,14 +924,16 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - - app.state.rf = get_rf( - app.state.config.RAG_RERANKING_ENGINE, - app.state.config.RAG_RERANKING_MODEL, - app.state.config.RAG_EXTERNAL_RERANKER_URL, - app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, - RAG_RERANKING_MODEL_AUTO_UPDATE, - ) + if ENABLE_RAG_HYBRID_SEARCH and not BYPASS_EMBEDDING_AND_RETRIEVAL: + app.state.rf = get_rf( + app.state.config.RAG_RERANKING_ENGINE, + app.state.config.RAG_RERANKING_MODEL, + app.state.config.RAG_EXTERNAL_RERANKER_URL, + app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, + RAG_RERANKING_MODEL_AUTO_UPDATE, + ) + else: + app.state.rf = None except Exception as e: log.error(f"Error updating models: {e}") pass diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 4a0d327c0b..a6a0f05da0 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -321,6 +321,14 @@ async def update_embedding_config( form_data.embedding_batch_size ) + # unloads current embedding model and clears VRAM cache + request.app.state.ef = None + request.app.state.EMBEDDING_FUNCTION = None + import gc + gc.collect() + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.ef = get_ef( request.app.state.config.RAG_EMBEDDING_ENGINE, request.app.state.config.RAG_EMBEDDING_MODEL, @@ -653,9 +661,6 @@ async def 
update_rag_config( if form_data.ENABLE_RAG_HYBRID_SEARCH is not None else request.app.state.config.ENABLE_RAG_HYBRID_SEARCH ) - # Free up memory if hybrid search is disabled - if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: - request.app.state.rf = None request.app.state.config.TOP_K_RERANKER = ( form_data.TOP_K_RERANKER @@ -838,19 +843,29 @@ async def update_rag_config( ) try: - request.app.state.rf = get_rf( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.config.RAG_EXTERNAL_RERANKER_URL, - request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, - True, - ) + # Unloading the reranker and clear VRAM memory. + if request.app.state.rf != None: + request.app.state.rf = None + request.app.state.RERANKING_FUNCTION = None + import gc + gc.collect() + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.BYPASS_EMBEDDING_AND_RETRIEVAL: + request.app.state.rf = get_rf( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.config.RAG_EXTERNAL_RERANKER_URL, + request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, + True, + ) - request.app.state.RERANKING_FUNCTION = get_reranking_function( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.rf, - ) + request.app.state.RERANKING_FUNCTION = get_reranking_function( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.rf, + ) except Exception as e: log.error(f"Error loading reranking model: {e}") request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = False From cd02ff2e079eb4840417c63b362dddc03cf96612 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Wed, 20 Aug 2025 14:07:13 +0200 Subject: [PATCH 2/6] Fix if checks --- backend/open_webui/main.py | 2 +- 
backend/open_webui/routers/retrieval.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 43ab357538..461600351a 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,7 +924,7 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - if ENABLE_RAG_HYBRID_SEARCH and not BYPASS_EMBEDDING_AND_RETRIEVAL: + if app.state.config.ENABLE_RAG_HYBRID_SEARCH and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: app.state.rf = get_rf( app.state.config.RAG_RERANKING_ENGINE, app.state.config.RAG_RERANKING_MODEL, diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index a6a0f05da0..abc1f53965 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -852,7 +852,7 @@ async def update_rag_config( import torch if torch.cuda.is_available(): torch.cuda.empty_cache() - if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.BYPASS_EMBEDDING_AND_RETRIEVAL: + if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL, From 6663fc3a6c69db90c7c49632b3654800fb24f5ef Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 10:49:03 +0200 Subject: [PATCH 3/6] Unloads only if internal models are used. 
--- backend/open_webui/routers/retrieval.py | 35 ++++++++++++------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index abc1f53965..b863e84385 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -4,7 +4,7 @@ import mimetypes import os import shutil import asyncio - +import torch import uuid from datetime import datetime @@ -281,6 +281,14 @@ async def update_embedding_config( log.info( f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) + if request.app.state.config.RAG_EMBEDDING_ENGINE == '': + # unloads current internal embedding model and clears VRAM cache + request.app.state.ef = None + request.app.state.EMBEDDING_FUNCTION = None + import gc + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model @@ -321,14 +329,6 @@ async def update_embedding_config( form_data.embedding_batch_size ) - # unloads current embedding model and clears VRAM cache - request.app.state.ef = None - request.app.state.EMBEDDING_FUNCTION = None - import gc - gc.collect() - import torch - if torch.cuda.is_available(): - torch.cuda.empty_cache() request.app.state.ef = get_ef( request.app.state.config.RAG_EMBEDDING_ENGINE, request.app.state.config.RAG_EMBEDDING_MODEL, @@ -814,6 +814,14 @@ async def update_rag_config( ) # Reranking settings + if request.app.state.config.RAG_RERANKING_ENGINE == '': + # Unloading the internal reranker and clear VRAM memory + request.app.state.rf = None + request.app.state.RERANKING_FUNCTION = None + import gc + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = ( form_data.RAG_RERANKING_ENGINE if 
form_data.RAG_RERANKING_ENGINE is not None @@ -843,15 +851,6 @@ async def update_rag_config( ) try: - # Unloading the reranker and clear VRAM memory. - if request.app.state.rf != None: - request.app.state.rf = None - request.app.state.RERANKING_FUNCTION = None - import gc - gc.collect() - import torch - if torch.cuda.is_available(): - torch.cuda.empty_cache() if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, From b3de3295d650fe92bad74cb792ba802653ebb807 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:19:24 +0200 Subject: [PATCH 4/6] Change torch import to conditional import --- backend/open_webui/routers/retrieval.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index b863e84385..51a81b1fd7 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -4,7 +4,6 @@ import mimetypes import os import shutil import asyncio -import torch import uuid from datetime import datetime @@ -287,8 +286,10 @@ async def update_embedding_config( request.app.state.EMBEDDING_FUNCTION = None import gc gc.collect() - if torch.cuda.is_available(): - torch.cuda.empty_cache() + if DEVICE_TYPE == 'cuda': + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model @@ -820,8 +821,10 @@ async def update_rag_config( request.app.state.RERANKING_FUNCTION = None import gc gc.collect() - if torch.cuda.is_available(): - torch.cuda.empty_cache() + if DEVICE_TYPE == 'cuda': + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = (
form_data.RAG_RERANKING_ENGINE if form_data.RAG_RERANKING_ENGINE is not None From c821c3ecb06e19872bea5378bbcee6deeed066bb Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:40:56 +0200 Subject: [PATCH 5/6] Formatting --- backend/open_webui/main.py | 5 ++++- backend/open_webui/routers/retrieval.py | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index 461600351a..e82347088f 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -924,7 +924,10 @@ try: app.state.config.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, ) - if app.state.config.ENABLE_RAG_HYBRID_SEARCH and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: + if ( + app.state.config.ENABLE_RAG_HYBRID_SEARCH + and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + ): app.state.rf = get_rf( app.state.config.RAG_RERANKING_ENGINE, app.state.config.RAG_RERANKING_MODEL, diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index 51a81b1fd7..d75677cb43 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -280,14 +280,16 @@ async def update_embedding_config( log.info( f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) - if request.app.state.config.RAG_EMBEDDING_ENGINE == '': + if request.app.state.config.RAG_EMBEDDING_ENGINE == "": # unloads current internal embedding model and clears VRAM cache request.app.state.ef = None request.app.state.EMBEDDING_FUNCTION = None import gc + gc.collect() - if DEVICE_TYPE == 'cuda': + if DEVICE_TYPE == "cuda": import torch + if torch.cuda.is_available(): torch.cuda.empty_cache() try: @@ -815,14 +817,16 @@ async def update_rag_config( ) # Reranking settings - if request.app.state.config.RAG_RERANKING_ENGINE == '': + if request.app.state.config.RAG_RERANKING_ENGINE == "": # Unloading the 
internal reranker and clear VRAM memory request.app.state.rf = None request.app.state.RERANKING_FUNCTION = None import gc + gc.collect() - if DEVICE_TYPE == 'cuda': + if DEVICE_TYPE == "cuda": import torch + if torch.cuda.is_available(): torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = ( @@ -854,7 +858,10 @@ async def update_rag_config( ) try: - if request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL: + if ( + request.app.state.config.ENABLE_RAG_HYBRID_SEARCH + and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + ): request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL, From f2e78d79407004dfe5bc40a6ef3e71bf4088d273 Mon Sep 17 00:00:00 2001 From: Marko Henning Date: Thu, 21 Aug 2025 13:42:03 +0200 Subject: [PATCH 6/6] More formatting --- backend/open_webui/routers/retrieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index d75677cb43..c990c94b61 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -861,7 +861,7 @@ async def update_rag_config( if ( request.app.state.config.ENABLE_RAG_HYBRID_SEARCH and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL - ): + ): request.app.state.rf = get_rf( request.app.state.config.RAG_RERANKING_ENGINE, request.app.state.config.RAG_RERANKING_MODEL,