diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index ab93054dab..600c33afa1 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -241,13 +241,13 @@ class SearchForm(BaseModel): async def get_status(request: Request): return { "status": True, - "chunk_size": request.app.state.config.CHUNK_SIZE, - "chunk_overlap": request.app.state.config.CHUNK_OVERLAP, - "template": request.app.state.config.RAG_TEMPLATE, - "embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE, - "embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL, - "reranking_model": request.app.state.config.RAG_RERANKING_MODEL, - "embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, + "CHUNK_SIZE": request.app.state.config.CHUNK_SIZE, + "CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP, + "RAG_TEMPLATE": request.app.state.config.RAG_TEMPLATE, + "RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE, + "RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL, + "RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL, + "RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, "ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING, } @@ -256,9 +256,9 @@ async def get_status(request: Request): async def get_embedding_config(request: Request, user=Depends(get_admin_user)): return { "status": True, - "embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE, - "embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL, - "embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, + "RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE, + "RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL, + "RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, "ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING, "openai_config": { "url": request.app.state.config.RAG_OPENAI_API_BASE_URL, @@ -296,19 +296,13 @@ class EmbeddingModelUpdateForm(BaseModel): openai_config: Optional[OpenAIConfigForm] = None ollama_config: Optional[OllamaConfigForm] = None azure_openai_config: Optional[AzureOpenAIConfigForm] = None - embedding_engine: str - embedding_model: str - embedding_batch_size: Optional[int] = 1 + RAG_EMBEDDING_ENGINE: str + RAG_EMBEDDING_MODEL: str + RAG_EMBEDDING_BATCH_SIZE: Optional[int] = 1 ENABLE_ASYNC_EMBEDDING: Optional[bool] = True -@router.post("/embedding/update") -async def update_embedding_config( - request: Request, form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user) -): - log.info( - f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" - ) +def unload_embedding_model(request: Request): if request.app.state.config.RAG_EMBEDDING_ENGINE == "": # unloads current internal embedding model and clears VRAM cache request.app.state.ef = None @@ -321,9 +315,25 @@ async def update_embedding_config( if torch.cuda.is_available(): torch.cuda.empty_cache() + + +@router.post("/embedding/update") +async def update_embedding_config( + request: Request, form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user) +): + log.info( + f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.RAG_EMBEDDING_MODEL}" + ) + unload_embedding_model(request) try: - request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine - request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model + request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.RAG_EMBEDDING_ENGINE + request.app.state.config.RAG_EMBEDDING_MODEL = form_data.RAG_EMBEDDING_MODEL + request.app.state.config.RAG_EMBEDDING_BATCH_SIZE = ( + form_data.RAG_EMBEDDING_BATCH_SIZE + ) + request.app.state.config.ENABLE_ASYNC_EMBEDDING = ( + form_data.ENABLE_ASYNC_EMBEDDING + ) if request.app.state.config.RAG_EMBEDDING_ENGINE in [ "ollama", @@ -357,14 +367,6 @@ async def update_embedding_config( form_data.azure_openai_config.version ) - request.app.state.config.RAG_EMBEDDING_BATCH_SIZE = ( - form_data.embedding_batch_size - ) - - request.app.state.config.ENABLE_ASYNC_EMBEDDING = ( - form_data.enable_async_embedding - ) - request.app.state.ef = get_ef( request.app.state.config.RAG_EMBEDDING_ENGINE, request.app.state.config.RAG_EMBEDDING_MODEL, @@ -403,9 +405,9 @@ async def update_embedding_config( return { "status": True, - "embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE, - "embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL, - "embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, + "RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE, + "RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL, + "RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE, "ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING, "openai_config": { "url": request.app.state.config.RAG_OPENAI_API_BASE_URL, diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index b837308635..8186430a92 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -38,9 +38,9 @@ let showResetUploadDirConfirm = false; let showReindexConfirm = false; - let embeddingEngine = ''; - let embeddingModel = ''; - let embeddingBatchSize = 1; + let RAG_EMBEDDING_ENGINE = ''; + let RAG_EMBEDDING_MODEL = ''; + let RAG_EMBEDDING_BATCH_SIZE = 1; let ENABLE_ASYNC_EMBEDDING = true; let rerankingModel = ''; @@ -66,7 +66,7 @@ let RAGConfig = null; const embeddingModelUpdateHandler = async () => { - if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) { + if (RAG_EMBEDDING_ENGINE === '' && RAG_EMBEDDING_MODEL.split('/').length - 1 > 1) { toast.error( $i18n.t( 'Model filesystem path detected. Model shortname is required for update, cannot continue.' @@ -74,7 +74,7 @@ ); return; } - if (embeddingEngine === 'ollama' && embeddingModel === '') { + if (RAG_EMBEDDING_ENGINE === 'ollama' && RAG_EMBEDDING_MODEL === '') { toast.error( $i18n.t( 'Model filesystem path detected. Model shortname is required for update, cannot continue.' @@ -83,7 +83,7 @@ return; } - if (embeddingEngine === 'openai' && embeddingModel === '') { + if (RAG_EMBEDDING_ENGINE === 'openai' && RAG_EMBEDDING_MODEL === '') { toast.error( $i18n.t( 'Model filesystem path detected. Model shortname is required for update, cannot continue.' @@ -93,20 +93,25 @@ } if ( - embeddingEngine === 'azure_openai' && + RAG_EMBEDDING_ENGINE === 'azure_openai' && (AzureOpenAIKey === '' || AzureOpenAIUrl === '' || AzureOpenAIVersion === '') ) { toast.error($i18n.t('OpenAI URL/Key required.')); return; } - console.debug('Update embedding model attempt:', embeddingModel); + console.debug('Update embedding model attempt:', { + RAG_EMBEDDING_ENGINE, + RAG_EMBEDDING_MODEL, + RAG_EMBEDDING_BATCH_SIZE, + ENABLE_ASYNC_EMBEDDING + }); updateEmbeddingModelLoading = true; const res = await updateEmbeddingConfig(localStorage.token, { - embedding_engine: embeddingEngine, - embedding_model: embeddingModel, - embedding_batch_size: embeddingBatchSize, + RAG_EMBEDDING_ENGINE: RAG_EMBEDDING_ENGINE, + RAG_EMBEDDING_MODEL: RAG_EMBEDDING_MODEL, + RAG_EMBEDDING_BATCH_SIZE: RAG_EMBEDDING_BATCH_SIZE, ENABLE_ASYNC_EMBEDDING: ENABLE_ASYNC_EMBEDDING, ollama_config: { key: OllamaKey, @@ -237,9 +242,9 @@ const embeddingConfig = await getEmbeddingConfig(localStorage.token); if (embeddingConfig) { - embeddingEngine = embeddingConfig.embedding_engine; - embeddingModel = embeddingConfig.embedding_model; - embeddingBatchSize = embeddingConfig.embedding_batch_size ?? 1; + RAG_EMBEDDING_ENGINE = embeddingConfig.RAG_EMBEDDING_ENGINE; + RAG_EMBEDDING_MODEL = embeddingConfig.RAG_EMBEDDING_MODEL; + RAG_EMBEDDING_BATCH_SIZE = embeddingConfig.RAG_EMBEDDING_BATCH_SIZE ?? 1; ENABLE_ASYNC_EMBEDDING = embeddingConfig.ENABLE_ASYNC_EMBEDDING ?? true; OpenAIKey = embeddingConfig.openai_config.key; @@ -772,17 +777,17 @@