This commit is contained in:
Timothy Jaeryang Baek 2025-11-25 02:05:27 -05:00
parent 2328dc284e
commit 488631db98
2 changed files with 69 additions and 62 deletions

View file

@ -241,13 +241,13 @@ class SearchForm(BaseModel):
async def get_status(request: Request):
return {
"status": True,
"chunk_size": request.app.state.config.CHUNK_SIZE,
"chunk_overlap": request.app.state.config.CHUNK_OVERLAP,
"template": request.app.state.config.RAG_TEMPLATE,
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
"reranking_model": request.app.state.config.RAG_RERANKING_MODEL,
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"CHUNK_SIZE": request.app.state.config.CHUNK_SIZE,
"CHUNK_OVERLAP": request.app.state.config.CHUNK_OVERLAP,
"RAG_TEMPLATE": request.app.state.config.RAG_TEMPLATE,
"RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE,
"RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL,
"RAG_RERANKING_MODEL": request.app.state.config.RAG_RERANKING_MODEL,
"RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
}
@ -256,9 +256,9 @@ async def get_status(request: Request):
async def get_embedding_config(request: Request, user=Depends(get_admin_user)):
return {
"status": True,
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE,
"RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL,
"RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
"openai_config": {
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,
@ -296,19 +296,13 @@ class EmbeddingModelUpdateForm(BaseModel):
openai_config: Optional[OpenAIConfigForm] = None
ollama_config: Optional[OllamaConfigForm] = None
azure_openai_config: Optional[AzureOpenAIConfigForm] = None
embedding_engine: str
embedding_model: str
embedding_batch_size: Optional[int] = 1
RAG_EMBEDDING_ENGINE: str
RAG_EMBEDDING_MODEL: str
RAG_EMBEDDING_BATCH_SIZE: Optional[int] = 1
ENABLE_ASYNC_EMBEDDING: Optional[bool] = True
@router.post("/embedding/update")
async def update_embedding_config(
request: Request, form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
):
log.info(
f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}"
)
def unload_embedding_model(request: Request):
if request.app.state.config.RAG_EMBEDDING_ENGINE == "":
# unloads current internal embedding model and clears VRAM cache
request.app.state.ef = None
@ -321,9 +315,25 @@ async def update_embedding_config(
if torch.cuda.is_available():
torch.cuda.empty_cache()
@router.post("/embedding/update")
async def update_embedding_config(
request: Request, form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
):
log.info(
f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.RAG_EMBEDDING_MODEL}"
)
unload_embedding_model(request)
try:
request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model
request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.RAG_EMBEDDING_ENGINE
request.app.state.config.RAG_EMBEDDING_MODEL = form_data.RAG_EMBEDDING_MODEL
request.app.state.config.RAG_EMBEDDING_BATCH_SIZE = (
form_data.RAG_EMBEDDING_BATCH_SIZE
)
request.app.state.config.ENABLE_ASYNC_EMBEDDING = (
form_data.ENABLE_ASYNC_EMBEDDING
)
if request.app.state.config.RAG_EMBEDDING_ENGINE in [
"ollama",
@ -357,14 +367,6 @@ async def update_embedding_config(
form_data.azure_openai_config.version
)
request.app.state.config.RAG_EMBEDDING_BATCH_SIZE = (
form_data.embedding_batch_size
)
request.app.state.config.ENABLE_ASYNC_EMBEDDING = (
form_data.enable_async_embedding
)
request.app.state.ef = get_ef(
request.app.state.config.RAG_EMBEDDING_ENGINE,
request.app.state.config.RAG_EMBEDDING_MODEL,
@ -403,9 +405,9 @@ async def update_embedding_config(
return {
"status": True,
"embedding_engine": request.app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": request.app.state.config.RAG_EMBEDDING_MODEL,
"embedding_batch_size": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"RAG_EMBEDDING_ENGINE": request.app.state.config.RAG_EMBEDDING_ENGINE,
"RAG_EMBEDDING_MODEL": request.app.state.config.RAG_EMBEDDING_MODEL,
"RAG_EMBEDDING_BATCH_SIZE": request.app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"ENABLE_ASYNC_EMBEDDING": request.app.state.config.ENABLE_ASYNC_EMBEDDING,
"openai_config": {
"url": request.app.state.config.RAG_OPENAI_API_BASE_URL,

View file

@ -38,9 +38,9 @@
let showResetUploadDirConfirm = false;
let showReindexConfirm = false;
let embeddingEngine = '';
let embeddingModel = '';
let embeddingBatchSize = 1;
let RAG_EMBEDDING_ENGINE = '';
let RAG_EMBEDDING_MODEL = '';
let RAG_EMBEDDING_BATCH_SIZE = 1;
let ENABLE_ASYNC_EMBEDDING = true;
let rerankingModel = '';
@ -66,7 +66,7 @@
let RAGConfig = null;
const embeddingModelUpdateHandler = async () => {
if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) {
if (RAG_EMBEDDING_ENGINE === '' && RAG_EMBEDDING_MODEL.split('/').length - 1 > 1) {
toast.error(
$i18n.t(
'Model filesystem path detected. Model shortname is required for update, cannot continue.'
@ -74,7 +74,7 @@
);
return;
}
if (embeddingEngine === 'ollama' && embeddingModel === '') {
if (RAG_EMBEDDING_ENGINE === 'ollama' && RAG_EMBEDDING_MODEL === '') {
toast.error(
$i18n.t(
'Model filesystem path detected. Model shortname is required for update, cannot continue.'
@ -83,7 +83,7 @@
return;
}
if (embeddingEngine === 'openai' && embeddingModel === '') {
if (RAG_EMBEDDING_ENGINE === 'openai' && RAG_EMBEDDING_MODEL === '') {
toast.error(
$i18n.t(
'Model filesystem path detected. Model shortname is required for update, cannot continue.'
@ -93,20 +93,25 @@
}
if (
embeddingEngine === 'azure_openai' &&
RAG_EMBEDDING_ENGINE === 'azure_openai' &&
(AzureOpenAIKey === '' || AzureOpenAIUrl === '' || AzureOpenAIVersion === '')
) {
toast.error($i18n.t('OpenAI URL/Key required.'));
return;
}
console.debug('Update embedding model attempt:', embeddingModel);
console.debug('Update embedding model attempt:', {
RAG_EMBEDDING_ENGINE,
RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_BATCH_SIZE,
ENABLE_ASYNC_EMBEDDING
});
updateEmbeddingModelLoading = true;
const res = await updateEmbeddingConfig(localStorage.token, {
embedding_engine: embeddingEngine,
embedding_model: embeddingModel,
embedding_batch_size: embeddingBatchSize,
RAG_EMBEDDING_ENGINE: RAG_EMBEDDING_ENGINE,
RAG_EMBEDDING_MODEL: RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_BATCH_SIZE: RAG_EMBEDDING_BATCH_SIZE,
ENABLE_ASYNC_EMBEDDING: ENABLE_ASYNC_EMBEDDING,
ollama_config: {
key: OllamaKey,
@ -237,9 +242,9 @@
const embeddingConfig = await getEmbeddingConfig(localStorage.token);
if (embeddingConfig) {
embeddingEngine = embeddingConfig.embedding_engine;
embeddingModel = embeddingConfig.embedding_model;
embeddingBatchSize = embeddingConfig.embedding_batch_size ?? 1;
RAG_EMBEDDING_ENGINE = embeddingConfig.RAG_EMBEDDING_ENGINE;
RAG_EMBEDDING_MODEL = embeddingConfig.RAG_EMBEDDING_MODEL;
RAG_EMBEDDING_BATCH_SIZE = embeddingConfig.RAG_EMBEDDING_BATCH_SIZE ?? 1;
ENABLE_ASYNC_EMBEDDING = embeddingConfig.ENABLE_ASYNC_EMBEDDING ?? true;
OpenAIKey = embeddingConfig.openai_config.key;
@ -772,17 +777,17 @@
<div class="flex items-center relative">
<select
class="dark:bg-gray-900 w-fit pr-8 rounded-sm px-2 p-1 text-xs bg-transparent outline-hidden text-right"
bind:value={embeddingEngine}
bind:value={RAG_EMBEDDING_ENGINE}
placeholder={$i18n.t('Select an embedding model engine')}
on:change={(e) => {
if (e.target.value === 'ollama') {
embeddingModel = '';
RAG_EMBEDDING_MODEL = '';
} else if (e.target.value === 'openai') {
embeddingModel = 'text-embedding-3-small';
RAG_EMBEDDING_MODEL = 'text-embedding-3-small';
} else if (e.target.value === 'azure_openai') {
embeddingModel = 'text-embedding-3-small';
RAG_EMBEDDING_MODEL = 'text-embedding-3-small';
} else if (e.target.value === '') {
embeddingModel = 'sentence-transformers/all-MiniLM-L6-v2';
RAG_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2';
}
}}
>
@ -794,7 +799,7 @@
</div>
</div>
{#if embeddingEngine === 'openai'}
{#if RAG_EMBEDDING_ENGINE === 'openai'}
<div class="my-0.5 flex gap-2 pr-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
@ -809,7 +814,7 @@
required={false}
/>
</div>
{:else if embeddingEngine === 'ollama'}
{:else if RAG_EMBEDDING_ENGINE === 'ollama'}
<div class="my-0.5 flex gap-2 pr-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
@ -824,7 +829,7 @@
required={false}
/>
</div>
{:else if embeddingEngine === 'azure_openai'}
{:else if RAG_EMBEDDING_ENGINE === 'azure_openai'}
<div class="my-0.5 flex flex-col gap-2 pr-2 w-full">
<div class="flex gap-2">
<input
@ -851,12 +856,12 @@
<div class=" mb-1 text-xs font-medium">{$i18n.t('Embedding Model')}</div>
<div class="">
{#if embeddingEngine === 'ollama'}
{#if RAG_EMBEDDING_ENGINE === 'ollama'}
<div class="flex w-full">
<div class="flex-1 mr-2">
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
bind:value={embeddingModel}
bind:value={RAG_EMBEDDING_MODEL}
placeholder={$i18n.t('Set embedding model')}
required
/>
@ -868,13 +873,13 @@
<input
class="flex-1 w-full text-sm bg-transparent outline-hidden"
placeholder={$i18n.t('Set embedding model (e.g. {{model}})', {
model: embeddingModel.slice(-40)
model: RAG_EMBEDDING_MODEL.slice(-40)
})}
bind:value={embeddingModel}
bind:value={RAG_EMBEDDING_MODEL}
/>
</div>
{#if embeddingEngine === ''}
{#if RAG_EMBEDDING_ENGINE === ''}
<button
class="px-2.5 bg-transparent text-gray-800 dark:bg-transparent dark:text-gray-100 rounded-lg transition"
on:click={() => {
@ -914,7 +919,7 @@
</div>
</div>
{#if embeddingEngine === 'ollama' || embeddingEngine === 'openai' || embeddingEngine === 'azure_openai'}
{#if RAG_EMBEDDING_ENGINE === 'ollama' || RAG_EMBEDDING_ENGINE === 'openai' || RAG_EMBEDDING_ENGINE === 'azure_openai'}
<div class=" mb-2.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Embedding Batch Size')}
@ -922,7 +927,7 @@
<div class="">
<input
bind:value={embeddingBatchSize}
bind:value={RAG_EMBEDDING_BATCH_SIZE}
type="number"
class=" bg-transparent text-center w-14 outline-none"
min="-2"