diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7ee69b3a7..016794f404 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.3.33] - 2024-10-24
+
+### Added
+
+- **🏆 Evaluation Leaderboard**: Easily track your performance through a new leaderboard system where your ratings contribute to a real-time ranking based on the Elo system (a minimal sketch of the update rule follows this changelog diff). Sibling responses (regenerations, multi-model chats) are required for your ratings to count in the leaderboard. Additionally, you can opt in to share your feedback history and be part of the community-wide leaderboard. Expect further improvements as we refine the algorithm—help us build the best community leaderboard!
+- **⚔️ Arena Model Evaluation**: Enable blind A/B testing of models directly from Admin Settings > Evaluation for a true side-by-side comparison. Ideal for pinpointing the best model for your needs.
+- **🎯 Topic-Based Leaderboard**: Discover more accurate rankings with experimental topic-based reranking, which adjusts leaderboard standings based on tag similarity in feedback. Get more relevant insights based on specific topics!
+- **📁 Folders Support for Chats**: Organize your chats better by grouping them into folders. Drag and drop chats between folders and export them seamlessly for easy sharing or analysis.
+- **📤 Easy Chat Import via Drag & Drop**: Save time by simply dragging and dropping chat exports (JSON) directly onto the sidebar to import them into your workspace—streamlined, efficient, and intuitive!
+- **📚 Enhanced Knowledge Collection**: Now, you can reference individual files from a knowledge collection—ideal for more precise Retrieval-Augmented Generation (RAG) queries and document analysis.
+- **🏷️ Enhanced Tagging System**: Tags now take up less space! Utilize the new 'tag:' query system to manage, search, and organize your conversations more effectively without cluttering the interface.
+- **🧠 Auto-Tagging for Chats**: Your conversations are now automatically tagged for improved organization, mirroring the efficiency of auto-generated titles.
+- **🔍 Backend Chat Query System**: Chat filtering has become more efficient, now handled through the backend instead of your browser, improving search performance and accuracy.
+- **🎮 Revamped Playground**: Experience a refreshed and optimized Playground for smoother testing, tweaks, and experimentation of your models and tools.
+- **🧩 Token-Based Text Splitter**: Introducing token-based text splitting (tiktoken), giving you more precise control over how text is processed. Previously, only character-based splitting was available.
+- **🔢 Ollama Batch Embeddings**: Leverage new batch embedding support for improved efficiency and performance with Ollama embedding models.
+- **🔍 Enhanced Add Text Content Modal**: Enjoy a cleaner, more intuitive workflow for adding and curating knowledge content with an upgraded input modal from our Knowledge workspace.
+- **🖋️ Rich Text Input for Chats**: Make your chat inputs more dynamic with support for rich text formatting. Your conversations just got a lot more polished and professional.
+- **⚡ Faster Whisper Model Configurability**: Customize your local faster-whisper model directly from the WebUI.
+- **☁️ Experimental S3 Support**: Enable stateless WebUI instances with S3 support, greatly enhancing scalability and balancing heavy workloads.
+- **🔕 Disable Update Toast**: Now you can streamline your workspace even further—choose to disable update notifications for a more focused experience.
+- **🌟 RAG Citation Relevance Percentage**: Easily assess citation accuracy with the addition of relevance percentages in RAG results.
+- **⚙️ Mermaid Copy Button**: Mermaid diagrams now come with a handy copy button, simplifying the extraction and use of diagram contents directly in your workflow.
+- **🎨 UI Redesign**: Major interface redesign that will make navigation smoother, keep your focus where it matters, and ensure a modern look.
+
+### Fixed
+
+- **🎙️ Voice Note Mic Stopping Issue**: Fixed the issue where the microphone stayed active after ending a voice note recording, ensuring your audio workflow runs smoothly.
+
+### Removed
+
+- **👋 Goodbye Sidebar Tags**: Sidebar tag clutter is gone. We’ve shifted tag buttons to more effective query-based tag filtering for a sleeker, more agile interface.
+
## [0.3.32] - 2024-10-06
### Added
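The leaderboard described above ranks models with the Elo system. As a point of reference, here is a minimal sketch of a single Elo update for one blind A/B comparison; the K-factor and starting ratings are illustrative assumptions, not values taken from this patch:

```python
def elo_update(rating_a: float, rating_b: float, a_won: bool, k: float = 32.0):
    """One Elo update for a pairwise comparison (illustrative K-factor)."""
    expected_a = 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400.0))
    score_a = 1.0 if a_won else 0.0
    new_a = rating_a + k * (score_a - expected_a)
    new_b = rating_b + k * ((1.0 - score_a) - (1.0 - expected_a))
    return new_a, new_b

# Two models start at 1000; model A wins a blind comparison.
print(elo_update(1000.0, 1000.0, True))  # (1016.0, 984.0)
```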
diff --git a/Dockerfile b/Dockerfile
index 2e898dc890..ec879d732d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1
# Initialize device type args
-# use build args in the docker build commmand with --build-arg="BUILDARG=true"
+# use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false
ARG USE_OLLAMA=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
@@ -11,6 +11,10 @@ ARG USE_CUDA_VER=cu121
# IMPORTANT: If you change the embedding model (sentence-transformers/all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
ARG USE_RERANKING_MODEL=""
+
+# Tiktoken encoding name; models to use can be found at https://huggingface.co/models?library=tiktoken
+ARG USE_TIKTOKEN_ENCODING_NAME="cl100k_base"
+
ARG BUILD_HASH=dev-build
# Override at your own risk - non-root configurations are untested
ARG UID=0
@@ -72,6 +76,10 @@ ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
+## Tiktoken model settings ##
+ENV TIKTOKEN_ENCODING_NAME="$USE_TIKTOKEN_ENCODING_NAME" \
+ TIKTOKEN_CACHE_DIR="/app/backend/data/cache/tiktoken"
+
## Hugging Face download cache ##
ENV HF_HOME="/app/backend/data/cache/embedding/models"
@@ -131,11 +139,13 @@ RUN pip3 install uv && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+ python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
else \
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
uv pip install --system -r requirements.txt --no-cache-dir && \
python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
+ python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
fi; \
chown -R $UID:$GID /app/backend/data/
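The two added `python -c` lines pre-fetch the tiktoken encoding at image build time so token-based splitting works without network access at runtime. Expanded into a standalone script (encoding name and cache directory mirror the Dockerfile defaults above):

```python
import os
import tiktoken

# Mirror the Dockerfile's build args / env defaults.
os.environ.setdefault("TIKTOKEN_CACHE_DIR", "/app/backend/data/cache/tiktoken")
encoding_name = os.environ.get("TIKTOKEN_ENCODING_NAME", "cl100k_base")

# tiktoken honors TIKTOKEN_CACHE_DIR, so fetching the encoding once at build
# time means later calls are served from the local cache, offline.
enc = tiktoken.get_encoding(encoding_name)
print(enc.encode("hello world"))  # [15339, 1917] for cl100k_base
```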
diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
index 9bf242381c..83251a3a91 100644
--- a/TROUBLESHOOTING.md
+++ b/TROUBLESHOOTING.md
@@ -18,7 +18,7 @@ If you're experiencing connection issues, it’s often due to the WebUI docker c
docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
-### Error on Slow Reponses for Ollama
+### Error on Slow Responses for Ollama
Open WebUI has a default timeout of 5 minutes for Ollama to finish generating the response. If needed, this can be adjusted via the environment variable AIOHTTP_CLIENT_TIMEOUT, which sets the timeout in seconds.
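For orientation, a sketch of how an environment variable like AIOHTTP_CLIENT_TIMEOUT typically feeds an aiohttp request; the actual parsing lives in open_webui/env.py and may differ in detail:

```python
import os
import aiohttp

# 300 s mirrors the 5-minute default described above (parsing assumed).
timeout_s = int(os.environ.get("AIOHTTP_CLIENT_TIMEOUT", "300"))

async def post_json(url: str, payload: dict) -> dict:
    timeout = aiohttp.ClientTimeout(total=timeout_s)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        async with session.post(url, json=payload) as resp:
            return await resp.json()
```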
diff --git a/backend/open_webui/apps/audio/main.py b/backend/open_webui/apps/audio/main.py
index 0e56720138..148430da87 100644
--- a/backend/open_webui/apps/audio/main.py
+++ b/backend/open_webui/apps/audio/main.py
@@ -63,6 +63,9 @@ app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY
app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
app.state.config.STT_MODEL = AUDIO_STT_MODEL
+app.state.config.WHISPER_MODEL = WHISPER_MODEL
+app.state.faster_whisper_model = None
+
app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL
app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY
app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE
@@ -82,6 +85,31 @@ SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+def set_faster_whisper_model(model: str, auto_update: bool = False):
+ if model and app.state.config.STT_ENGINE == "":
+ from faster_whisper import WhisperModel
+
+ faster_whisper_kwargs = {
+ "model_size_or_path": model,
+ "device": whisper_device_type,
+ "compute_type": "int8",
+ "download_root": WHISPER_MODEL_DIR,
+ "local_files_only": not auto_update,
+ }
+
+ try:
+ app.state.faster_whisper_model = WhisperModel(**faster_whisper_kwargs)
+ except Exception:
+ log.warning(
+ "WhisperModel initialization failed, attempting download with local_files_only=False"
+ )
+ faster_whisper_kwargs["local_files_only"] = False
+ app.state.faster_whisper_model = WhisperModel(**faster_whisper_kwargs)
+
+ else:
+ app.state.faster_whisper_model = None
+
+
class TTSConfigForm(BaseModel):
OPENAI_API_BASE_URL: str
OPENAI_API_KEY: str
@@ -99,6 +127,7 @@ class STTConfigForm(BaseModel):
OPENAI_API_KEY: str
ENGINE: str
MODEL: str
+ WHISPER_MODEL: str
class AudioConfigUpdateForm(BaseModel):
@@ -152,6 +181,7 @@ async def get_audio_config(user=Depends(get_admin_user)):
"OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
"ENGINE": app.state.config.STT_ENGINE,
"MODEL": app.state.config.STT_MODEL,
+ "WHISPER_MODEL": app.state.config.WHISPER_MODEL,
},
}
@@ -176,6 +206,8 @@ async def update_audio_config(
app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY
app.state.config.STT_ENGINE = form_data.stt.ENGINE
app.state.config.STT_MODEL = form_data.stt.MODEL
+ app.state.config.WHISPER_MODEL = form_data.stt.WHISPER_MODEL
+ set_faster_whisper_model(form_data.stt.WHISPER_MODEL, WHISPER_MODEL_AUTO_UPDATE)
return {
"tts": {
@@ -194,6 +226,7 @@ async def update_audio_config(
"OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY,
"ENGINE": app.state.config.STT_ENGINE,
"MODEL": app.state.config.STT_MODEL,
+ "WHISPER_MODEL": app.state.config.WHISPER_MODEL,
},
}
@@ -367,27 +400,10 @@ def transcribe(file_path):
id = filename.split(".")[0]
if app.state.config.STT_ENGINE == "":
- from faster_whisper import WhisperModel
-
- whisper_kwargs = {
- "model_size_or_path": WHISPER_MODEL,
- "device": whisper_device_type,
- "compute_type": "int8",
- "download_root": WHISPER_MODEL_DIR,
- "local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
- }
-
- log.debug(f"whisper_kwargs: {whisper_kwargs}")
-
- try:
- model = WhisperModel(**whisper_kwargs)
- except Exception:
- log.warning(
- "WhisperModel initialization failed, attempting download with local_files_only=False"
- )
- whisper_kwargs["local_files_only"] = False
- model = WhisperModel(**whisper_kwargs)
+ if app.state.faster_whisper_model is None:
+ set_faster_whisper_model(app.state.config.WHISPER_MODEL)
+ model = app.state.faster_whisper_model
segments, info = model.transcribe(file_path, beam_size=5)
log.info(
"Detected language '%s' with probability %f"
@@ -395,7 +411,6 @@ def transcribe(file_path):
)
transcript = "".join([segment.text for segment in list(segments)])
-
data = {"text": transcript.strip()}
# save the transcript to a json file
@@ -403,7 +418,7 @@ def transcribe(file_path):
with open(transcript_file, "w") as f:
json.dump(data, f)
- print(data)
+ log.debug(data)
return data
elif app.state.config.STT_ENGINE == "openai":
if is_mp4_audio(file_path):
@@ -417,7 +432,7 @@ def transcribe(file_path):
files = {"file": (filename, open(file_path, "rb"))}
data = {"model": app.state.config.STT_MODEL}
- print(files, data)
+    log.debug(f"files: {files}, data: {data}")
r = None
try:
@@ -450,7 +465,7 @@ def transcribe(file_path):
except Exception:
error_detail = f"External: {e}"
- raise error_detail
+ raise Exception(error_detail)
@app.post("/transcriptions")
diff --git a/backend/open_webui/apps/images/utils/comfyui.py b/backend/open_webui/apps/images/utils/comfyui.py
index 0a3e3a1d9b..4c421d7c52 100644
--- a/backend/open_webui/apps/images/utils/comfyui.py
+++ b/backend/open_webui/apps/images/utils/comfyui.py
@@ -125,22 +125,34 @@ async def comfyui_generate_image(
workflow[node_id]["inputs"][node.key] = model
elif node.type == "prompt":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["text"] = payload.prompt
+ workflow[node_id]["inputs"][
+ node.key if node.key else "text"
+ ] = payload.prompt
elif node.type == "negative_prompt":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["text"] = payload.negative_prompt
+ workflow[node_id]["inputs"][
+ node.key if node.key else "text"
+ ] = payload.negative_prompt
elif node.type == "width":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["width"] = payload.width
+ workflow[node_id]["inputs"][
+ node.key if node.key else "width"
+ ] = payload.width
elif node.type == "height":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["height"] = payload.height
+ workflow[node_id]["inputs"][
+ node.key if node.key else "height"
+ ] = payload.height
elif node.type == "n":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["batch_size"] = payload.n
+ workflow[node_id]["inputs"][
+ node.key if node.key else "batch_size"
+ ] = payload.n
elif node.type == "steps":
for node_id in node.node_ids:
- workflow[node_id]["inputs"]["steps"] = payload.steps
+ workflow[node_id]["inputs"][
+ node.key if node.key else "steps"
+ ] = payload.steps
elif node.type == "seed":
seed = (
payload.seed
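All of the rewritten branches above share one shape: write a payload field into each listed node's inputs, under node.key when the admin configured one, else under a per-type default. A table-driven equivalent is sketched below; this is a hypothetical refactor for illustration, not what the patch does:

```python
# Default ComfyUI input key per node type; node.key, when set, overrides it.
DEFAULT_INPUT_KEYS = {
    "prompt": "text",
    "negative_prompt": "text",
    "width": "width",
    "height": "height",
    "n": "batch_size",
    "steps": "steps",
}

def apply_nodes(workflow: dict, nodes, payload) -> dict:
    """nodes: records with .type, .key, .node_ids; payload: generation form."""
    for node in nodes:
        if node.type in DEFAULT_INPUT_KEYS:
            value = getattr(payload, node.type)  # payload.prompt, payload.n, ...
            key = node.key if node.key else DEFAULT_INPUT_KEYS[node.type]
            for node_id in node.node_ids:
                workflow[node_id]["inputs"][key] = value
    return workflow
```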
diff --git a/backend/open_webui/apps/ollama/main.py b/backend/open_webui/apps/ollama/main.py
index 33d9846557..cb38a53eb6 100644
--- a/backend/open_webui/apps/ollama/main.py
+++ b/backend/open_webui/apps/ollama/main.py
@@ -547,8 +547,8 @@ class GenerateEmbeddingsForm(BaseModel):
class GenerateEmbedForm(BaseModel):
model: str
- input: str
- truncate: Optional[bool]
+ input: list[str] | str
+ truncate: Optional[bool] = None
options: Optional[dict] = None
keep_alive: Optional[Union[int, str]] = None
@@ -560,48 +560,7 @@ async def generate_embeddings(
url_idx: Optional[int] = None,
user=Depends(get_verified_user),
):
- if url_idx is None:
- model = form_data.model
-
- if ":" not in model:
- model = f"{model}:latest"
-
- if model in app.state.MODELS:
- url_idx = random.choice(app.state.MODELS[model]["urls"])
- else:
- raise HTTPException(
- status_code=400,
- detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
- )
-
- url = app.state.config.OLLAMA_BASE_URLS[url_idx]
- log.info(f"url: {url}")
-
- r = requests.request(
- method="POST",
- url=f"{url}/api/embed",
- headers={"Content-Type": "application/json"},
- data=form_data.model_dump_json(exclude_none=True).encode(),
- )
- try:
- r.raise_for_status()
-
- return r.json()
- except Exception as e:
- log.exception(e)
- error_detail = "Open WebUI: Server Connection Error"
- if r is not None:
- try:
- res = r.json()
- if "error" in res:
- error_detail = f"Ollama: {res['error']}"
- except Exception:
- error_detail = f"Ollama: {e}"
-
- raise HTTPException(
- status_code=r.status_code if r else 500,
- detail=error_detail,
- )
+ return generate_ollama_batch_embeddings(form_data, url_idx)
@app.post("/api/embeddings")
@@ -611,48 +570,7 @@ async def generate_embeddings(
url_idx: Optional[int] = None,
user=Depends(get_verified_user),
):
- if url_idx is None:
- model = form_data.model
-
- if ":" not in model:
- model = f"{model}:latest"
-
- if model in app.state.MODELS:
- url_idx = random.choice(app.state.MODELS[model]["urls"])
- else:
- raise HTTPException(
- status_code=400,
- detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
- )
-
- url = app.state.config.OLLAMA_BASE_URLS[url_idx]
- log.info(f"url: {url}")
-
- r = requests.request(
- method="POST",
- url=f"{url}/api/embeddings",
- headers={"Content-Type": "application/json"},
- data=form_data.model_dump_json(exclude_none=True).encode(),
- )
- try:
- r.raise_for_status()
-
- return r.json()
- except Exception as e:
- log.exception(e)
- error_detail = "Open WebUI: Server Connection Error"
- if r is not None:
- try:
- res = r.json()
- if "error" in res:
- error_detail = f"Ollama: {res['error']}"
- except Exception:
- error_detail = f"Ollama: {e}"
-
- raise HTTPException(
- status_code=r.status_code if r else 500,
- detail=error_detail,
- )
+ return generate_ollama_embeddings(form_data=form_data, url_idx=url_idx)
def generate_ollama_embeddings(
@@ -692,7 +610,64 @@ def generate_ollama_embeddings(
log.info(f"generate_ollama_embeddings {data}")
if "embedding" in data:
- return data["embedding"]
+ return data
+ else:
+ raise Exception("Something went wrong :/")
+ except Exception as e:
+ log.exception(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except Exception:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+def generate_ollama_batch_embeddings(
+ form_data: GenerateEmbedForm,
+ url_idx: Optional[int] = None,
+):
+ log.info(f"generate_ollama_batch_embeddings {form_data}")
+
+ if url_idx is None:
+ model = form_data.model
+
+ if ":" not in model:
+ model = f"{model}:latest"
+
+ if model in app.state.MODELS:
+ url_idx = random.choice(app.state.MODELS[model]["urls"])
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+ )
+
+ url = app.state.config.OLLAMA_BASE_URLS[url_idx]
+ log.info(f"url: {url}")
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/embed",
+ headers={"Content-Type": "application/json"},
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+ try:
+ r.raise_for_status()
+
+ data = r.json()
+
+ log.info(f"generate_ollama_batch_embeddings {data}")
+
+ if "embeddings" in data:
+ return data
else:
raise Exception("Something went wrong :/")
except Exception as e:
@@ -788,8 +763,7 @@ async def generate_chat_completion(
user=Depends(get_verified_user),
):
payload = {**form_data.model_dump(exclude_none=True)}
- log.debug(f"{payload = }")
-
+ log.debug(f"generate_chat_completion() - 1.payload = {payload}")
if "metadata" in payload:
del payload["metadata"]
@@ -824,7 +798,7 @@ async def generate_chat_completion(
url = get_ollama_url(url_idx, payload["model"])
log.info(f"url: {url}")
- log.debug(payload)
+ log.debug(f"generate_chat_completion() - 2.payload = {payload}")
return await post_streaming_url(
f"{url}/api/chat",
diff --git a/backend/open_webui/apps/openai/main.py b/backend/open_webui/apps/openai/main.py
index 70cefb29ca..3647977cad 100644
--- a/backend/open_webui/apps/openai/main.py
+++ b/backend/open_webui/apps/openai/main.py
@@ -18,7 +18,10 @@ from open_webui.config import (
OPENAI_API_KEYS,
AppConfig,
)
-from open_webui.env import AIOHTTP_CLIENT_TIMEOUT
+from open_webui.env import (
+ AIOHTTP_CLIENT_TIMEOUT,
+ AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST,
+)
from open_webui.constants import ERROR_MESSAGES
from open_webui.env import SRC_LOG_LEVELS
@@ -179,7 +182,7 @@ async def speech(request: Request, user=Depends(get_verified_user)):
async def fetch_url(url, key):
- timeout = aiohttp.ClientTimeout(total=3)
+ timeout = aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT_OPENAI_MODEL_LIST)
try:
headers = {"Authorization": f"Bearer {key}"}
async with aiohttp.ClientSession(timeout=timeout, trust_env=True) as session:
@@ -237,9 +240,7 @@ def merge_models_lists(model_lists):
def is_openai_api_disabled():
- api_keys = app.state.config.OPENAI_API_KEYS
- no_keys = len(api_keys) == 1 and api_keys[0] == ""
- return no_keys or not app.state.config.ENABLE_OPENAI_API
+ return not app.state.config.ENABLE_OPENAI_API
async def get_all_models_raw() -> list:
diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py
index 52cebeabc4..04eece38c6 100644
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -15,6 +15,9 @@ from fastapi import Depends, FastAPI, File, Form, HTTPException, UploadFile, sta
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
+
+from open_webui.storage.provider import Storage
+from open_webui.apps.webui.models.knowledge import Knowledges
from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
# Document loaders
@@ -47,6 +50,8 @@ from open_webui.apps.retrieval.utils import (
from open_webui.apps.webui.models.files import Files
from open_webui.config import (
BRAVE_SEARCH_API_KEY,
+ TIKTOKEN_ENCODING_NAME,
+ RAG_TEXT_SPLITTER,
CHUNK_OVERLAP,
CHUNK_SIZE,
CONTENT_EXTRACTION_ENGINE,
@@ -63,7 +68,7 @@ from open_webui.config import (
RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_MODEL_AUTO_UPDATE,
RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
- RAG_EMBEDDING_OPENAI_BATCH_SIZE,
+ RAG_EMBEDDING_BATCH_SIZE,
RAG_FILE_MAX_COUNT,
RAG_FILE_MAX_SIZE,
RAG_OPENAI_API_BASE_URL,
@@ -102,7 +107,7 @@ from open_webui.utils.misc import (
)
from open_webui.utils.utils import get_admin_user, get_verified_user
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain_community.document_loaders import (
YoutubeLoader,
)
@@ -129,12 +134,15 @@ app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
app.state.config.CONTENT_EXTRACTION_ENGINE = CONTENT_EXTRACTION_ENGINE
app.state.config.TIKA_SERVER_URL = TIKA_SERVER_URL
+app.state.config.TEXT_SPLITTER = RAG_TEXT_SPLITTER
+app.state.config.TIKTOKEN_ENCODING_NAME = TIKTOKEN_ENCODING_NAME
+
app.state.config.CHUNK_SIZE = CHUNK_SIZE
app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP
app.state.config.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
app.state.config.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
-app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE = RAG_EMBEDDING_OPENAI_BATCH_SIZE
+app.state.config.RAG_EMBEDDING_BATCH_SIZE = RAG_EMBEDDING_BATCH_SIZE
app.state.config.RAG_RERANKING_MODEL = RAG_RERANKING_MODEL
app.state.config.RAG_TEMPLATE = RAG_TEMPLATE
@@ -171,9 +179,9 @@ def update_embedding_model(
auto_update: bool = False,
):
if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "":
- import sentence_transformers
+ from sentence_transformers import SentenceTransformer
- app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
+ app.state.sentence_transformer_ef = SentenceTransformer(
get_model_path(embedding_model, auto_update),
device=DEVICE_TYPE,
trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
@@ -233,7 +241,7 @@ app.state.EMBEDDING_FUNCTION = get_embedding_function(
app.state.sentence_transformer_ef,
app.state.config.OPENAI_API_KEY,
app.state.config.OPENAI_API_BASE_URL,
- app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
+ app.state.config.RAG_EMBEDDING_BATCH_SIZE,
)
app.add_middleware(
@@ -267,7 +275,7 @@ async def get_status():
"embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
"reranking_model": app.state.config.RAG_RERANKING_MODEL,
- "openai_batch_size": app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
+ "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
}
@@ -277,10 +285,10 @@ async def get_embedding_config(user=Depends(get_admin_user)):
"status": True,
"embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
+ "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"openai_config": {
"url": app.state.config.OPENAI_API_BASE_URL,
"key": app.state.config.OPENAI_API_KEY,
- "batch_size": app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
},
}
@@ -296,13 +304,13 @@ async def get_reraanking_config(user=Depends(get_admin_user)):
class OpenAIConfigForm(BaseModel):
url: str
key: str
- batch_size: Optional[int] = None
class EmbeddingModelUpdateForm(BaseModel):
openai_config: Optional[OpenAIConfigForm] = None
embedding_engine: str
embedding_model: str
+ embedding_batch_size: Optional[int] = 1
@app.post("/embedding/update")
@@ -320,11 +328,7 @@ async def update_embedding_config(
if form_data.openai_config is not None:
app.state.config.OPENAI_API_BASE_URL = form_data.openai_config.url
app.state.config.OPENAI_API_KEY = form_data.openai_config.key
- app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE = (
- form_data.openai_config.batch_size
- if form_data.openai_config.batch_size
- else 1
- )
+ app.state.config.RAG_EMBEDDING_BATCH_SIZE = form_data.embedding_batch_size
update_embedding_model(app.state.config.RAG_EMBEDDING_MODEL)
@@ -334,17 +338,17 @@ async def update_embedding_config(
app.state.sentence_transformer_ef,
app.state.config.OPENAI_API_KEY,
app.state.config.OPENAI_API_BASE_URL,
- app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
+ app.state.config.RAG_EMBEDDING_BATCH_SIZE,
)
return {
"status": True,
"embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
"embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
+ "embedding_batch_size": app.state.config.RAG_EMBEDDING_BATCH_SIZE,
"openai_config": {
"url": app.state.config.OPENAI_API_BASE_URL,
"key": app.state.config.OPENAI_API_KEY,
- "batch_size": app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
},
}
except Exception as e:
@@ -388,18 +392,19 @@ async def get_rag_config(user=Depends(get_admin_user)):
return {
"status": True,
"pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
- "file": {
- "max_size": app.state.config.FILE_MAX_SIZE,
- "max_count": app.state.config.FILE_MAX_COUNT,
- },
"content_extraction": {
"engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
"tika_server_url": app.state.config.TIKA_SERVER_URL,
},
"chunk": {
+ "text_splitter": app.state.config.TEXT_SPLITTER,
"chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
},
+ "file": {
+ "max_size": app.state.config.FILE_MAX_SIZE,
+ "max_count": app.state.config.FILE_MAX_COUNT,
+ },
"youtube": {
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
@@ -438,6 +443,7 @@ class ContentExtractionConfig(BaseModel):
class ChunkParamUpdateForm(BaseModel):
+ text_splitter: Optional[str] = None
chunk_size: int
chunk_overlap: int
@@ -497,6 +503,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url
if form_data.chunk is not None:
+ app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter
app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
@@ -543,6 +550,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
"tika_server_url": app.state.config.TIKA_SERVER_URL,
},
"chunk": {
+ "text_splitter": app.state.config.TEXT_SPLITTER,
"chunk_size": app.state.config.CHUNK_SIZE,
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
},
@@ -603,11 +611,10 @@ class QuerySettingsForm(BaseModel):
async def update_query_settings(
form_data: QuerySettingsForm, user=Depends(get_admin_user)
):
- app.state.config.RAG_TEMPLATE = (
- form_data.template if form_data.template != "" else DEFAULT_RAG_TEMPLATE
- )
+ app.state.config.RAG_TEMPLATE = form_data.template
app.state.config.TOP_K = form_data.k if form_data.k else 4
app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
+
app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
form_data.hybrid if form_data.hybrid else False
)
@@ -645,25 +652,48 @@ def save_docs_to_vector_db(
filter={"hash": metadata["hash"]},
)
- if result:
+ if result is not None:
existing_doc_ids = result.ids[0]
if existing_doc_ids:
log.info(f"Document with hash {metadata['hash']} already exists")
raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
if split:
- text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=app.state.config.CHUNK_SIZE,
- chunk_overlap=app.state.config.CHUNK_OVERLAP,
- add_start_index=True,
- )
+ if app.state.config.TEXT_SPLITTER in ["", "character"]:
+ text_splitter = RecursiveCharacterTextSplitter(
+ chunk_size=app.state.config.CHUNK_SIZE,
+ chunk_overlap=app.state.config.CHUNK_OVERLAP,
+ add_start_index=True,
+ )
+ elif app.state.config.TEXT_SPLITTER == "token":
+ text_splitter = TokenTextSplitter(
+ encoding_name=app.state.config.TIKTOKEN_ENCODING_NAME,
+ chunk_size=app.state.config.CHUNK_SIZE,
+ chunk_overlap=app.state.config.CHUNK_OVERLAP,
+ add_start_index=True,
+ )
+ else:
+ raise ValueError(ERROR_MESSAGES.DEFAULT("Invalid text splitter"))
+
docs = text_splitter.split_documents(docs)
if len(docs) == 0:
raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
texts = [doc.page_content for doc in docs]
- metadatas = [{**doc.metadata, **(metadata if metadata else {})} for doc in docs]
+ metadatas = [
+ {
+ **doc.metadata,
+ **(metadata if metadata else {}),
+ "embedding_config": json.dumps(
+ {
+ "engine": app.state.config.RAG_EMBEDDING_ENGINE,
+ "model": app.state.config.RAG_EMBEDDING_MODEL,
+ }
+ ),
+ }
+ for doc in docs
+ ]
# ChromaDB does not like datetime formats
# for meta-data so convert them to string.
@@ -679,8 +709,10 @@ def save_docs_to_vector_db(
if overwrite:
VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
log.info(f"deleting existing collection {collection_name}")
-
- if add is False:
+ elif add is False:
+ log.info(
+ f"collection {collection_name} already exists, overwrite is False and add is False"
+ )
return True
log.info(f"adding to collection {collection_name}")
@@ -690,7 +722,7 @@ def save_docs_to_vector_db(
app.state.sentence_transformer_ef,
app.state.config.OPENAI_API_KEY,
app.state.config.OPENAI_API_BASE_URL,
- app.state.config.RAG_EMBEDDING_OPENAI_BATCH_SIZE,
+ app.state.config.RAG_EMBEDDING_BATCH_SIZE,
)
embeddings = embedding_function(
@@ -767,7 +799,7 @@ def process_file(
collection_name=f"file-{file.id}", filter={"file_id": file.id}
)
- if len(result.ids[0]) > 0:
+ if result is not None and len(result.ids[0]) > 0:
docs = [
Document(
page_content=result.documents[0][idx],
@@ -792,15 +824,14 @@ def process_file(
else:
# Process the file and save the content
# Usage: /files/
-
- file_path = file.meta.get("path", None)
+ file_path = file.path
if file_path:
+ file_path = Storage.get_file(file_path)
loader = Loader(
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
)
-
docs = loader.load(
file.filename, file.meta.get("content_type"), file_path
)
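process_file now resolves file.path through the new Storage provider before loading, which is what makes the experimental S3 backend possible: the loader always receives a local path. A minimal sketch of that contract follows; the S3 variant is hypothetical, and the real provider lives in open_webui/storage/provider.py:

```python
import os

class LocalStorage:
    def get_file(self, path: str) -> str:
        # Local provider: the stored path is already on disk.
        return path

class S3Storage:
    def __init__(self, bucket, cache_dir: str = "/tmp/s3-cache"):
        self.bucket, self.cache_dir = bucket, cache_dir

    def get_file(self, path: str) -> str:
        # S3 provider: download the object into a local cache first.
        key = path.split("://", 1)[-1]
        local = os.path.join(self.cache_dir, os.path.basename(key))
        os.makedirs(self.cache_dir, exist_ok=True)
        self.bucket.download_file(key, local)  # boto3 Bucket API
        return local
```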
@@ -816,7 +847,6 @@ def process_file(
},
)
]
-
text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}")
@@ -1259,6 +1289,7 @@ def delete_entries_from_collection(form_data: DeleteForm, user=Depends(get_admin
@app.post("/reset/db")
def reset_vector_db(user=Depends(get_admin_user)):
VECTOR_DB_CLIENT.reset()
+ Knowledges.delete_all_knowledge()
@app.post("/reset/uploads")
@@ -1281,28 +1312,6 @@ def reset_upload_dir(user=Depends(get_admin_user)) -> bool:
print(f"The directory {folder} does not exist")
except Exception as e:
print(f"Failed to process the directory {folder}. Reason: {e}")
-
- return True
-
-
-@app.post("/reset")
-def reset(user=Depends(get_admin_user)) -> bool:
- folder = f"{UPLOAD_DIR}"
- for filename in os.listdir(folder):
- file_path = os.path.join(folder, filename)
- try:
- if os.path.isfile(file_path) or os.path.islink(file_path):
- os.unlink(file_path)
- elif os.path.isdir(file_path):
- shutil.rmtree(file_path)
- except Exception as e:
- log.error("Failed to delete %s. Reason: %s" % (file_path, e))
-
- try:
- VECTOR_DB_CLIENT.reset()
- except Exception as e:
- log.exception(e)
-
return True
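save_docs_to_vector_db above now picks a splitter from config: "character" (the old behavior) or the new "token" option backed by tiktoken. Side by side, with illustrative chunk sizes:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter

text = "word " * 2000

# "character": chunk_size counts characters.
char_chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100, add_start_index=True
).split_text(text)

# "token": chunk_size counts tiktoken tokens, so chunks line up with what an
# embedding model or LLM context window actually consumes.
token_chunks = TokenTextSplitter(
    encoding_name="cl100k_base", chunk_size=256, chunk_overlap=32
).split_text(text)

print(len(char_chunks), len(token_chunks))
```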
diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py
index 0fe206c966..153bd804ff 100644
--- a/backend/open_webui/apps/retrieval/utils.py
+++ b/backend/open_webui/apps/retrieval/utils.py
@@ -12,13 +12,14 @@ from langchain_core.documents import Document
from open_webui.apps.ollama.main import (
- GenerateEmbeddingsForm,
- generate_ollama_embeddings,
+ GenerateEmbedForm,
+ generate_ollama_batch_embeddings,
)
from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
from open_webui.utils.misc import get_last_user_message
from open_webui.env import SRC_LOG_LEVELS
+from open_webui.config import DEFAULT_RAG_TEMPLATE
log = logging.getLogger(__name__)
@@ -193,7 +194,8 @@ def query_collection(
k=k,
query_embedding=query_embedding,
)
- results.append(result.model_dump())
+ if result is not None:
+ results.append(result.model_dump())
except Exception as e:
log.exception(f"Error when querying the collection: {e}")
else:
@@ -238,8 +240,13 @@ def query_collection_with_hybrid_search(
def rag_template(template: str, context: str, query: str):
- count = template.count("[context]")
- assert "[context]" in template, "RAG template does not contain '[context]'"
+ if template == "":
+ template = DEFAULT_RAG_TEMPLATE
+
+ if "[context]" not in template and "{{CONTEXT}}" not in template:
+ log.debug(
+ "WARNING: The RAG template does not contain the '[context]' or '{{CONTEXT}}' placeholder."
+ )
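With the hard assert gone, a template may use [context] or {{CONTEXT}} as its placeholder, and an empty template falls back to the default. A sketch of the substitution the template ultimately goes through; the replace step itself is outside this hunk, so the names here are assumptions:

```python
def render_rag_prompt(template: str, context: str, query: str) -> str:
    if template == "":
        # Stand-in for DEFAULT_RAG_TEMPLATE; illustrative, not the real default.
        template = "Answer using this context:\n[context]\n\nQuestion: [query]"
    return (
        template.replace("[context]", context)
        .replace("{{CONTEXT}}", context)
        .replace("[query]", query)
        .replace("{{QUERY}}", query)
    )

print(render_rag_prompt("", "Paris is the capital of France.", "Capital of France?"))
```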
if "
*/ +.markdown-section p + ul { + margin-top: 0; +} + +/* Remove bottom margin of
if it is followed by a
not followed by