From a9805df7efb85eae8444ecabc2ac810567f26b3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 14:18:45 +0100 Subject: [PATCH 001/491] open-webui --- kubernetes/helm/Chart.yaml | 2 +- kubernetes/helm/values.yaml | 2 +- kubernetes/manifest/base/ollama-service.yaml | 2 +- kubernetes/manifest/base/ollama-statefulset.yaml | 2 +- .../manifest/base/{ollama-namespace.yaml => open-webui.yaml} | 2 +- kubernetes/manifest/base/webui-deployment.yaml | 4 ++-- kubernetes/manifest/base/webui-ingress.yaml | 2 +- kubernetes/manifest/base/webui-service.yaml | 2 +- kubernetes/manifest/kustomization.yaml | 2 +- kubernetes/manifest/patches/ollama-statefulset-gpu.yaml | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) rename kubernetes/manifest/base/{ollama-namespace.yaml => open-webui.yaml} (63%) diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml index 52683b65e6..a5e686e98f 100644 --- a/kubernetes/helm/Chart.yaml +++ b/kubernetes/helm/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: ollama-webui +name: open-webui description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋" version: 1.0.0 icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml index 648b405093..efb78d1e68 100644 --- a/kubernetes/helm/values.yaml +++ b/kubernetes/helm/values.yaml @@ -1,4 +1,4 @@ -namespace: ollama-namespace +namespace: open-webui ollama: replicaCount: 1 diff --git a/kubernetes/manifest/base/ollama-service.yaml b/kubernetes/manifest/base/ollama-service.yaml index a9467fc445..8bab65b59e 100644 --- a/kubernetes/manifest/base/ollama-service.yaml +++ b/kubernetes/manifest/base/ollama-service.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: name: ollama-service - namespace: ollama-namespace + namespace: open-webui spec: selector: app: ollama diff --git a/kubernetes/manifest/base/ollama-statefulset.yaml b/kubernetes/manifest/base/ollama-statefulset.yaml index ee63faa955..125e0c6224 100644 --- a/kubernetes/manifest/base/ollama-statefulset.yaml +++ b/kubernetes/manifest/base/ollama-statefulset.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: ollama - namespace: ollama-namespace + namespace: open-webui spec: serviceName: "ollama" replicas: 1 diff --git a/kubernetes/manifest/base/ollama-namespace.yaml b/kubernetes/manifest/base/open-webui.yaml similarity index 63% rename from kubernetes/manifest/base/ollama-namespace.yaml rename to kubernetes/manifest/base/open-webui.yaml index f296eb206d..9c1a599f32 100644 --- a/kubernetes/manifest/base/ollama-namespace.yaml +++ b/kubernetes/manifest/base/open-webui.yaml @@ -1,4 +1,4 @@ apiVersion: v1 kind: Namespace metadata: - name: ollama-namespace \ No newline at end of file + name: open-webui \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index 58de036808..24e91d60d1 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: ollama-webui-deployment - namespace: ollama-namespace + namespace: open-webui spec: replicas: 1 selector: @@ -24,5 +24,5 @@ spec: memory: "500Mi" env: - name: OLLAMA_API_BASE_URL - value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api" + value: "http://ollama-service.open-webui.svc.cluster.local:11434/api" tty: true \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-ingress.yaml b/kubernetes/manifest/base/webui-ingress.yaml index 0038807cbf..f3c7d545c6 100644 --- a/kubernetes/manifest/base/webui-ingress.yaml +++ b/kubernetes/manifest/base/webui-ingress.yaml @@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: name: ollama-webui-ingress - namespace: ollama-namespace + namespace: open-webui #annotations: # Use appropriate annotations for your Ingress controller, e.g., for NGINX: # nginx.ingress.kubernetes.io/rewrite-target: / diff --git a/kubernetes/manifest/base/webui-service.yaml b/kubernetes/manifest/base/webui-service.yaml index b41daeafb9..04bf57d7d1 100644 --- a/kubernetes/manifest/base/webui-service.yaml +++ b/kubernetes/manifest/base/webui-service.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: name: ollama-webui-service - namespace: ollama-namespace + namespace: open-webui spec: type: NodePort # Use LoadBalancer if you're on a cloud that supports it selector: diff --git a/kubernetes/manifest/kustomization.yaml b/kubernetes/manifest/kustomization.yaml index a4b03d9619..f581839e8e 100644 --- a/kubernetes/manifest/kustomization.yaml +++ b/kubernetes/manifest/kustomization.yaml @@ -1,5 +1,5 @@ resources: -- base/ollama-namespace.yaml +- base/open-webui.yaml - base/ollama-service.yaml - base/ollama-statefulset.yaml - base/webui-deployment.yaml diff --git a/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml b/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml index 54e5aba650..3e42443656 100644 --- a/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml +++ b/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: name: ollama - namespace: ollama-namespace + namespace: open-webui spec: selector: matchLabels: From 1846c1e80dc597d83ad70759742abce67884c0e0 Mon Sep 17 00:00:00 2001 From: Jannik Streidl Date: Sat, 17 Feb 2024 19:38:29 +0100 Subject: [PATCH 002/491] choose embedding model when using docker --- Dockerfile | 12 ++++++++-- backend/apps/rag/main.py | 51 ++++++++++++++++++++++++++-------------- backend/config.py | 3 ++- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index 520c2964d6..7223034837 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,10 +30,16 @@ ENV WEBUI_SECRET_KEY "" ENV SCARF_NO_ANALYTICS true ENV DO_NOT_TRACK true -#Whisper TTS Settings +# whisper TTS Settings ENV WHISPER_MODEL="base" ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" +# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers +# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard +# for better persormance and multilangauge support use "intfloat/multilingual-e5-large" +# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them. +ENV DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL="all-MiniLM-L6-v2" + WORKDIR /app/backend # install python dependencies @@ -48,7 +54,9 @@ RUN apt-get update \ && apt-get install -y pandoc netcat-openbsd \ && rm -rf /var/lib/apt/lists/* -# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')" +# preload embedding model +RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL'])" +# preload tts model RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 07a30adee5..defe10f958 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -1,6 +1,5 @@ from fastapi import ( FastAPI, - Request, Depends, HTTPException, status, @@ -12,7 +11,7 @@ from fastapi.middleware.cors import CORSMiddleware import os, shutil from typing import List -# from chromadb.utils import embedding_functions +from chromadb.utils import embedding_functions from langchain_community.document_loaders import ( WebBaseLoader, @@ -28,24 +27,19 @@ from langchain_community.document_loaders import ( UnstructuredExcelLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import Chroma -from langchain.chains import RetrievalQA from pydantic import BaseModel from typing import Optional import uuid -import time from utils.misc import calculate_sha256, calculate_sha256_string from utils.utils import get_current_user, get_admin_user -from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP +from config import UPLOAD_DIR, SENTENCE_TRANSFORMER_EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP from constants import ERROR_MESSAGES -# EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction( -# model_name=EMBED_MODEL -# ) +sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=SENTENCE_TRANSFORMER_EMBED_MODEL) app = FastAPI() @@ -78,11 +72,17 @@ def store_data_in_vector_db(data, collection_name) -> bool: metadatas = [doc.metadata for doc in docs] try: - collection = CHROMA_CLIENT.create_collection(name=collection_name) + if 'DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL' in os.environ: + # if you use docker use the model from the environment variable + collection = CHROMA_CLIENT.create_collection(name=collection_name, embedding_function=sentence_transformer_ef) + + else: + # for local development use the default model + collection = CHROMA_CLIENT.create_collection(name=collection_name) collection.add( - documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts] - ) + documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts] + ) return True except Exception as e: print(e) @@ -109,9 +109,17 @@ def query_doc( user=Depends(get_current_user), ): try: - collection = CHROMA_CLIENT.get_collection( - name=form_data.collection_name, - ) + if 'DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL' in os.environ: + # if you use docker use the model from the environment variable + collection = CHROMA_CLIENT.get_collection( + name=form_data.collection_name, + embedding_function=sentence_transformer_ef + ) + else: + # for local development use the default model + collection = CHROMA_CLIENT.get_collection( + name=form_data.collection_name, + ) result = collection.query(query_texts=[form_data.query], n_results=form_data.k) return result except Exception as e: @@ -182,9 +190,18 @@ def query_collection( for collection_name in form_data.collection_names: try: - collection = CHROMA_CLIENT.get_collection( - name=collection_name, + if 'DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL' in os.environ: + # if you use docker use the model from the environment variable + collection = CHROMA_CLIENT.get_collection( + name=form_data.collection_name, + embedding_function=sentence_transformer_ef + ) + else: + # for local development use the default model + collection = CHROMA_CLIENT.get_collection( + name=form_data.collection_name, ) + result = collection.query( query_texts=[form_data.query], n_results=form_data.k ) diff --git a/backend/config.py b/backend/config.py index d7c89b3baf..023954a4de 100644 --- a/backend/config.py +++ b/backend/config.py @@ -128,7 +128,8 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": #################################### CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" -EMBED_MODEL = "all-MiniLM-L6-v2" +# this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) +SENTENCE_TRANSFORMER_EMBED_MODEL = os.getenv("DOCKER_SENTENCE_TRANSFORMER_EMBED_MODEL") CHROMA_CLIENT = chromadb.PersistentClient( path=CHROMA_DATA_PATH, settings=Settings(allow_reset=True, anonymized_telemetry=False), From be3400e3f8e93f94463d7e349003232601d860fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 21:18:21 +0100 Subject: [PATCH 003/491] rename more --- kubernetes/helm/Chart.yaml | 4 ++-- kubernetes/helm/templates/webui-deployment.yaml | 10 +++++----- kubernetes/helm/templates/webui-ingress.yaml | 4 ++-- kubernetes/helm/templates/webui-pvc.yaml | 4 ++-- kubernetes/helm/templates/webui-service.yaml | 4 ++-- kubernetes/helm/values.yaml | 4 ++-- kubernetes/manifest/base/ollama-service.yaml | 2 +- kubernetes/manifest/base/webui-deployment.yaml | 2 +- kubernetes/manifest/base/webui-ingress.yaml | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml index a5e686e98f..64d0bd2024 100644 --- a/kubernetes/helm/Chart.yaml +++ b/kubernetes/helm/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 name: open-webui -description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋" +description: "open-webui UI: A User-Friendly Web Interface for Chat Interactions 👋" version: 1.0.0 -icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png +icon: https://github.com/open-webui/open-webui/blob/main/static/favicon.png diff --git a/kubernetes/helm/templates/webui-deployment.yaml b/kubernetes/helm/templates/webui-deployment.yaml index d9721ee05c..08c966886a 100644 --- a/kubernetes/helm/templates/webui-deployment.yaml +++ b/kubernetes/helm/templates/webui-deployment.yaml @@ -1,20 +1,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: ollama-webui-deployment + name: open-webui-deployment namespace: {{ .Values.namespace }} spec: replicas: 1 selector: matchLabels: - app: ollama-webui + app: open-webui template: metadata: labels: - app: ollama-webui + app: open-webui spec: containers: - - name: ollama-webui + - name: open-webui image: {{ .Values.webui.image }} ports: - containerPort: 8080 @@ -35,4 +35,4 @@ spec: volumes: - name: webui-volume persistentVolumeClaim: - claimName: ollama-webui-pvc \ No newline at end of file + claimName: open-webui-pvc \ No newline at end of file diff --git a/kubernetes/helm/templates/webui-ingress.yaml b/kubernetes/helm/templates/webui-ingress.yaml index 84f819f378..cbd456d3f6 100644 --- a/kubernetes/helm/templates/webui-ingress.yaml +++ b/kubernetes/helm/templates/webui-ingress.yaml @@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: ollama-webui-ingress + name: open-webui-ingress namespace: {{ .Values.namespace }} {{- if .Values.webui.ingress.annotations }} annotations: @@ -17,7 +17,7 @@ spec: pathType: Prefix backend: service: - name: ollama-webui-service + name: open-webui-service port: number: {{ .Values.webui.servicePort }} {{- end }} diff --git a/kubernetes/helm/templates/webui-pvc.yaml b/kubernetes/helm/templates/webui-pvc.yaml index e9961aa8d1..d090fe8720 100644 --- a/kubernetes/helm/templates/webui-pvc.yaml +++ b/kubernetes/helm/templates/webui-pvc.yaml @@ -2,8 +2,8 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: labels: - app: ollama-webui - name: ollama-webui-pvc + app: open-webui + name: open-webui-pvc namespace: {{ .Values.namespace }} spec: accessModes: [ "ReadWriteOnce" ] diff --git a/kubernetes/helm/templates/webui-service.yaml b/kubernetes/helm/templates/webui-service.yaml index 7fefa4fd4f..afd526a150 100644 --- a/kubernetes/helm/templates/webui-service.yaml +++ b/kubernetes/helm/templates/webui-service.yaml @@ -1,12 +1,12 @@ apiVersion: v1 kind: Service metadata: - name: ollama-webui-service + name: open-webui-service namespace: {{ .Values.namespace }} spec: type: {{ .Values.webui.service.type }} # Default: NodePort # Use LoadBalancer if you're on a cloud that supports it selector: - app: ollama-webui + app: open-webui ports: - protocol: TCP port: {{ .Values.webui.servicePort }} diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml index efb78d1e68..997dbba1ab 100644 --- a/kubernetes/helm/values.yaml +++ b/kubernetes/helm/values.yaml @@ -19,7 +19,7 @@ ollama: webui: replicaCount: 1 - image: ghcr.io/ollama-webui/ollama-webui:main + image: ghcr.io/open-webui/open-webui:main servicePort: 8080 resources: limits: @@ -30,7 +30,7 @@ webui: annotations: # Use appropriate annotations for your Ingress controller, e.g., for NGINX: # nginx.ingress.kubernetes.io/rewrite-target: / - host: ollama.minikube.local + host: open-webui.minikube.local volumeSize: 1Gi nodeSelector: {} tolerations: [] diff --git a/kubernetes/manifest/base/ollama-service.yaml b/kubernetes/manifest/base/ollama-service.yaml index 8bab65b59e..10b1224747 100644 --- a/kubernetes/manifest/base/ollama-service.yaml +++ b/kubernetes/manifest/base/ollama-service.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: ollama-service + name: open-webui-service namespace: open-webui spec: selector: diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index 24e91d60d1..e04eb94e46 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: ollama-webui - image: ghcr.io/ollama-webui/ollama-webui:main + image: ghcr.io/open-webui/open-webui:main ports: - containerPort: 8080 resources: diff --git a/kubernetes/manifest/base/webui-ingress.yaml b/kubernetes/manifest/base/webui-ingress.yaml index f3c7d545c6..57588de0ee 100644 --- a/kubernetes/manifest/base/webui-ingress.yaml +++ b/kubernetes/manifest/base/webui-ingress.yaml @@ -8,7 +8,7 @@ metadata: # nginx.ingress.kubernetes.io/rewrite-target: / spec: rules: - - host: ollama.minikube.local + - host: open-webui.minikube.local http: paths: - path: / From 3e1639be3cd0294cc815814f15ced241f230c3b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 21:22:57 +0100 Subject: [PATCH 004/491] fix link for icon --- kubernetes/helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml index 64d0bd2024..71857e992a 100644 --- a/kubernetes/helm/Chart.yaml +++ b/kubernetes/helm/Chart.yaml @@ -2,4 +2,4 @@ apiVersion: v2 name: open-webui description: "open-webui UI: A User-Friendly Web Interface for Chat Interactions 👋" version: 1.0.0 -icon: https://github.com/open-webui/open-webui/blob/main/static/favicon.png +icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png From 74b0e7996d979b2107ec1659597d947f1155a166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 21:30:47 +0100 Subject: [PATCH 005/491] back to ollama for olllama stuff --- kubernetes/manifest/base/ollama-service.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/manifest/base/ollama-service.yaml b/kubernetes/manifest/base/ollama-service.yaml index 10b1224747..8bab65b59e 100644 --- a/kubernetes/manifest/base/ollama-service.yaml +++ b/kubernetes/manifest/base/ollama-service.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Service metadata: - name: open-webui-service + name: ollama-service namespace: open-webui spec: selector: From cf3e8accd675f64504bb5c5c6ea51b5f7a812cd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 21:35:01 +0100 Subject: [PATCH 006/491] rename to Open WebUI: --- kubernetes/helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml index 71857e992a..c35338c84d 100644 --- a/kubernetes/helm/Chart.yaml +++ b/kubernetes/helm/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 name: open-webui -description: "open-webui UI: A User-Friendly Web Interface for Chat Interactions 👋" +description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋" version: 1.0.0 icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png From 034096793056345dedc168326bc994dc55aee96a Mon Sep 17 00:00:00 2001 From: braveokafor Date: Fri, 16 Feb 2024 19:36:38 +0100 Subject: [PATCH 007/491] feat: added kubernetes persistent volume claim for webui --- kubernetes/manifest/base/webui-deployment.yaml | 9 ++++++++- kubernetes/manifest/base/webui-pvc.yaml | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 kubernetes/manifest/base/webui-pvc.yaml diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index 58de036808..762c09efaf 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -25,4 +25,11 @@ spec: env: - name: OLLAMA_API_BASE_URL value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api" - tty: true \ No newline at end of file + tty: true + volumeMounts: + - name: webui-volume + mountPath: /app/backend/data + volumes: + - name: webui-volume + persistentVolumeClaim: + claimName: ollama-webui-pvc \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-pvc.yaml b/kubernetes/manifest/base/webui-pvc.yaml new file mode 100644 index 0000000000..9680c2385a --- /dev/null +++ b/kubernetes/manifest/base/webui-pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app: ollama-webui + name: ollama-webui-pvc + namespace: ollama-namespace +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi \ No newline at end of file From 31b903d831156ea76ae34bee0eb8f8c0e2b63403 Mon Sep 17 00:00:00 2001 From: braveokafor Date: Sat, 17 Feb 2024 17:20:53 +0100 Subject: [PATCH 008/491] chore: bump kubernetes resources --- kubernetes/helm/values.yaml | 14 ++++++++++---- kubernetes/manifest/base/ollama-statefulset.yaml | 8 ++++++-- kubernetes/manifest/base/webui-deployment.yaml | 5 ++++- kubernetes/manifest/base/webui-pvc.yaml | 2 +- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml index 648b405093..11d23f5eab 100644 --- a/kubernetes/helm/values.yaml +++ b/kubernetes/helm/values.yaml @@ -5,11 +5,14 @@ ollama: image: ollama/ollama:latest servicePort: 11434 resources: - limits: + requests: cpu: "2000m" memory: "2Gi" + limits: + cpu: "4000m" + memory: "4Gi" nvidia.com/gpu: "0" - volumeSize: 1Gi + volumeSize: 30Gi nodeSelector: {} tolerations: [] service: @@ -22,16 +25,19 @@ webui: image: ghcr.io/ollama-webui/ollama-webui:main servicePort: 8080 resources: - limits: + requests: cpu: "500m" memory: "500Mi" + limits: + cpu: "1000m" + memory: "1Gi" ingress: enabled: true annotations: # Use appropriate annotations for your Ingress controller, e.g., for NGINX: # nginx.ingress.kubernetes.io/rewrite-target: / host: ollama.minikube.local - volumeSize: 1Gi + volumeSize: 2Gi nodeSelector: {} tolerations: [] service: diff --git a/kubernetes/manifest/base/ollama-statefulset.yaml b/kubernetes/manifest/base/ollama-statefulset.yaml index ee63faa955..eb91c41800 100644 --- a/kubernetes/manifest/base/ollama-statefulset.yaml +++ b/kubernetes/manifest/base/ollama-statefulset.yaml @@ -20,9 +20,13 @@ spec: ports: - containerPort: 11434 resources: - limits: + requests: cpu: "2000m" memory: "2Gi" + limits: + cpu: "4000m" + memory: "4Gi" + nvidia.com/gpu: "0" volumeMounts: - name: ollama-volume mountPath: /root/.ollama @@ -34,4 +38,4 @@ spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 1Gi \ No newline at end of file + storage: 30Gi \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index 762c09efaf..e67e562b4a 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -19,9 +19,12 @@ spec: ports: - containerPort: 8080 resources: - limits: + requests: cpu: "500m" memory: "500Mi" + limits: + cpu: "1000m" + memory: "1Gi" env: - name: OLLAMA_API_BASE_URL value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api" diff --git a/kubernetes/manifest/base/webui-pvc.yaml b/kubernetes/manifest/base/webui-pvc.yaml index 9680c2385a..285dfeef7f 100644 --- a/kubernetes/manifest/base/webui-pvc.yaml +++ b/kubernetes/manifest/base/webui-pvc.yaml @@ -9,4 +9,4 @@ spec: accessModes: ["ReadWriteOnce"] resources: requests: - storage: 1Gi \ No newline at end of file + storage: 2Gi \ No newline at end of file From 82194a5df94d0d16b79c0c3c9fc1637c512e3f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= Date: Sat, 17 Feb 2024 21:54:12 +0100 Subject: [PATCH 009/491] more renaming --- kubernetes/manifest/base/webui-deployment.yaml | 8 ++++---- kubernetes/manifest/base/webui-ingress.yaml | 4 ++-- kubernetes/manifest/base/webui-service.yaml | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index e04eb94e46..bd2c227368 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -1,20 +1,20 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: ollama-webui-deployment + name: open-webui-deployment namespace: open-webui spec: replicas: 1 selector: matchLabels: - app: ollama-webui + app: open-webui template: metadata: labels: - app: ollama-webui + app: open-webui spec: containers: - - name: ollama-webui + - name: open-webui image: ghcr.io/open-webui/open-webui:main ports: - containerPort: 8080 diff --git a/kubernetes/manifest/base/webui-ingress.yaml b/kubernetes/manifest/base/webui-ingress.yaml index 57588de0ee..dc0b53ccd4 100644 --- a/kubernetes/manifest/base/webui-ingress.yaml +++ b/kubernetes/manifest/base/webui-ingress.yaml @@ -1,7 +1,7 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: ollama-webui-ingress + name: open-webui-ingress namespace: open-webui #annotations: # Use appropriate annotations for your Ingress controller, e.g., for NGINX: @@ -15,6 +15,6 @@ spec: pathType: Prefix backend: service: - name: ollama-webui-service + name: open-webui-service port: number: 8080 diff --git a/kubernetes/manifest/base/webui-service.yaml b/kubernetes/manifest/base/webui-service.yaml index 04bf57d7d1..d73845f00a 100644 --- a/kubernetes/manifest/base/webui-service.yaml +++ b/kubernetes/manifest/base/webui-service.yaml @@ -1,12 +1,12 @@ apiVersion: v1 kind: Service metadata: - name: ollama-webui-service + name: open-webui-service namespace: open-webui spec: type: NodePort # Use LoadBalancer if you're on a cloud that supports it selector: - app: ollama-webui + app: open-webui ports: - protocol: TCP port: 8080 From aa39305dec83a6c58472b0205b66fff7b92497ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9sar=20Garc=C3=ADa?= Date: Sun, 18 Feb 2024 04:08:58 +0100 Subject: [PATCH 010/491] Update README.md Added detailed instructions to migrate from ollama-webui to open-webui, preserving and not preserving existing data --- README.md | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a1d089e776..466e3af62f 100644 --- a/README.md +++ b/README.md @@ -219,9 +219,48 @@ docker rm -f open-webui docker pull ghcr.io/open-webui/open-webui:main [insert command you used to install] ``` - In the last line, you need to use the very same command you used to install (local install, remote server, etc.) +### Moving from Ollama WebUI to Open WebUI + +Given recent name changes, the docker image has been renamed. Additional steps are required to update for those people that used Ollama WebUI previously and want to start using the new images. + +#### Updating to Open WebUI without keeping your data + +If you want to update to the new image but don't want to keep any previous data like conversations, prompts, documents, etc. you can perform the following steps: + +```bash +docker rm -f ollama-webui +docker pull ghcr.io/open-webui/open-webui:main +[insert the equivalent command that you used to install with the new Docker image name] +docker volume rm ollama-webui +``` + +For example, for local installation it would be `docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main`. For other installation commands, check the relevant parts of this README document. + +#### Migrating your contents from Ollama WebUI to Open WebUI + +If you want to update to the new image migrating all your previous settings like conversations, prompts, documents, etc. you can perform the following steps: + +```bash +docker rm -f ollama-webui +docker pull ghcr.io/open-webui/open-webui:main +# Creates a new volume and uses a temporary container to copy from one volume to another as per https://github.com/moby/moby/issues/31154#issuecomment-360531460 +docker volume create --name open-webui +docker run --rm -v ollama-webui:/from -v open-webui:/to alpine ash -c "cd /from ; cp -av . /to" +[insert the equivalent command that you used to install with the new Docker image name] +``` + +Once you verify that all the data has been migrated you can erase the old volumen using the following command: + +```bash +docker volume rm ollama-webui +``` + + + + + ## How to Install Without Docker While we strongly recommend using our convenient Docker container installation for optimal support, we understand that some situations may require a non-Docker setup, especially for development purposes. Please note that non-Docker installations are not officially supported, and you might need to troubleshoot on your own. From e07001e5f6c0570b7a4dbf594b30b1d5f1ccbd54 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 21:06:08 -0800 Subject: [PATCH 011/491] feat: rag folder scan support --- backend/apps/rag/main.py | 99 ++++++++++++++++--- backend/apps/web/routers/documents.py | 4 + backend/config.py | 8 ++ backend/utils/misc.py | 38 +++++++ src/lib/apis/rag/index.ts | 26 +++++ .../chat/Messages/ResponseMessage.svelte | 2 +- .../documents/Settings/General.svelte | 68 +++++++++++++ .../components/documents/SettingsModal.svelte | 86 ++++++++++++++++ src/routes/(app)/documents/+page.svelte | 31 ++++++ 9 files changed, 350 insertions(+), 12 deletions(-) create mode 100644 src/lib/components/documents/Settings/General.svelte create mode 100644 src/lib/components/documents/SettingsModal.svelte diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 07a30adee5..ec9e0a8b86 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -10,6 +10,8 @@ from fastapi import ( ) from fastapi.middleware.cors import CORSMiddleware import os, shutil + +from pathlib import Path from typing import List # from chromadb.utils import embedding_functions @@ -28,19 +30,39 @@ from langchain_community.document_loaders import ( UnstructuredExcelLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import Chroma from langchain.chains import RetrievalQA +from langchain_community.vectorstores import Chroma from pydantic import BaseModel from typing import Optional - +import mimetypes import uuid +import json import time -from utils.misc import calculate_sha256, calculate_sha256_string + +from apps.web.models.documents import ( + Documents, + DocumentForm, + DocumentResponse, +) + +from utils.misc import ( + calculate_sha256, + calculate_sha256_string, + sanitize_filename, + extract_folders_after_data_docs, +) from utils.utils import get_current_user, get_admin_user -from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP +from config import ( + UPLOAD_DIR, + DOCS_DIR, + EMBED_MODEL, + CHROMA_CLIENT, + CHUNK_SIZE, + CHUNK_OVERLAP, +) from constants import ERROR_MESSAGES # EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction( @@ -220,8 +242,8 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)): ) -def get_loader(file, file_path): - file_ext = file.filename.split(".")[-1].lower() +def get_loader(filename: str, file_content_type: str, file_path: str): + file_ext = filename.split(".")[-1].lower() known_type = True known_source_ext = [ @@ -279,20 +301,20 @@ def get_loader(file, file_path): loader = UnstructuredXMLLoader(file_path) elif file_ext == "md": loader = UnstructuredMarkdownLoader(file_path) - elif file.content_type == "application/epub+zip": + elif file_content_type == "application/epub+zip": loader = UnstructuredEPubLoader(file_path) elif ( - file.content_type + file_content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or file_ext in ["doc", "docx"] ): loader = Docx2txtLoader(file_path) - elif file.content_type in [ + elif file_content_type in [ "application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ] or file_ext in ["xls", "xlsx"]: loader = UnstructuredExcelLoader(file_path) - elif file_ext in known_source_ext or file.content_type.find("text/") >= 0: + elif file_ext in known_source_ext or file_content_type.find("text/") >= 0: loader = TextLoader(file_path) else: loader = TextLoader(file_path) @@ -323,7 +345,7 @@ def store_doc( collection_name = calculate_sha256(f)[:63] f.close() - loader, known_type = get_loader(file, file_path) + loader, known_type = get_loader(file.filename, file.content_type, file_path) data = loader.load() result = store_data_in_vector_db(data, collection_name) @@ -353,6 +375,61 @@ def store_doc( ) +@app.get("/scan") +def scan_docs_dir(user=Depends(get_admin_user)): + try: + for path in Path(DOCS_DIR).rglob("./**/*"): + if path.is_file() and not path.name.startswith("."): + tags = extract_folders_after_data_docs(path) + filename = path.name + file_content_type = mimetypes.guess_type(path) + + f = open(path, "rb") + collection_name = calculate_sha256(f)[:63] + f.close() + + loader, known_type = get_loader(filename, file_content_type, str(path)) + data = loader.load() + + result = store_data_in_vector_db(data, collection_name) + + if result: + sanitized_filename = sanitize_filename(filename) + doc = Documents.get_doc_by_name(sanitized_filename) + + if doc == None: + doc = Documents.insert_new_doc( + user.id, + DocumentForm( + **{ + "name": sanitized_filename, + "title": filename, + "collection_name": collection_name, + "filename": filename, + "content": ( + json.dumps( + { + "tags": list( + map( + lambda name: {"name": name}, + tags, + ) + ) + } + ) + if len(tags) + else "{}" + ), + } + ), + ) + + except Exception as e: + print(e) + + return True + + @app.get("/reset/db") def reset_vector_db(user=Depends(get_admin_user)): CHROMA_CLIENT.reset() diff --git a/backend/apps/web/routers/documents.py b/backend/apps/web/routers/documents.py index 5bc473faf4..7c69514fe9 100644 --- a/backend/apps/web/routers/documents.py +++ b/backend/apps/web/routers/documents.py @@ -96,6 +96,10 @@ async def get_doc_by_name(name: str, user=Depends(get_current_user)): ############################ +class TagItem(BaseModel): + name: str + + class TagDocumentForm(BaseModel): name: str tags: List[dict] diff --git a/backend/config.py b/backend/config.py index d7c89b3baf..f5acf06b70 100644 --- a/backend/config.py +++ b/backend/config.py @@ -43,6 +43,14 @@ Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True) CACHE_DIR = f"{DATA_DIR}/cache" Path(CACHE_DIR).mkdir(parents=True, exist_ok=True) + +#################################### +# Docs DIR +#################################### + +DOCS_DIR = f"{DATA_DIR}/docs" +Path(DOCS_DIR).mkdir(parents=True, exist_ok=True) + #################################### # OLLAMA_API_BASE_URL #################################### diff --git a/backend/utils/misc.py b/backend/utils/misc.py index 385a2c4154..5e9d5876e3 100644 --- a/backend/utils/misc.py +++ b/backend/utils/misc.py @@ -1,3 +1,4 @@ +from pathlib import Path import hashlib import re @@ -38,3 +39,40 @@ def validate_email_format(email: str) -> bool: if not re.match(r"[^@]+@[^@]+\.[^@]+", email): return False return True + + +def sanitize_filename(file_name): + # Convert to lowercase + lower_case_file_name = file_name.lower() + + # Remove special characters using regular expression + sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name) + + # Replace spaces with dashes + final_file_name = re.sub(r"\s+", "-", sanitized_file_name) + + return final_file_name + + +def extract_folders_after_data_docs(path): + # Convert the path to a Path object if it's not already + path = Path(path) + + # Extract parts of the path + parts = path.parts + + # Find the index of '/data/docs' in the path + try: + index_data_docs = parts.index("data") + 1 + index_docs = parts.index("docs", index_data_docs) + 1 + except ValueError: + return [] + + # Exclude the filename and accumulate folder names + tags = [] + + folders = parts[index_docs:-1] + for idx, part in enumerate(folders): + tags.append("/".join(folders[: idx + 1])) + + return tags diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 3f4f30bf34..fc3571aa38 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -138,6 +138,32 @@ export const queryCollection = async ( return res; }; +export const scanDocs = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/scan`, { + method: 'GET', + headers: { + Accept: 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const resetVectorDB = async (token: string) => { let error = null; diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index a0ffc83cfb..cc42d0b942 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -366,7 +366,7 @@ {#if message.done}
{#if siblings.length > 1}
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte new file mode 100644 index 0000000000..19a6493b3e --- /dev/null +++ b/src/lib/components/documents/Settings/General.svelte @@ -0,0 +1,68 @@ + + +
{ + // console.log('submit'); + saveHandler(); + }} +> +
+
+
General Settings
+ +
+
Scan for documents from '/data/docs'
+ + +
+
+
+ + +
diff --git a/src/lib/components/documents/SettingsModal.svelte b/src/lib/components/documents/SettingsModal.svelte new file mode 100644 index 0000000000..332bf9d5a8 --- /dev/null +++ b/src/lib/components/documents/SettingsModal.svelte @@ -0,0 +1,86 @@ + + + +
+
+
Document Settings
+ +
+
+ +
+
+ +
+
+ {#if selectedTab === 'general'} + { + show = false; + }} + /> + + + {/if} +
+
+
+
diff --git a/src/routes/(app)/documents/+page.svelte b/src/routes/(app)/documents/+page.svelte index a5653483e6..ab3d6553e4 100644 --- a/src/routes/(app)/documents/+page.svelte +++ b/src/routes/(app)/documents/+page.svelte @@ -13,6 +13,7 @@ import EditDocModal from '$lib/components/documents/EditDocModal.svelte'; import AddFilesPlaceholder from '$lib/components/AddFilesPlaceholder.svelte'; + import SettingsModal from '$lib/components/documents/SettingsModal.svelte'; let importFiles = ''; let inputFiles = ''; @@ -20,6 +21,7 @@ let tags = []; + let showSettingsModal = false; let showEditDocModal = false; let selectedDoc; let selectedTag = ''; @@ -179,11 +181,38 @@ }} /> + +
My Documents
+ +
+ +
@@ -419,6 +448,8 @@
+ +
{/each} {#if $documents.length > 0} From 79d0932a3ab12ccacf7e4c82c3fc658544364008 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 21:14:42 -0800 Subject: [PATCH 012/491] refac: styling --- .../documents/Settings/General.svelte | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte index 19a6493b3e..c3c7df5bb9 100644 --- a/src/lib/components/documents/Settings/General.svelte +++ b/src/lib/components/documents/Settings/General.svelte @@ -3,13 +3,20 @@ import { scanDocs } from '$lib/apis/rag'; import { documents } from '$lib/stores'; import { onMount } from 'svelte'; + import toast from 'svelte-french-toast'; export let saveHandler: Function; + + let loading = false; + const scanHandler = async () => { + loading = true; const res = await scanDocs(localStorage.token); + loading = false; if (res) { await documents.set(await getDocs(localStorage.token)); + toast.success('Scan complete!'); } }; @@ -31,16 +38,19 @@
Scan for documents from '/data/docs'
From a94e4161f778e646542037ef372533e2ab1061f9 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 21:31:46 -0800 Subject: [PATCH 013/491] fix: file content type issue --- backend/apps/rag/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index ec9e0a8b86..f24b6b9051 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -388,7 +388,9 @@ def scan_docs_dir(user=Depends(get_admin_user)): collection_name = calculate_sha256(f)[:63] f.close() - loader, known_type = get_loader(filename, file_content_type, str(path)) + loader, known_type = get_loader( + filename, file_content_type[0], str(path) + ) data = loader.load() result = store_data_in_vector_db(data, collection_name) From ccf08fb91e3960b1b8457306ac231d18dc7f21e2 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 22:29:52 -0800 Subject: [PATCH 014/491] feat: editable chunk params --- backend/apps/rag/main.py | 39 +++++++++++- src/lib/apis/rag/index.ts | 58 +++++++++++++++++ .../documents/Settings/General.svelte | 62 +++++++++++++++++-- 3 files changed, 152 insertions(+), 7 deletions(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index f24b6b9051..79981680f9 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -71,6 +71,9 @@ from constants import ERROR_MESSAGES app = FastAPI() +app.state.CHUNK_SIZE = CHUNK_SIZE +app.state.CHUNK_OVERLAP = CHUNK_OVERLAP + origins = ["*"] app.add_middleware( @@ -92,7 +95,7 @@ class StoreWebForm(CollectionNameForm): def store_data_in_vector_db(data, collection_name) -> bool: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP + chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP ) docs = text_splitter.split_documents(data) @@ -116,7 +119,39 @@ def store_data_in_vector_db(data, collection_name) -> bool: @app.get("/") async def get_status(): - return {"status": True} + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } + + +@app.get("/chunk") +async def get_chunk_params(user=Depends(get_admin_user)): + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } + + +class ChunkParamUpdateForm(BaseModel): + chunk_size: int + chunk_overlap: int + + +@app.post("/chunk/update") +async def update_chunk_params( + form_data: ChunkParamUpdateForm, user=Depends(get_admin_user) +): + app.state.CHUNK_SIZE = form_data.chunk_size + app.state.CHUNK_OVERLAP = form_data.chunk_overlap + + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } class QueryDocForm(BaseModel): diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index fc3571aa38..5819badbdc 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -1,5 +1,63 @@ import { RAG_API_BASE_URL } from '$lib/constants'; +export const getChunkParams = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/chunk`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const updateChunkParams = async (token: string, size: number, overlap: number) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/chunk/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + chunk_size: size, + chunk_overlap: overlap + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const uploadDocToVectorDB = async (token: string, collection_name: string, file: File) => { const data = new FormData(); data.append('file', file); diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte index c3c7df5bb9..038fa83ba8 100644 --- a/src/lib/components/documents/Settings/General.svelte +++ b/src/lib/components/documents/Settings/General.svelte @@ -1,6 +1,6 @@
{ - // console.log('submit'); + submitHandler(); saveHandler(); }} > @@ -93,14 +107,52 @@
+ +
+ +
+
Chunk Params
+ +
+
+
Chunk Size
+ +
+ +
+
+ +
+
Chunk Overlap
+ +
+ +
+
+
+
- + From 5270efa9e5cf5d39de326930344ac94611622935 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 22:41:03 -0800 Subject: [PATCH 015/491] feat: editable rag template --- backend/apps/rag/main.py | 22 +++++++++++ backend/config.py | 15 ++++++++ src/lib/apis/rag/index.ts | 57 ++++++++++++++++++++++++++++ src/lib/utils/rag/index.ts | 32 +++++++++------- src/routes/(app)/+page.svelte | 6 ++- src/routes/(app)/c/[id]/+page.svelte | 6 ++- 6 files changed, 122 insertions(+), 16 deletions(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 79981680f9..fbfba258df 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -62,6 +62,7 @@ from config import ( CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP, + RAG_TEMPLATE, ) from constants import ERROR_MESSAGES @@ -73,6 +74,8 @@ app = FastAPI() app.state.CHUNK_SIZE = CHUNK_SIZE app.state.CHUNK_OVERLAP = CHUNK_OVERLAP +app.state.RAG_TEMPLATE = RAG_TEMPLATE + origins = ["*"] @@ -154,6 +157,25 @@ async def update_chunk_params( } +@app.get("/template") +async def get_rag_template(user=Depends(get_current_user)): + return { + "status": True, + "template": app.state.RAG_TEMPLATE, + } + + +class RAGTemplateForm(BaseModel): + template: str + + +@app.post("/template/update") +async def update_rag_template(form_data: RAGTemplateForm, user=Depends(get_admin_user)): + # TODO: check template requirements + app.state.RAG_TEMPLATE = form_data.template + return {"status": True, "template": app.state.RAG_TEMPLATE} + + class QueryDocForm(BaseModel): collection_name: str query: str diff --git a/backend/config.py b/backend/config.py index f5acf06b70..440256c483 100644 --- a/backend/config.py +++ b/backend/config.py @@ -144,6 +144,21 @@ CHROMA_CLIENT = chromadb.PersistentClient( CHUNK_SIZE = 1500 CHUNK_OVERLAP = 100 + +RAG_TEMPLATE = """Use the following context as your learned knowledge, inside XML tags. + + [context] + + +When answer to user: +- If you don't know, just say that you don't know. +- If you don't know when you are not sure, ask for clarification. +Avoid mentioning that you obtained the information from the context. +And answer according to the language of the user's question. + +Given the context information, answer the query. +Query: [query]""" + #################################### # Transcribe #################################### diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 5819badbdc..78c220b6f5 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -58,6 +58,63 @@ export const updateChunkParams = async (token: string, size: number, overlap: nu return res; }; +export const getRAGTemplate = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/template`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const updateRAGTemplate = async (token: string, template: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/template/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + template: template + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const uploadDocToVectorDB = async (token: string, collection_name: string, file: File) => { const data = new FormData(); data.append('file', file); diff --git a/src/lib/utils/rag/index.ts b/src/lib/utils/rag/index.ts index 6b219ef201..ba1f29f888 100644 --- a/src/lib/utils/rag/index.ts +++ b/src/lib/utils/rag/index.ts @@ -1,17 +1,21 @@ -export const RAGTemplate = (context: string, query: string) => { - let template = `Use the following context as your learned knowledge, inside XML tags. - - [context] - - - When answer to user: - - If you don't know, just say that you don't know. - - If you don't know when you are not sure, ask for clarification. - Avoid mentioning that you obtained the information from the context. - And answer according to the language of the user's question. - - Given the context information, answer the query. - Query: [query]`; +import { getRAGTemplate } from '$lib/apis/rag'; + +export const RAGTemplate = async (token: string, context: string, query: string) => { + let template = await getRAGTemplate(token).catch(() => { + return `Use the following context as your learned knowledge, inside XML tags. + + [context] + + + When answer to user: + - If you don't know, just say that you don't know. + - If you don't know when you are not sure, ask for clarification. + Avoid mentioning that you obtained the information from the context. + And answer according to the language of the user's question. + + Given the context information, answer the query. + Query: [query]`; + }); template = template.replace(/\[context\]/g, context); template = template.replace(/\[query\]/g, query); diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte index 604cb544d2..1d91a61446 100644 --- a/src/routes/(app)/+page.svelte +++ b/src/routes/(app)/+page.svelte @@ -266,7 +266,11 @@ console.log(contextString); - history.messages[parentId].raContent = RAGTemplate(contextString, query); + history.messages[parentId].raContent = await RAGTemplate( + localStorage.token, + contextString, + query + ); history.messages[parentId].contexts = relevantContexts; await tick(); processing = ''; diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte index aab03d74fa..b719ebf2b4 100644 --- a/src/routes/(app)/c/[id]/+page.svelte +++ b/src/routes/(app)/c/[id]/+page.svelte @@ -280,7 +280,11 @@ console.log(contextString); - history.messages[parentId].raContent = RAGTemplate(contextString, query); + history.messages[parentId].raContent = await RAGTemplate( + localStorage.token, + contextString, + query + ); history.messages[parentId].contexts = relevantContexts; await tick(); processing = ''; From 082d1d15c39bc37687afada7213875e367549055 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 22:42:36 -0800 Subject: [PATCH 016/491] fix: template load issue --- src/lib/apis/rag/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 78c220b6f5..ed36f0143c 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -82,7 +82,7 @@ export const getRAGTemplate = async (token: string) => { throw error; } - return res; + return res?.template ?? ''; }; export const updateRAGTemplate = async (token: string, template: string) => { From a31feccd64e53ef0934b3d59de03e9f3e50ada9e Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 17 Feb 2024 22:47:58 -0800 Subject: [PATCH 017/491] feat: editable rag template frontend --- .../documents/Settings/General.svelte | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte index 038fa83ba8..503cbc84d4 100644 --- a/src/lib/components/documents/Settings/General.svelte +++ b/src/lib/components/documents/Settings/General.svelte @@ -1,6 +1,12 @@ @@ -144,6 +155,15 @@ + +
+
RAG Template
+