From bbfe2fb1224350be7b9fba1527675c331e75926f Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sat, 23 Dec 2023 16:55:45 +0100
Subject: [PATCH 001/129] Added a better, more generic gitignore which covers
 both Python and Node.js

---
 .gitignore | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 291 insertions(+)

diff --git a/.gitignore b/.gitignore
index 6635cf5542..1250aef96f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,294 @@ node_modules
 !.env.example
 vite.config.js.timestamp-*
 vite.config.ts.timestamp-*
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+lerna-debug.log*
+.pnpm-debug.log*
+
+# Diagnostic reports (https://nodejs.org/api/report.html)
+report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+*.lcov
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Snowpack dependency directory (https://snowpack.dev/)
+web_modules/
+
+# TypeScript cache
+*.tsbuildinfo
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional stylelint cache
+.stylelintcache
+
+# Microbundle cache
+.rpt2_cache/
+.rts2_cache_cjs/
+.rts2_cache_es/
+.rts2_cache_umd/
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variable files
+.env
+.env.development.local
+.env.test.local
+.env.production.local
+.env.local
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+.parcel-cache
+
+# Next.js build output
+.next
+out
+
+# Nuxt.js build / generate output
+.nuxt
+dist
+
+# Gatsby files
+.cache/
+# Comment in the public line in if your project uses Gatsby and not Next.js
+# https://nextjs.org/blog/next-9-1#public-directory-support
+# public
+
+# vuepress build output
+.vuepress/dist
+
+# vuepress v2.x temp and cache directory
+.temp
+.cache
+
+# Docusaurus cache and generated files
+.docusaurus
+
+# Serverless directories
+.serverless/
+
+# FuseBox cache
+.fusebox/
+
+# DynamoDB Local files
+.dynamodb/
+
+# TernJS port file
+.tern-port
+
+# Stores VSCode versions used for testing VSCode extensions
+.vscode-test
+
+# yarn v2
+.yarn/cache
+.yarn/unplugged
+.yarn/build-state.yml
+.yarn/install-state.gz
+.pnp.*
\ No newline at end of file
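
To sanity-check a combined ignore file like this one, `git check-ignore` reports which rule matches a given path. A quick illustration (the sample paths are hypothetical):

```bash
# -v prints the .gitignore rule that matched each path.
git check-ignore -v backend/__pycache__/main.cpython-311.pyc node_modules/react/index.js
```
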
From a580aa898466b83ae07975a04c331f8ea4de71b4 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sat, 23 Dec 2023 18:33:15 +0100
Subject: [PATCH 002/129] general-compose-improvements

---
 docker-compose.api.yaml                   |   6 +
 docker-compose.gpu.yaml                   |  12 ++
 docker-compose.yml => docker-compose.yaml |  16 +-
 run-compose.sh                            | 194 ++++++++++++++++++++++
 4 files changed, 214 insertions(+), 14 deletions(-)
 create mode 100644 docker-compose.api.yaml
 create mode 100644 docker-compose.gpu.yaml
 rename docker-compose.yml => docker-compose.yaml (62%)
 create mode 100755 run-compose.sh

diff --git a/docker-compose.api.yaml b/docker-compose.api.yaml
new file mode 100644
index 0000000000..5e4b222749
--- /dev/null
+++ b/docker-compose.api.yaml
@@ -0,0 +1,6 @@
+version: '3.8'
+
+services:
+  ollama:
+    ports:
+      - ${OLLAMA_WEBAPI_PORT-11434}:11434
diff --git a/docker-compose.gpu.yaml b/docker-compose.gpu.yaml
new file mode 100644
index 0000000000..7df6b91a7b
--- /dev/null
+++ b/docker-compose.gpu.yaml
@@ -0,0 +1,12 @@
+version: '3.8'
+
+services:
+  ollama:
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: ${OLLAMA_GPU_DRIVER-nvidia}
+              count: ${OLLAMA_GPU_COUNT-1}
+              capabilities:
+                - gpu
diff --git a/docker-compose.yml b/docker-compose.yaml
similarity index 62%
rename from docker-compose.yml
rename to docker-compose.yaml
index b503635429..68e1954d70 100644
--- a/docker-compose.yml
+++ b/docker-compose.yaml
@@ -1,21 +1,9 @@
-version: '3.6'
+version: '3.8'
 
 services:
   ollama:
-    # Uncomment below for GPU support
-    # deploy:
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: 1
-    #           capabilities:
-    #             - gpu
     volumes:
       - ollama:/root/.ollama
-    # Uncomment below to expose Ollama API outside the container stack
-    # ports:
-    #   - 11434:11434
     container_name: ollama
     pull_policy: always
     tty: true
@@ -33,7 +21,7 @@ services:
     depends_on:
       - ollama
     ports:
-      - 3000:8080
+      - ${OLLAMA_WEBUI_PORT-3000}:8080
    environment:
      - "OLLAMA_API_BASE_URL=http://ollama:11434/api"
    extra_hosts:
diff --git a/run-compose.sh b/run-compose.sh
new file mode 100755
index 0000000000..dba87625ab
--- /dev/null
+++ b/run-compose.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+
+# Define color and formatting codes
+BOLD='\033[1m'
+GREEN='\033[1;32m'
+WHITE='\033[1;37m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+# Unicode character for tick mark
+TICK='\u2713'
+
+# Detect GPU driver
+get_gpu_driver() {
+    # Detect NVIDIA GPUs
+    if lspci | grep -i nvidia >/dev/null; then
+        echo "nvidia"
+        return
+    fi
+
+    # Detect AMD GPUs (including GCN architecture check for amdgpu vs radeon)
+    if lspci | grep -i amd >/dev/null; then
+        # List of known GCN and later architecture cards
+        # This is a simplified list, and in a real-world scenario, you'd want a more comprehensive one
+        local gcn_and_later=("Radeon HD 7000" "Radeon HD 8000" "Radeon R5" "Radeon R7" "Radeon R9" "Radeon RX")
+
+        # Get GPU information
+        local gpu_info=$(lspci | grep -i 'vga.*amd')
+
+        for model in "${gcn_and_later[@]}"; do
+            if echo "$gpu_info" | grep -iq "$model"; then
+                echo "amdgpu"
+                return
+            fi
+        done
+
+        # Default to radeon if no GCN or later architecture is detected
+        echo "radeon"
+        return
+    fi
+
+    # Detect Intel GPUs
+    if lspci | grep -i intel >/dev/null; then
+        echo "i915"
+        return
+    fi
+
+    # If no known GPU is detected
+    echo "Unknown or unsupported GPU driver"
+    exit 1
+}
+
+# Function for rolling animation
+show_loading() {
+    local spin='-\|/'
+    local i=0
+
+    printf " "
+
+    while kill -0 $1 2>/dev/null; do
+        i=$(( (i+1) %4 ))
+        printf "\b${spin:$i:1}"
+        sleep .1
+    done
+
+    # Replace the spinner with a tick
+    printf "\b${GREEN}${TICK}${NC}"
+}
+
+# Usage information
+usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "Options:"
+    echo "  --enable-gpu[count=COUNT]  Enable GPU support with the specified count."
+    echo "  --enable-api[port=PORT]    Enable API and expose it on the specified port."
+    echo "  --webui[port=PORT]         Set the port for the web user interface."
+    echo ""
+    echo "Examples:"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo ""
+    echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
+    echo "About the gpu to use, the script automatically detects it using the \"lspci\" command."
+    echo "In this case the gpu detected is: $(get_gpu_driver)"
+}
+
+# Default values
+gpu_count=1
+api_port=11435
+webui_port=3000
+
+# Function to extract value from the parameter
+extract_value() {
+    echo "$1" | sed -E 's/.*\[.*=(.*)\].*/\1/; t; s/.*//'
+}
+
+# Check if no arguments were provided
+# if [ $# -eq 0 ]; then
+#     usage
+#     exit 1
+# fi
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+    key="$1"
+
+    case $key in
+        --enable-gpu*)
+            enable_gpu=true
+            value=$(extract_value "$key")
+            gpu_count=${value:-1}
+            ;;
+        --enable-api*)
+            enable_api=true
+            value=$(extract_value "$key")
+            api_port=${value:-11435}
+            ;;
+        --webui*)
+            value=$(extract_value "$key")
+            webui_port=${value:-3000}
+            ;;
+        -h|--help)
+            usage
+            exit
+            ;;
+        *)
+            # Unknown option
+            echo "Unknown option: $key"
+            usage
+            exit 1
+            ;;
+    esac
+    shift # past argument or value
+done
+
+DEFAULT_COMPOSE_COMMAND="docker compose -f docker-compose.yaml"
+if [[ $enable_gpu == true ]]; then
+    # Validate and process command-line arguments
+    if [[ -n $gpu_count ]]; then
+        if ! [[ $gpu_count =~ ^[0-9]+$ ]]; then
+            echo "Invalid GPU count: $gpu_count"
+            exit 1
+        fi
+        echo "Enabling GPU with $gpu_count GPUs"
+        # Add your GPU allocation logic here
+        export OLLAMA_GPU_DRIVER=$(get_gpu_driver)
+    fi
+    DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml"
+fi
+if [[ $enable_api == true ]]; then
+    DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.api.yaml"
+    if [[ -n $api_port ]]; then
+        export OLLAMA_WEBAPI_PORT=$api_port # Set OLLAMA_WEBAPI_PORT environment variable
+    fi
+fi
+DEFAULT_COMPOSE_COMMAND+=" up -d > /dev/null 2>&1"
+
+# Recap of environment variables
+echo
+echo -e "${WHITE}${BOLD}Current Setup:${NC}"
+echo -e "   ${GREEN}${BOLD}GPU Driver:${NC} ${OLLAMA_GPU_DRIVER:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}WebUI Port:${NC} $webui_port"
+echo
+
+# Ask for user acceptance
+echo -ne "${WHITE}${BOLD}Do you want to proceed with current setup? (Y/n): ${NC}"
+read -n1 -s choice
+
+if [[ $choice == "" || $choice == "y" ]]; then
+    # Execute the command with the current user
+    eval "docker compose down > /dev/null 2>&1; $DEFAULT_COMPOSE_COMMAND" &
+
+    # Capture the background process PID
+    PID=$!
+
+    # Display the loading animation
+    show_loading $PID
+
+    # Wait for the command to finish
+    wait $PID
+
+    echo
+    # Check exit status
+    if [ $? -eq 0 ]; then
+        echo -e "${GREEN}${BOLD}Compose project started successfully.${NC}"
+    else
+        echo -e "${RED}${BOLD}There was an error starting the compose project.${NC}"
+    fi
+else
+    echo "Aborted."
+fi
+
+echo
\ No newline at end of file
From 2994e1abcd30d307fa03246ee14a35802d00003c Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sat, 23 Dec 2023 18:49:49 +0100
Subject: [PATCH 003/129] fixed gpu count env

---
 run-compose.sh | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/run-compose.sh b/run-compose.sh
index dba87625ab..5f0effa9c4 100755
--- a/run-compose.sh
+++ b/run-compose.sh
@@ -94,12 +94,6 @@ extract_value() {
     echo "$1" | sed -E 's/.*\[.*=(.*)\].*/\1/; t; s/.*//'
 }
 
-# Check if no arguments were provided
-# if [ $# -eq 0 ]; then
-#     usage
-#     exit 1
-# fi
-
 # Parse arguments
 while [[ $# -gt 0 ]]; do
     key="$1"
@@ -144,6 +138,7 @@ if [[ $enable_gpu == true ]]; then
         echo "Enabling GPU with $gpu_count GPUs"
         # Add your GPU allocation logic here
         export OLLAMA_GPU_DRIVER=$(get_gpu_driver)
+        export OLLAMA_GPU_COUNT=$gpu_count # Set OLLAMA_GPU_COUNT environment variable
     fi
     DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml"
 fi
@@ -159,6 +154,7 @@ DEFAULT_COMPOSE_COMMAND+=" up -d > /dev/null 2>&1"
 echo
 echo -e "${WHITE}${BOLD}Current Setup:${NC}"
 echo -e "   ${GREEN}${BOLD}GPU Driver:${NC} ${OLLAMA_GPU_DRIVER:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}"
 echo -e "   ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}"
 echo -e "   ${GREEN}${BOLD}WebUI Port:${NC} $webui_port"
 echo
From 9e1660e6bb583cc9dc8c3ae708a2cd1f39e9d4d5 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 13:31:54 +0100
Subject: [PATCH 004/129] Be able to configure compose from a shell program

---
 run-compose.sh | 211 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+)
 create mode 100755 run-compose.sh

diff --git a/run-compose.sh b/run-compose.sh
new file mode 100755
index 0000000000..9498692589
--- /dev/null
+++ b/run-compose.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+
+# Define color and formatting codes
+BOLD='\033[1m'
+GREEN='\033[1;32m'
+WHITE='\033[1;37m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+# Unicode character for tick mark
+TICK='\u2713'
+
+# Detect GPU driver
+get_gpu_driver() {
+    # Detect NVIDIA GPUs
+    if lspci | grep -i nvidia >/dev/null; then
+        echo "nvidia"
+        return
+    fi
+
+    # Detect AMD GPUs (including GCN architecture check for amdgpu vs radeon)
+    if lspci | grep -i amd >/dev/null; then
+        # List of known GCN and later architecture cards
+        # This is a simplified list, and in a real-world scenario, you'd want a more comprehensive one
+        local gcn_and_later=("Radeon HD 7000" "Radeon HD 8000" "Radeon R5" "Radeon R7" "Radeon R9" "Radeon RX")
+
+        # Get GPU information
+        local gpu_info=$(lspci | grep -i 'vga.*amd')
+
+        for model in "${gcn_and_later[@]}"; do
+            if echo "$gpu_info" | grep -iq "$model"; then
+                echo "amdgpu"
+                return
+            fi
+        done
+
+        # Default to radeon if no GCN or later architecture is detected
+        echo "radeon"
+        return
+    fi
+
+    # Detect Intel GPUs
+    if lspci | grep -i intel >/dev/null; then
+        echo "i915"
+        return
+    fi
+
+    # If no known GPU is detected
+    echo "Unknown or unsupported GPU driver"
+    exit 1
+}
+
+# Function for rolling animation
+show_loading() {
+    local spin='-\|/'
+    local i=0
+
+    printf " "
+
+    while kill -0 $1 2>/dev/null; do
+        i=$(( (i+1) %4 ))
+        printf "\b${spin:$i:1}"
+        sleep .1
+    done
+
+    # Replace the spinner with a tick
+    printf "\b${GREEN}${TICK}${NC}"
+}
+
+# Usage information
+usage() {
+    echo "Usage: $0 [OPTIONS]"
+    echo "Options:"
+    echo "  --enable-gpu[count=COUNT]  Enable GPU support with the specified count."
+    echo "  --enable-api[port=PORT]    Enable API and expose it on the specified port."
+    echo "  --webui[port=PORT]         Set the port for the web user interface."
+    echo "  --data[folder=PATH]        Bind mount for ollama data folder (by default will create the 'ollama' volume)."
+    echo "  -q, --quiet                Run script in headless mode."
+    echo "  -h, --help                 Show this help message."
+    echo ""
+    echo "Examples:"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo ""
+    echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
+    echo "About the gpu to use, the script automatically detects it using the \"lspci\" command."
+    echo "In this case the gpu detected is: $(get_gpu_driver)"
+}
+
+# Default values
+gpu_count=1
+api_port=11435
+webui_port=3000
+headless=false
+
+# Function to extract value from the parameter
+extract_value() {
+    echo "$1" | sed -E 's/.*\[.*=(.*)\].*/\1/; t; s/.*//'
+}
+
+# Parse arguments
+while [[ $# -gt 0 ]]; do
+    key="$1"
+
+    case $key in
+        --enable-gpu*)
+            enable_gpu=true
+            value=$(extract_value "$key")
+            gpu_count=${value:-1}
+            ;;
+        --enable-api*)
+            enable_api=true
+            value=$(extract_value "$key")
+            api_port=${value:-11435}
+            ;;
+        --webui*)
+            value=$(extract_value "$key")
+            webui_port=${value:-3000}
+            ;;
+        --data*)
+            value=$(extract_value "$key")
+            data_dir=${value:-"./ollama-data"}
+            ;;
+        -q|--quiet)
+            headless=true
+            ;;
+        -h|--help)
+            usage
+            exit
+            ;;
+        *)
+            # Unknown option
+            echo "Unknown option: $key"
+            usage
+            exit 1
+            ;;
+    esac
+    shift # past argument or value
+done
+
+DEFAULT_COMPOSE_COMMAND="docker compose -f docker-compose.yaml"
+if [[ $enable_gpu == true ]]; then
+    # Validate and process command-line arguments
+    if [[ -n $gpu_count ]]; then
+        if ! [[ $gpu_count =~ ^[0-9]+$ ]]; then
+            echo "Invalid GPU count: $gpu_count"
+            exit 1
+        fi
+        # Add your GPU allocation logic here
+        export OLLAMA_GPU_DRIVER=$(get_gpu_driver)
+        export OLLAMA_GPU_COUNT=$gpu_count # Set OLLAMA_GPU_COUNT environment variable
+    fi
+    DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml"
+fi
+if [[ $enable_api == true ]]; then
+    DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.api.yaml"
+    if [[ -n $api_port ]]; then
+        export OLLAMA_WEBAPI_PORT=$api_port # Set OLLAMA_WEBAPI_PORT environment variable
+    fi
+fi
+if [[ -n $data_dir ]]; then
+    DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml"
+    export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable
+fi
+DEFAULT_COMPOSE_COMMAND+=" up -d > /dev/null 2>&1"
+
+# Recap of environment variables
+echo
+echo -e "${WHITE}${BOLD}Current Setup:${NC}"
+echo -e "   ${GREEN}${BOLD}GPU Driver:${NC} ${OLLAMA_GPU_DRIVER:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}GPU Count:${NC} ${OLLAMA_GPU_COUNT:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}WebAPI Port:${NC} ${OLLAMA_WEBAPI_PORT:-Not Enabled}"
+echo -e "   ${GREEN}${BOLD}Data Folder:${NC} ${data_dir:-Using ollama volume}"
+echo -e "   ${GREEN}${BOLD}WebUI Port:${NC} $webui_port"
+echo
+
+if [[ $headless == true ]]; then
+    echo -ne "${WHITE}${BOLD}Running in headless mode... ${NC}"
+    choice="y"
+else
+    # Ask for user acceptance
+    echo -ne "${WHITE}${BOLD}Do you want to proceed with current setup? (Y/n): ${NC}"
+    read -n1 -s choice
+fi
+
+if [[ $choice == "" || $choice == "y" ]]; then
+    # Execute the command with the current user
+    eval "docker compose down > /dev/null 2>&1; $DEFAULT_COMPOSE_COMMAND" &
+
+    # Capture the background process PID
+    PID=$!
+
+    # Display the loading animation
+    show_loading $PID
+
+    # Wait for the command to finish
+    wait $PID
+
+    echo
+    # Check exit status
+    if [ $? -eq 0 ]; then
+        echo -e "${GREEN}${BOLD}Compose project started successfully.${NC}"
+    else
+        echo -e "${RED}${BOLD}There was an error starting the compose project.${NC}"
+    fi
+else
+    echo "Aborted."
+fi
+
+echo
\ No newline at end of file
From 017825e437d3c6c7a3eae40c3ff58a59f439543a Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 13:38:06 +0100
Subject: [PATCH 005/129] added compose override for data volume

---
 docker-compose.data.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 docker-compose.data.yaml

diff --git a/docker-compose.data.yaml b/docker-compose.data.yaml
new file mode 100644
index 0000000000..57d6fc372b
--- /dev/null
+++ b/docker-compose.data.yaml
@@ -0,0 +1,6 @@
+version: '3.8'
+
+services:
+  ollama:
+    volumes:
+      - ${OLLAMA_DATA_DIR-./ollama-data}:/root/.ollama
\ No newline at end of file
From e2688dc2b17bd85a3a92ed67ec3d1b7d53acea4c Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 13:47:25 +0100
Subject: [PATCH 006/129] Use the built image on repository instead of the
 local one

---
 docker-compose.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 68e1954d70..1a46500d30 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -16,7 +16,7 @@ services:
       args:
         OLLAMA_API_BASE_URL: '/ollama/api'
       dockerfile: Dockerfile
-    image: ollama-webui:latest
+    image: ghcr.io/ollama-webui/ollama-webui:main
     container_name: ollama-webui
     depends_on:
       - ollama
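
Putting the two patches above together, a hypothetical invocation that bind-mounts a local data folder while pulling the published image (a sketch, not part of the patch series):

```bash
OLLAMA_DATA_DIR=./ollama-data \
    docker compose -f docker-compose.yaml -f docker-compose.data.yaml up -d

# or let run-compose.sh assemble the same command:
./run-compose.sh --data[folder=./ollama-data]
```
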
echo "" @@ -93,6 +94,7 @@ gpu_count=1 api_port=11435 webui_port=3000 headless=false +build_image=false # Function to extract value from the parameter extract_value() { @@ -122,6 +124,9 @@ while [[ $# -gt 0 ]]; do value=$(extract_value "$key") data_dir=${value:-"./ollama-data"} ;; + --build) + build_image=true + ;; -q|--quiet) headless=true ;; @@ -164,7 +169,13 @@ if [[ -n $data_dir ]]; then DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml" export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable fi -DEFAULT_COMPOSE_COMMAND+=" up -d > /dev/null 2>&1" +DEFAULT_COMPOSE_COMMAND+=" up -d" +DEFAULT_COMPOSE_COMMAND+=" --remove-orphans" +DEFAULT_COMPOSE_COMMAND+=" --force-recreate" +if [[ -n $build_image ]]; then + DEFAULT_COMPOSE_COMMAND+=" --build" +fi +DEFAULT_COMPOSE_COMMAND+=" > /dev/null 2>&1" # Recap of environment variables echo @@ -193,7 +204,7 @@ read -n1 -s choice if [[ $choice == "" || $choice == "y" ]]; then # Execute the command with the current user - eval "docker compose down > /dev/null 2>&1; $DEFAULT_COMPOSE_COMMAND" & + eval "$DEFAULT_COMPOSE_COMMAND" & # Capture the background process PID PID=$! From 567b88bb006630f9542a341e92a675845e416005 Mon Sep 17 00:00:00 2001 From: Daniele Viti Date: Sun, 24 Dec 2023 14:03:41 +0100 Subject: [PATCH 008/129] added drop to kill and build capabilities --- run-compose.sh | 74 +++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/run-compose.sh b/run-compose.sh index 1db8cff029..25cc12db31 100755 --- a/run-compose.sh +++ b/run-compose.sh @@ -75,6 +75,7 @@ usage() { echo " --webui[port=PORT] Set the port for the web user interface." echo " --data[folder=PATH] Bind mount for ollama data folder (by default will create the 'ollama' volume)." echo " --build Build the docker image before running the compose project." + echo " --drop Drop the compose project." echo " -q, --quiet Run script in headless mode." echo " -h, --help Show this help message." echo "" @@ -95,6 +96,7 @@ api_port=11435 webui_port=3000 headless=false build_image=false +kill_compose=false # Function to extract value from the parameter extract_value() { @@ -124,6 +126,9 @@ while [[ $# -gt 0 ]]; do value=$(extract_value "$key") data_dir=${value:-"./ollama-data"} ;; + --drop) + kill_compose=true + ;; --build) build_image=true ;; @@ -144,38 +149,43 @@ while [[ $# -gt 0 ]]; do shift # past argument or value done -DEFAULT_COMPOSE_COMMAND="docker compose -f docker-compose.yaml" -if [[ $enable_gpu == true ]]; then - # Validate and process command-line arguments - if [[ -n $gpu_count ]]; then - if ! [[ $gpu_count =~ ^[0-9]+$ ]]; then - echo "Invalid GPU count: $gpu_count" - exit 1 +if [[ $kill_compose == true ]]; then + docker compose down --remove-orphans + echo -e "${GREEN}${BOLD}Compose project dropped successfully.${NC}" + exit +else + DEFAULT_COMPOSE_COMMAND="docker compose -f docker-compose.yaml" + if [[ $enable_gpu == true ]]; then + # Validate and process command-line arguments + if [[ -n $gpu_count ]]; then + if ! 
[[ $gpu_count =~ ^[0-9]+$ ]]; then + echo "Invalid GPU count: $gpu_count" + exit 1 + fi + echo "Enabling GPU with $gpu_count GPUs" + # Add your GPU allocation logic here + export OLLAMA_GPU_DRIVER=$(get_gpu_driver) + export OLLAMA_GPU_COUNT=$gpu_count # Set OLLAMA_GPU_COUNT environment variable fi - echo "Enabling GPU with $gpu_count GPUs" - # Add your GPU allocation logic here - export OLLAMA_GPU_DRIVER=$(get_gpu_driver) - export OLLAMA_GPU_COUNT=$gpu_count # Set OLLAMA_GPU_COUNT environment variable + DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml" fi - DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.gpu.yaml" -fi -if [[ $enable_api == true ]]; then - DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.api.yaml" - if [[ -n $api_port ]]; then - export OLLAMA_WEBAPI_PORT=$api_port # Set OLLAMA_WEBAPI_PORT environment variable + if [[ $enable_api == true ]]; then + DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.api.yaml" + if [[ -n $api_port ]]; then + export OLLAMA_WEBAPI_PORT=$api_port # Set OLLAMA_WEBAPI_PORT environment variable + fi + fi + if [[ -n $data_dir ]]; then + DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml" + export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable + fi + DEFAULT_COMPOSE_COMMAND+=" up -d" + DEFAULT_COMPOSE_COMMAND+=" --remove-orphans" + DEFAULT_COMPOSE_COMMAND+=" --force-recreate" + if [[ $build_image == true ]]; then + DEFAULT_COMPOSE_COMMAND+=" --build" fi fi -if [[ -n $data_dir ]]; then - DEFAULT_COMPOSE_COMMAND+=" -f docker-compose.data.yaml" - export OLLAMA_DATA_DIR=$data_dir # Set OLLAMA_DATA_DIR environment variable -fi -DEFAULT_COMPOSE_COMMAND+=" up -d" -DEFAULT_COMPOSE_COMMAND+=" --remove-orphans" -DEFAULT_COMPOSE_COMMAND+=" --force-recreate" -if [[ -n $build_image ]]; then - DEFAULT_COMPOSE_COMMAND+=" --build" -fi -DEFAULT_COMPOSE_COMMAND+=" > /dev/null 2>&1" # Recap of environment variables echo @@ -195,12 +205,8 @@ else echo -ne "${WHITE}${BOLD}Do you want to proceed with current setup? (Y/n): ${NC}" read -n1 -s choice fi -echo -e " ${GREEN}${BOLD}WebUI Port:${NC} $webui_port" -echo -# Ask for user acceptance -echo -ne "${WHITE}${BOLD}Do you want to proceed with current setup? (Y/n): ${NC}" -read -n1 -s choice +echo if [[ $choice == "" || $choice == "y" ]]; then # Execute the command with the current user @@ -210,7 +216,7 @@ if [[ $choice == "" || $choice == "y" ]]; then PID=$! # Display the loading animation - show_loading $PID + #show_loading $PID # Wait for the command to finish wait $PID From 7063f00b71ad3276ac43f6d450b322dbcb945c88 Mon Sep 17 00:00:00 2001 From: Daniele Viti Date: Sun, 24 Dec 2023 14:21:34 +0100 Subject: [PATCH 009/129] added the drop capability and updated readme accordingly --- README.md | 15 ++++++++++++--- run-compose.sh | 10 ++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 5af1a6c424..756ac30cb6 100644 --- a/README.md +++ b/README.md @@ -73,13 +73,22 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/) ### Installing Both Ollama and Ollama Web UI Using Docker Compose -If you don't have Ollama installed yet, you can use the provided Docker Compose file for a hassle-free installation. Simply run the following command: +If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. 
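
A quick illustration of the bracketed-option syntax these patches keep extending, using the same sed expression as the script (the sample values are hypothetical):

```bash
#!/bin/bash
# Same helper as in run-compose.sh: pull the value out of --opt[key=value].
extract_value() {
    echo "$1" | sed -E 's/.*\[.*=(.*)\].*/\1/; t; s/.*//'
}

extract_value "--enable-gpu[count=2]"        # -> 2
extract_value "--data[folder=./ollama-data]" # -> ./ollama-data
extract_value "--enable-gpu"                 # -> empty; the caller falls back to a default
```
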
From 7063f00b71ad3276ac43f6d450b322dbcb945c88 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 14:21:34 +0100
Subject: [PATCH 009/129] added the drop capability and updated readme
 accordingly

---
 README.md      | 15 ++++++++++++---
 run-compose.sh | 10 ++++++----
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 5af1a6c424..756ac30cb6 100644
--- a/README.md
+++ b/README.md
@@ -73,13 +73,22 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/)
 
 ### Installing Both Ollama and Ollama Web UI Using Docker Compose
 
-If you don't have Ollama installed yet, you can use the provided Docker Compose file for a hassle-free installation. Simply run the following command:
+If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. Simply run the following command:
 
+For cpu-only container
 ```bash
-docker compose up -d --build
+chmod +x run-compose.sh && ./run-compose.sh
 ```
 
-This command will install both Ollama and Ollama Web UI on your system. Ensure to modify the `compose.yaml` file for GPU support and Exposing Ollama API outside the container stack if needed.
+For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+```bash
+chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+```
+
+Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
+```bash
+./run-compose.sh --build --enable-gpu[count=1]
+```
 
 ### Installing Ollama Web UI Only
 
diff --git a/run-compose.sh b/run-compose.sh
index 25cc12db31..7c7ceb7145 100755
--- a/run-compose.sh
+++ b/run-compose.sh
@@ -80,10 +80,12 @@ usage() {
     echo "  -h, --help                 Show this help message."
     echo ""
     echo "Examples:"
-    echo "  $0 --enable-gpu[count=1]"
-    echo "  $0 --enable-api[port=11435]"
-    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  ./$0 --drop"
+    echo "  ./$0 --enable-gpu[count=1]"
+    echo "  ./$0 --enable-api[port=11435]"
+    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
     echo ""
     echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
     echo "About the gpu to use, the script automatically detects it using the \"lspci\" command."
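
As a side note on the detection the README refers to: `get_gpu_driver()` keys off the vendor strings in `lspci` output, which you can inspect directly (output varies by machine; shown only as an illustration):

```bash
lspci | grep -iE 'vga|3d'
# e.g. "01:00.0 VGA compatible controller: NVIDIA Corporation ..." -> "nvidia"
```
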
From d14d26bdfd607793d86927d7667f8ccac029068a Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 16:34:33 +0100
Subject: [PATCH 010/129] Added support for kubernetes, kustomize, helm

---
 kubernetes/helm/.helmignore                             |  0
 kubernetes/helm/Chart.yaml                              |  5 +++
 .../helm/templates/ollama-namespace.yaml                |  4 ++
 kubernetes/helm/templates/ollama-service.yaml           | 12 ++++++
 .../helm/templates/ollama-statefulset.yaml              | 38 +++++++++++++++++++
 .../helm/templates/webui-deployment.yaml                | 28 ++++++++++++++
 kubernetes/helm/templates/webui-ingress.yaml            | 20 ++++++++++
 kubernetes/helm/templates/webui-service.yaml            | 15 ++++++++
 kubernetes/helm/values.yaml                             | 23 +++++++++++
 .../manifest/base/ollama-namespace.yaml                 |  4 ++
 kubernetes/manifest/base/ollama-service.yaml            | 12 ++++++
 .../manifest/base/ollama-statefulset.yaml               | 37 ++++++++++++++++++
 .../manifest/base/webui-deployment.yaml                 | 28 ++++++++++++++
 kubernetes/manifest/base/webui-ingress.yaml             | 20 ++++++++++
 kubernetes/manifest/base/webui-service.yaml             | 15 ++++++++
 kubernetes/manifest/kustomization.yaml                  | 12 ++++++
 .../patches/ollama-statefulset-gpu.yaml                 | 17 +++++++++
 17 files changed, 290 insertions(+)
 create mode 100644 kubernetes/helm/.helmignore
 create mode 100644 kubernetes/helm/Chart.yaml
 create mode 100644 kubernetes/helm/templates/ollama-namespace.yaml
 create mode 100644 kubernetes/helm/templates/ollama-service.yaml
 create mode 100644 kubernetes/helm/templates/ollama-statefulset.yaml
 create mode 100644 kubernetes/helm/templates/webui-deployment.yaml
 create mode 100644 kubernetes/helm/templates/webui-ingress.yaml
 create mode 100644 kubernetes/helm/templates/webui-service.yaml
 create mode 100644 kubernetes/helm/values.yaml
 create mode 100644 kubernetes/manifest/base/ollama-namespace.yaml
 create mode 100644 kubernetes/manifest/base/ollama-service.yaml
 create mode 100644 kubernetes/manifest/base/ollama-statefulset.yaml
 create mode 100644 kubernetes/manifest/base/webui-deployment.yaml
 create mode 100644 kubernetes/manifest/base/webui-ingress.yaml
 create mode 100644 kubernetes/manifest/base/webui-service.yaml
 create mode 100644 kubernetes/manifest/kustomization.yaml
 create mode 100644 kubernetes/manifest/patches/ollama-statefulset-gpu.yaml

diff --git a/kubernetes/helm/.helmignore b/kubernetes/helm/.helmignore
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml
new file mode 100644
index 0000000000..9c6ce1e7a3
--- /dev/null
+++ b/kubernetes/helm/Chart.yaml
@@ -0,0 +1,5 @@
+apiVersion: v2
+name: ollama-webui
+description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋"
+version: 0.1.0
+icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png
diff --git a/kubernetes/helm/templates/ollama-namespace.yaml b/kubernetes/helm/templates/ollama-namespace.yaml
new file mode 100644
index 0000000000..59f794477a
--- /dev/null
+++ b/kubernetes/helm/templates/ollama-namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ .Values.namespace }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/ollama-service.yaml b/kubernetes/helm/templates/ollama-service.yaml
new file mode 100644
index 0000000000..afa25e38b2
--- /dev/null
+++ b/kubernetes/helm/templates/ollama-service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+  namespace: {{ .Values.namespace }}
+spec:
+  selector:
+    app: ollama
+  ports:
+    - protocol: TCP
+      port: {{ .Values.ollama.servicePort }}
+      targetPort: {{ .Values.ollama.servicePort }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/ollama-statefulset.yaml b/kubernetes/helm/templates/ollama-statefulset.yaml
new file mode 100644
index 0000000000..755ed008ac
--- /dev/null
+++ b/kubernetes/helm/templates/ollama-statefulset.yaml
@@ -0,0 +1,38 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: {{ .Values.namespace }}
+spec:
+  serviceName: "ollama"
+  replicas: {{ .Values.ollama.replicaCount }}
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: {{ .Values.ollama.image }}
+        ports:
+        - containerPort: {{ .Values.ollama.servicePort }}
+        resources:
+          limits:
+            cpu: {{ .Values.ollama.resources.limits.cpu }}
+            memory: {{ .Values.ollama.resources.limits.memory }}
+            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: 1Gi
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-deployment.yaml b/kubernetes/helm/templates/webui-deployment.yaml
new file mode 100644
index 0000000000..ec4fc79f49
--- /dev/null
+++ b/kubernetes/helm/templates/webui-deployment.yaml
@@ -0,0 +1,28 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-webui-deployment
+  namespace: {{ .Values.namespace }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama-webui
+  template:
+    metadata:
+      labels:
+        app: ollama-webui
+    spec:
+      containers:
+      - name: ollama-webui
+        image: ghcr.io/ollama-webui/ollama-webui:main
+        ports:
+        - containerPort: 8080
+        resources:
+          limits:
+            cpu: "500m"
+            memory: "500Mi"
+        env:
+        - name: OLLAMA_API_BASE_URL
+          value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
+        tty: true
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-ingress.yaml b/kubernetes/helm/templates/webui-ingress.yaml
new file mode 100644
index 0000000000..3970825d13
--- /dev/null
+++ b/kubernetes/helm/templates/webui-ingress.yaml
@@ -0,0 +1,20 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-webui-ingress
+  namespace: {{ .Values.namespace }}
+  #annotations:
+  #  Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+  #  nginx.ingress.kubernetes.io/rewrite-target: /
+spec:
+  rules:
+    - host: {{ .Values.webui.ingress.host }}
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: ollama-webui-service
+                port:
+                  number: {{ .Values.webui.servicePort }}
diff --git a/kubernetes/helm/templates/webui-service.yaml b/kubernetes/helm/templates/webui-service.yaml
new file mode 100644
index 0000000000..dd60585584
--- /dev/null
+++ b/kubernetes/helm/templates/webui-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-webui-service
+  namespace: {{ .Values.namespace }}
+spec:
+  type: NodePort  # Use LoadBalancer if you're on a cloud that supports it
+  selector:
+    app: ollama-webui
+  ports:
+    - protocol: TCP
+      port: {{ .Values.webui.servicePort }}
+      targetPort: {{ .Values.webui.servicePort }}
+      # If using NodePort, you can optionally specify the nodePort:
+      # nodePort: 30000
\ No newline at end of file
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
new file mode 100644
index 0000000000..61372736eb
--- /dev/null
+++ b/kubernetes/helm/values.yaml
@@ -0,0 +1,23 @@
+namespace: ollama-namespace
+
+ollama:
+  replicaCount: 1
+  image: ollama/ollama:latest
+  servicePort: 11434
+  resources:
+    limits:
+      cpu: "2000m"
+      memory: "2Gi"
+      nvidia.com/gpu: "1"
+  volumeSize: 1Gi
+
+webui:
+  replicaCount: 1
+  image: ghcr.io/ollama-webui/ollama-webui:main
+  servicePort: 8080
+  resources:
+    limits:
+      cpu: "500m"
+      memory: "500Mi"
+  ingress:
+    host: ollama.minikube.local
diff --git a/kubernetes/manifest/base/ollama-namespace.yaml b/kubernetes/manifest/base/ollama-namespace.yaml
new file mode 100644
index 0000000000..f296eb206d
--- /dev/null
+++ b/kubernetes/manifest/base/ollama-namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: ollama-namespace
\ No newline at end of file
diff --git a/kubernetes/manifest/base/ollama-service.yaml b/kubernetes/manifest/base/ollama-service.yaml
new file mode 100644
index 0000000000..a9467fc445
--- /dev/null
+++ b/kubernetes/manifest/base/ollama-service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-service
+  namespace: ollama-namespace
+spec:
+  selector:
+    app: ollama
+  ports:
+    - protocol: TCP
+      port: 11434
+      targetPort: 11434
\ No newline at end of file
diff --git a/kubernetes/manifest/base/ollama-statefulset.yaml b/kubernetes/manifest/base/ollama-statefulset.yaml
new file mode 100644
index 0000000000..ee63faa955
--- /dev/null
+++ b/kubernetes/manifest/base/ollama-statefulset.yaml
@@ -0,0 +1,37 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: ollama-namespace
+spec:
+  serviceName: "ollama"
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama
+  template:
+    metadata:
+      labels:
+        app: ollama
+    spec:
+      containers:
+      - name: ollama
+        image: ollama/ollama:latest
+        ports:
+        - containerPort: 11434
+        resources:
+          limits:
+            cpu: "2000m"
+            memory: "2Gi"
+        volumeMounts:
+        - name: ollama-volume
+          mountPath: /root/.ollama
+        tty: true
+  volumeClaimTemplates:
+  - metadata:
+      name: ollama-volume
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: 1Gi
\ No newline at end of file
diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml
new file mode 100644
index 0000000000..58de036808
--- /dev/null
+++ b/kubernetes/manifest/base/webui-deployment.yaml
@@ -0,0 +1,28 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ollama-webui-deployment
+  namespace: ollama-namespace
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: ollama-webui
+  template:
+    metadata:
+      labels:
+        app: ollama-webui
+    spec:
+      containers:
+      - name: ollama-webui
+        image: ghcr.io/ollama-webui/ollama-webui:main
+        ports:
+        - containerPort: 8080
+        resources:
+          limits:
+            cpu: "500m"
+            memory: "500Mi"
+        env:
+        - name: OLLAMA_API_BASE_URL
+          value: "http://ollama-service.ollama-namespace.svc.cluster.local:11434/api"
+        tty: true
\ No newline at end of file
diff --git a/kubernetes/manifest/base/webui-ingress.yaml b/kubernetes/manifest/base/webui-ingress.yaml
new file mode 100644
index 0000000000..0038807cbf
--- /dev/null
+++ b/kubernetes/manifest/base/webui-ingress.yaml
@@ -0,0 +1,20 @@
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: ollama-webui-ingress
+  namespace: ollama-namespace
+  #annotations:
+  #  Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+  #  nginx.ingress.kubernetes.io/rewrite-target: /
+spec:
+  rules:
+    - host: ollama.minikube.local
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: ollama-webui-service
+                port:
+                  number: 8080
diff --git a/kubernetes/manifest/base/webui-service.yaml b/kubernetes/manifest/base/webui-service.yaml
new file mode 100644
index 0000000000..b41daeafb9
--- /dev/null
+++ b/kubernetes/manifest/base/webui-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ollama-webui-service
+  namespace: ollama-namespace
+spec:
+  type: NodePort  # Use LoadBalancer if you're on a cloud that supports it
+  selector:
+    app: ollama-webui
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
+      # If using NodePort, you can optionally specify the nodePort:
+      # nodePort: 30000
\ No newline at end of file
diff --git a/kubernetes/manifest/kustomization.yaml b/kubernetes/manifest/kustomization.yaml
new file mode 100644
index 0000000000..a4b03d9619
--- /dev/null
+++ b/kubernetes/manifest/kustomization.yaml
@@ -0,0 +1,12 @@
+resources:
+- base/ollama-namespace.yaml
+- base/ollama-service.yaml
+- base/ollama-statefulset.yaml
+- base/webui-deployment.yaml
+- base/webui-service.yaml
+- base/webui-ingress.yaml
+
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+patches:
+- path: patches/ollama-statefulset-gpu.yaml
diff --git a/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml b/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml
new file mode 100644
index 0000000000..54e5aba650
--- /dev/null
+++ b/kubernetes/manifest/patches/ollama-statefulset-gpu.yaml
@@ -0,0 +1,17 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ollama
+  namespace: ollama-namespace
+spec:
+  selector:
+    matchLabels:
+      app: ollama
+  serviceName: "ollama"
+  template:
+    spec:
+      containers:
+      - name: ollama
+        resources:
+          limits:
+            nvidia.com/gpu: "1"
From bdf2a67df69db1d154a49e947203f51dcfc47ab1 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 16:49:56 +0100
Subject: [PATCH 011/129] fixed version and gpu default value

---
 kubernetes/helm/Chart.yaml  | 2 +-
 kubernetes/helm/values.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kubernetes/helm/Chart.yaml b/kubernetes/helm/Chart.yaml
index 9c6ce1e7a3..52683b65e6 100644
--- a/kubernetes/helm/Chart.yaml
+++ b/kubernetes/helm/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
 name: ollama-webui
 description: "Ollama Web UI: A User-Friendly Web Interface for Chat Interactions 👋"
-version: 0.1.0
+version: 1.0.0
 icon: https://raw.githubusercontent.com/ollama-webui/ollama-webui/main/static/favicon.png
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index 61372736eb..bfdf152576 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -8,7 +8,7 @@ ollama:
   resources:
     limits:
       cpu: "2000m"
       memory: "2Gi"
-      nvidia.com/gpu: "1"
+      nvidia.com/gpu: "0"
   volumeSize: 1Gi
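
A quick way to sanity-check the chart and overlay added above before installing (hypothetical invocations; the release name is arbitrary):

```bash
# Render the Helm chart locally and eyeball the generated manifests:
helm template ollama-webui ./kubernetes/helm | less

# Client-side dry run of the kustomize overlay (GPU patch applied via kustomization.yaml):
kubectl apply -k ./kubernetes/manifest --dry-run=client
```
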
From 8312841b10778fbc09f5d7dba364d14386e705c9 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Sun, 24 Dec 2023 16:50:05 +0100
Subject: [PATCH 012/129] Updated readme accordingly

---
 README.md | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 756ac30cb6..aea5c8a51a 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ Also check our sibling project, [OllamaHub](https://ollamahub.com/), where you c
 
 - ⚡ **Swift Responsiveness**: Enjoy fast and responsive performance.
 
-- 🚀 **Effortless Setup**: Install seamlessly using Docker for a hassle-free experience.
+- 🚀 **Effortless Setup**: Install seamlessly using Docker or Kubernetes (kubectl, kustomize or helm) for a hassle-free experience.
 
 - 💻 **Code Syntax Highlighting**: Enjoy enhanced code readability with our syntax highlighting feature.
 
@@ -90,6 +90,33 @@ Note that both the above commands will use the latest production docker image in
 ./run-compose.sh --build --enable-gpu[count=1]
 ```
 
+### Installing Both Ollama and Ollama Web UI Using Kustomize
+For CPU-only pod
+```bash
+kubectl apply -f ./kubernetes/manifest/base
+```
+For GPU-enabled pod
+```bash
+kubectl apply -k ./kubernetes/manifest
+```
+
+### Installing Both Ollama and Ollama Web UI Using Helm
+Package the Helm chart first
+```bash
+helm package ./kubernetes/helm/
+```
+
+For CPU-only pod
+```bash
+helm install ollama-webui ./ollama-webui-*.tgz
+```
+For GPU-enabled pod
+```bash
+helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1"
+```
+
+Check the `kubernetes/helm/values.yaml` file to see which parameters are available for customization
+
 ### Installing Ollama Web UI Only
 
From f4bf7773a6a766a9c6d80b3c28cef6758dcc8f25 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Tue, 26 Dec 2023 02:11:22 +0100
Subject: [PATCH 013/129] Update run-compose.sh

---
 run-compose.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/run-compose.sh b/run-compose.sh
index 7c7ceb7145..0557bce95d 100755
--- a/run-compose.sh
+++ b/run-compose.sh
@@ -80,12 +80,12 @@ usage() {
     echo "  -h, --help                 Show this help message."
     echo ""
     echo "Examples:"
-    echo "  ./$0 --drop"
-    echo "  ./$0 --enable-gpu[count=1]"
-    echo "  ./$0 --enable-api[port=11435]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+    echo "  $0 --drop"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
     echo ""
     echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
    echo "About the gpu to use, the script automatically detects it using the \"lspci\" command."
@@ -234,4 +234,4 @@ else
     echo "Aborted."
 fi
 
-echo
\ No newline at end of file
+echo
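
The `./$0` to `$0` change above works because `$0` already carries the invocation path, so the `./` prefix was duplicated. A one-line demonstration:

```bash
bash -c 'echo "$0"' ./run-compose.sh   # prints: ./run-compose.sh
```
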
From 54e89a451649bcbb41942c8ad9f00290ff3e7f85 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Tue, 26 Dec 2023 02:28:45 +0100
Subject: [PATCH 014/129] Restored docker compose configuration

Also added the override for enabling GPU and better explained OS and
hardware limitations

---
 README.md | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index aea5c8a51a..be6b508c5d 100644
--- a/README.md
+++ b/README.md
@@ -71,23 +71,40 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/)
 
 ## How to Install 🚀
 
-### Installing Both Ollama and Ollama Web UI Using Docker Compose
+### Installing Both Ollama and Ollama Web UI Using the Provided run-compose.sh Bash Script
+Also available on Windows under any Docker-enabled WSL2 Linux distro (you have to enable it from Docker Desktop).
+Simply run the following commands:
 
-If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. Simply run the following command:
-
-For cpu-only container
+Grant execute permission to the script
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh
+chmod +x run-compose.sh
 ```
 
-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For CPU-only container
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh
+```
+
+For GPU-enabled container (to enable this you must have a GPU driver set up for Docker; it mostly works with NVIDIA, so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)).
+Warning! A GPU-enabled installation has only been tested using Linux and an NVIDIA GPU; full functionality is not guaranteed under Windows or macOS, or with a different GPU.
+```bash
+./run-compose.sh --enable-gpu
 ```
 
 Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
 ```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
+```
+
+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+To install using the docker compose files as a CPU-only installation, simply run this command
+```bash
+docker compose up -d
+```
+
+For a GPU-enabled installation (provided you installed the necessary GPU drivers and you are using NVIDIA)
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
 ```
From 02ce0c47f48fa9cd86dbc8266457f75e982ba147 Mon Sep 17 00:00:00 2001
From: Kenneth Bingham
Date: Wed, 27 Dec 2023 19:49:51 -0500
Subject: [PATCH 015/129] let ingress be enabled by default

---
 kubernetes/helm/templates/webui-ingress.yaml | 9 ++++++---
 kubernetes/helm/values.yaml                  | 4 ++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/kubernetes/helm/templates/webui-ingress.yaml b/kubernetes/helm/templates/webui-ingress.yaml
index 3970825d13..84f819f378 100644
--- a/kubernetes/helm/templates/webui-ingress.yaml
+++ b/kubernetes/helm/templates/webui-ingress.yaml
@@ -1,11 +1,13 @@
+{{- if .Values.webui.ingress.enabled }}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ollama-webui-ingress
   namespace: {{ .Values.namespace }}
-  #annotations:
-  #  Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-  #  nginx.ingress.kubernetes.io/rewrite-target: /
+{{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+{{- end }}
 spec:
   rules:
     - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
             name: ollama-webui-service
             port:
               number: {{ .Values.webui.servicePort }}
+{{- end }}
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index bfdf152576..f115f82fe2 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -20,4 +20,8 @@ webui:
     cpu: "500m"
     memory: "500Mi"
   ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
From b42b2e7890b403d798628dbe3089c2887d3c0fa9 Mon Sep 17 00:00:00 2001
From: braveokafor
Date: Thu, 28 Dec 2023 16:28:09 +0100
Subject: [PATCH 016/129] Added nodeSelectors for allocating GPU nodePools in
 the cloud and configured volumes for WebUI

---
 kubernetes/helm/templates/ollama-service.yaml |  1 +
 .../helm/templates/ollama-statefulset.yaml    | 29 +++++++++++++++----
 .../helm/templates/webui-deployment.yaml      | 22 ++++++++++----
 kubernetes/helm/templates/webui-pvc.yaml      | 12 ++++++++
 kubernetes/helm/templates/webui-service.yaml  |  2 +-
 kubernetes/helm/values.yaml                   | 11 +++++++
 6 files changed, 64 insertions(+), 13 deletions(-)
 create mode 100644 kubernetes/helm/templates/webui-pvc.yaml

diff --git a/kubernetes/helm/templates/ollama-service.yaml b/kubernetes/helm/templates/ollama-service.yaml
index afa25e38b2..54558473a3 100644
--- a/kubernetes/helm/templates/ollama-service.yaml
+++ b/kubernetes/helm/templates/ollama-service.yaml
@@ -4,6 +4,7 @@ metadata:
   name: ollama-service
   namespace: {{ .Values.namespace }}
 spec:
+  type: {{ .Values.ollama.service.type }}
   selector:
     app: ollama
   ports:
diff --git a/kubernetes/helm/templates/ollama-statefulset.yaml b/kubernetes/helm/templates/ollama-statefulset.yaml
index 755ed008ac..83cb6883f7 100644
--- a/kubernetes/helm/templates/ollama-statefulset.yaml
+++ b/kubernetes/helm/templates/ollama-statefulset.yaml
@@ -19,15 +19,32 @@ spec:
         image: {{ .Values.ollama.image }}
         ports:
         - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
         volumeMounts:
         - name: ollama-volume
           mountPath: /root/.ollama
         tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+        {{- if .Values.ollama.gpu.enabled }}
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+        {{- end }}
   volumeClaimTemplates:
   - metadata:
       name: ollama-volume
@@ -35,4 +52,4 @@ spec:
       accessModes: [ "ReadWriteOnce" ]
       resources:
        requests:
-          storage: 1Gi
\ No newline at end of file
+          storage: {{ .Values.ollama.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-deployment.yaml b/kubernetes/helm/templates/webui-deployment.yaml
index ec4fc79f49..d9721ee05c 100644
--- a/kubernetes/helm/templates/webui-deployment.yaml
+++ b/kubernetes/helm/templates/webui-deployment.yaml
@@ -15,14 +15,24 @@ spec:
     spec:
       containers:
       - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
         ports:
         - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
         env:
         - name: OLLAMA_API_BASE_URL
           value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
         tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-pvc.yaml b/kubernetes/helm/templates/webui-pvc.yaml
new file mode 100644
index 0000000000..e9961aa8d1
--- /dev/null
+++ b/kubernetes/helm/templates/webui-pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-service.yaml b/kubernetes/helm/templates/webui-service.yaml
index dd60585584..7fefa4fd4f 100644
--- a/kubernetes/helm/templates/webui-service.yaml
+++ b/kubernetes/helm/templates/webui-service.yaml
@@ -4,7 +4,7 @@ metadata:
   name: ollama-webui-service
   namespace: {{ .Values.namespace }}
 spec:
-  type: NodePort  # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default: NodePort # Use LoadBalancer if you're on a cloud that supports it
   selector:
     app: ollama-webui
   ports:
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index f115f82fe2..648b405093 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -10,6 +10,12 @@ ollama:
       memory: "2Gi"
       nvidia.com/gpu: "0"
   volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false
 
 webui:
   replicaCount: 1
@@ -25,3 +31,8 @@ webui:
       # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
       # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
\ No newline at end of file
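
For context on the middleware pattern the next patch adopts, here is a minimal, self-contained sketch of Starlette's authentication hooks (not the project's actual backend; `DemoBearerBackend` and the `SimpleUser` stand-in are illustrative only):

```python
from fastapi import FastAPI, Request
from starlette.authentication import (
    AuthCredentials, AuthenticationBackend, AuthenticationError, SimpleUser,
)
from starlette.middleware.authentication import AuthenticationMiddleware


class DemoBearerBackend(AuthenticationBackend):
    async def authenticate(self, conn):
        # Returning None leaves request.user as an unauthenticated placeholder.
        auth = conn.headers.get("Authorization")
        if auth is None:
            return None
        scheme, _, token = auth.partition(" ")
        if scheme.lower() != "bearer" or not token:
            raise AuthenticationError("Invalid credentials")
        # A real backend would verify the token and load the user here.
        return AuthCredentials(["authenticated"]), SimpleUser(token)


app = FastAPI()
app.add_middleware(AuthenticationMiddleware, backend=DemoBearerBackend())


@app.get("/whoami")
async def whoami(request: Request):
    # request.user is populated by the middleware before the route runs.
    return {"authenticated": request.user.is_authenticated}
```
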
on_error=on_auth_error) + app.include_router(users.router, prefix="/users", tags=["users"]) app.include_router(chats.router, prefix="/chats", tags=["chats"]) app.include_router(modelfiles.router, prefix="/modelfiles", tags=["modelfiles"]) - - app.include_router(utils.router, prefix="/utils", tags=["utils"]) diff --git a/backend/apps/web/middlewares/auth.py b/backend/apps/web/middlewares/auth.py new file mode 100644 index 0000000000..433cdfef63 --- /dev/null +++ b/backend/apps/web/middlewares/auth.py @@ -0,0 +1,27 @@ +from apps.web.models.users import Users +from fastapi import Request, status +from starlette.authentication import ( + AuthCredentials, AuthenticationBackend, AuthenticationError, +) +from starlette.requests import HTTPConnection +from utils.utils import verify_token +from starlette.responses import JSONResponse +from constants import ERROR_MESSAGES + +class BearerTokenAuthBackend(AuthenticationBackend): + + async def authenticate(self, conn: HTTPConnection): + if "Authorization" not in conn.headers: + return + data = verify_token(conn) + if data != None and 'email' in data: + user = Users.get_user_by_email(data['email']) + if user is None: + raise AuthenticationError('Invalid credentials') + return AuthCredentials([user.role]), user + else: + raise AuthenticationError('Invalid credentials') + +def on_auth_error(request: Request, exc: Exception): + print('Authentication failed: ', exc) + return JSONResponse({"detail": ERROR_MESSAGES.INVALID_TOKEN}, status_code=status.HTTP_401_UNAUTHORIZED) \ No newline at end of file diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py index 798972e7c9..e67776b3cd 100644 --- a/backend/apps/web/routers/chats.py +++ b/backend/apps/web/routers/chats.py @@ -1,5 +1,5 @@ -from fastapi import Response -from fastapi import Depends, FastAPI, HTTPException, status + +from fastapi import Depends, Request, HTTPException, status from datetime import datetime, timedelta from typing import List, Union, Optional @@ -30,17 +30,8 @@ router = APIRouter() @router.get("/", response_model=List[ChatTitleIdResponse]) -async def get_user_chats(skip: int = 0, limit: int = 50, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - return Chats.get_chat_lists_by_user_id(user.id, skip, limit) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) +async def get_user_chats(request:Request, skip: int = 0, limit: int = 50): + return Chats.get_chat_lists_by_user_id(request.user.id, skip, limit) ############################ @@ -49,20 +40,11 @@ async def get_user_chats(skip: int = 0, limit: int = 50, cred=Depends(bearer_sch @router.get("/all", response_model=List[ChatResponse]) -async def get_all_user_chats(cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - return [ +async def get_all_user_chats(request:Request,): + return [ ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) - for chat in Chats.get_all_chats_by_user_id(user.id) + for chat in Chats.get_all_chats_by_user_id(request.user.id) ] - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) ############################ @@ -71,18 +53,9 @@ async def get_all_user_chats(cred=Depends(bearer_scheme)): @router.post("/new", response_model=Optional[ChatResponse]) -async def create_new_chat(form_data: ChatForm, cred=Depends(bearer_scheme)): - 
token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - chat = Chats.insert_new_chat(user.id, form_data) - return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) +async def create_new_chat(form_data: ChatForm,request:Request): + chat = Chats.insert_new_chat(request.user.id, form_data) + return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) ############################ @@ -91,25 +64,14 @@ async def create_new_chat(form_data: ChatForm, cred=Depends(bearer_scheme)): @router.get("/{id}", response_model=Optional[ChatResponse]) -async def get_chat_by_id(id: str, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) +async def get_chat_by_id(id: str, request:Request): + chat = Chats.get_chat_by_id_and_user_id(id, request.user.id) - if user: - chat = Chats.get_chat_by_id_and_user_id(id, user.id) - - if chat: - return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.NOT_FOUND, - ) + if chat: + return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.NOT_FOUND) ############################ @@ -118,27 +80,18 @@ async def get_chat_by_id(id: str, cred=Depends(bearer_scheme)): @router.post("/{id}", response_model=Optional[ChatResponse]) -async def update_chat_by_id(id: str, form_data: ChatForm, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - chat = Chats.get_chat_by_id_and_user_id(id, user.id) - if chat: +async def update_chat_by_id(id: str, form_data: ChatForm, request:Request): + chat = Chats.get_chat_by_id_and_user_id(id, request.user.id) + if chat: updated_chat = {**json.loads(chat.chat), **form_data.chat} chat = Chats.update_chat_by_id(id, updated_chat) return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) - else: + else: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.ACCESS_PROHIBITED, ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) ############################ @@ -147,15 +100,6 @@ async def update_chat_by_id(id: str, form_data: ChatForm, cred=Depends(bearer_sc @router.delete("/{id}", response_model=bool) -async def delete_chat_by_id(id: str, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - result = Chats.delete_chat_by_id_and_user_id(id, user.id) - return result - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) +async def delete_chat_by_id(id: str, request: Request): + result = Chats.delete_chat_by_id_and_user_id(id, request.user.id) + return result \ No newline at end of file diff --git a/backend/utils/utils.py b/backend/utils/utils.py index 62e6958fb2..97fd1f6f85 100644 --- a/backend/utils/utils.py +++ b/backend/utils/utils.py @@ -55,13 +55,13 @@ def extract_token_from_auth_header(auth_header: str): def verify_token(request): try: - bearer = request.headers["authorization"] - if bearer: - token = bearer[len("Bearer 
") :] - decoded = jwt.decode( + authorization = request.headers["authorization"] + if authorization: + _, token = authorization.split() + decoded_token = jwt.decode( token, JWT_SECRET_KEY, options={"verify_signature": False} ) - return decoded + return decoded_token else: return None except Exception as e: From 431b710e46b3a4fcd17a9d687ff9715a93e693e2 Mon Sep 17 00:00:00 2001 From: David Young Date: Fri, 29 Dec 2023 16:09:59 +0000 Subject: [PATCH 018/129] Settings were being referenced incorrectly and as a result, no settings were being sent to openai endpoint. Added num_predict slider in advanced UI. Also added seed, stop and max_tokens (uses num_predict) being sent to openai endpoint. --- .../components/chat/Settings/Advanced.svelte | 48 ++++++++++++++++++- src/lib/components/chat/SettingsModal.svelte | 7 ++- src/routes/(app)/+page.svelte | 11 +++-- src/routes/(app)/c/[id]/+page.svelte | 11 +++-- .../(app)/modelfiles/create/+page.svelte | 4 +- 5 files changed, 69 insertions(+), 12 deletions(-) diff --git a/src/lib/components/chat/Settings/Advanced.svelte b/src/lib/components/chat/Settings/Advanced.svelte index a188f107ab..41ad69f33f 100644 --- a/src/lib/components/chat/Settings/Advanced.svelte +++ b/src/lib/components/chat/Settings/Advanced.svelte @@ -12,7 +12,8 @@ top_k: '', top_p: '', tfs_z: '', - num_ctx: '' + num_ctx: '', + num_predict: '' }; @@ -507,4 +508,49 @@ {/if} +
+          <hr class=" dark:border-gray-700" />
+
+          <div class=" py-0.5 w-full justify-between">
+            <div class="flex w-full justify-between">
+              <div class=" self-center text-xs font-medium">Max Tokens</div>
+
+              <button
+                class="p-1 px-3 text-xs flex rounded transition"
+                type="button"
+                on:click={() => {
+                  options.num_predict = options.num_predict === '' ? 256 : '';
+                }}
+              >
+                {#if options.num_predict === ''}
+                  <span class="ml-2 self-center"> Default </span>
+                {:else}
+                  <span class="ml-2 self-center"> Custom </span>
+                {/if}
+              </button>
+            </div>
+
+            {#if options.num_predict !== ''}
+              <div class="flex mt-0.5 space-x-2">
+                <div class=" flex-1">
+                  <input
+                    id="steps-range"
+                    type="range"
+                    min="-2"
+                    max="16000"
+                    step="1"
+                    bind:value={options.num_predict}
+                    class="w-full h-2 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
+                  />
+                </div>
+                <div class="">
+                  <input
+                    bind:value={options.num_predict}
+                    type="number"
+                    class=" bg-transparent text-center w-14"
+                    min="-2"
+                    max="16000"
+                    step="1"
+                  />
+                </div>
+              </div>
+            {/if}
+          </div>
diff --git a/src/lib/components/chat/SettingsModal.svelte b/src/lib/components/chat/SettingsModal.svelte index 18aa6ecad1..12405537b8 100644 --- a/src/lib/components/chat/SettingsModal.svelte +++ b/src/lib/components/chat/SettingsModal.svelte @@ -52,7 +52,8 @@ top_p: '', stop: '', tfs_z: '', - num_ctx: '' + num_ctx: '', + num_predict: '' }; // Models @@ -644,6 +645,7 @@ options.top_k = settings.top_k ?? ''; options.top_p = settings.top_p ?? ''; options.num_ctx = settings.num_ctx ?? ''; + options.num_predict = settings.num_predict ?? ''; options = { ...options, ...settings.options }; options.stop = (settings?.options?.stop ?? []).join(','); @@ -1122,7 +1124,8 @@ top_k: options.top_k !== '' ? options.top_k : undefined, top_p: options.top_p !== '' ? options.top_p : undefined, tfs_z: options.tfs_z !== '' ? options.tfs_z : undefined, - num_ctx: options.num_ctx !== '' ? options.num_ctx : undefined + num_ctx: options.num_ctx !== '' ? options.num_ctx : undefined, + num_predict: options.num_predict !== '' ? options.num_predict : undefined } }); show = false; diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte index 6f272a62ee..0655cd9128 100644 --- a/src/routes/(app)/+page.svelte +++ b/src/routes/(app)/+page.svelte @@ -368,10 +368,13 @@ } : { content: message.content }) })), - temperature: $settings.temperature ?? undefined, - top_p: $settings.top_p ?? undefined, - num_ctx: $settings.num_ctx ?? undefined, - frequency_penalty: $settings.repeat_penalty ?? undefined + seed: $settings.options.seed ?? undefined, + stop: $settings.options.stop ?? undefined, + temperature: $settings.options.temperature ?? undefined, + top_p: $settings.options.top_p ?? undefined, + num_ctx: $settings.options.num_ctx ?? undefined, + frequency_penalty: $settings.options.repeat_penalty ?? undefined, + max_tokens: $settings.options.num_predict ?? undefined, }) } ).catch((err) => { diff --git a/src/routes/(app)/c/[id]/+page.svelte b/src/routes/(app)/c/[id]/+page.svelte index 9600f2982c..8954a21fbc 100644 --- a/src/routes/(app)/c/[id]/+page.svelte +++ b/src/routes/(app)/c/[id]/+page.svelte @@ -395,10 +395,13 @@ } : { content: message.content }) })), - temperature: $settings.temperature ?? undefined, - top_p: $settings.top_p ?? undefined, - num_ctx: $settings.num_ctx ?? undefined, - frequency_penalty: $settings.repeat_penalty ?? undefined + seed: $settings.options.seed ?? undefined, + stop: $settings.options.stop ?? undefined, + temperature: $settings.options.temperature ?? undefined, + top_p: $settings.options.top_p ?? undefined, + num_ctx: $settings.options.num_ctx ?? undefined, + frequency_penalty: $settings.options.repeat_penalty ?? undefined, + max_tokens: $settings.options.num_predict ?? undefined, }) } ).catch((err) => { diff --git a/src/routes/(app)/modelfiles/create/+page.svelte b/src/routes/(app)/modelfiles/create/+page.svelte index 506edb9f73..23a0cffc07 100644 --- a/src/routes/(app)/modelfiles/create/+page.svelte +++ b/src/routes/(app)/modelfiles/create/+page.svelte @@ -51,7 +51,8 @@ top_k: '', top_p: '', tfs_z: '', - num_ctx: '' + num_ctx: '', + num_predict: '' }; let modelfileCreator = null; @@ -73,6 +74,7 @@ ${options.top_k !== '' ? `PARAMETER top_k ${options.top_k}` : ''} ${options.top_p !== '' ? `PARAMETER top_p ${options.top_p}` : ''} ${options.tfs_z !== '' ? `PARAMETER tfs_z ${options.tfs_z}` : ''} ${options.num_ctx !== '' ? `PARAMETER num_ctx ${options.num_ctx}` : ''} +${options.num_predict !== '' ? 
`PARAMETER num_predict ${options.num_predict}` : ''} SYSTEM """${system}"""`.replace(/^\s*\n/gm, ''); } From bdd153d8f5d91487f8c7eca77c340c62b1073626 Mon Sep 17 00:00:00 2001 From: Anuraag Jain Date: Sat, 30 Dec 2023 12:53:33 +0200 Subject: [PATCH 019/129] refac: use dependencies to verify token - feat: added new util to get the current user when needed. Middleware was adding authentication logic to all the routes. let's revisit if we can move the non-auth endpoints to a separate route. - refac: update the routes to use new helpers for verification and retrieving user - chore: added black for local formatting of py code --- backend/apps/ollama/main.py | 10 +- backend/apps/web/main.py | 29 +++-- backend/apps/web/middlewares/auth.py | 27 ---- backend/apps/web/models/users.py | 10 -- backend/apps/web/routers/auths.py | 26 ++-- backend/apps/web/routers/chats.py | 56 +++++---- backend/apps/web/routers/modelfiles.py | 164 +++++++++---------------- backend/apps/web/routers/users.py | 57 +++------ backend/requirements.txt | 2 + backend/utils/utils.py | 37 +++--- 10 files changed, 167 insertions(+), 251 deletions(-) delete mode 100644 backend/apps/web/middlewares/auth.py diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py index 64c6361ed6..7e138c39a3 100644 --- a/backend/apps/ollama/main.py +++ b/backend/apps/ollama/main.py @@ -8,7 +8,7 @@ import json from apps.web.models.users import Users from constants import ERROR_MESSAGES -from utils.utils import extract_token_from_auth_header +from utils.utils import decode_token from config import OLLAMA_API_BASE_URL, WEBUI_AUTH app = Flask(__name__) @@ -34,8 +34,12 @@ def proxy(path): # Basic RBAC support if WEBUI_AUTH: if "Authorization" in headers: - token = extract_token_from_auth_header(headers["Authorization"]) - user = Users.get_user_by_token(token) + _, credentials = headers["Authorization"].split() + token_data = decode_token(credentials) + if token_data is None or "email" not in token_data: + return jsonify({"detail": ERROR_MESSAGES.UNAUTHORIZED}), 401 + + user = Users.get_user_by_email(token_data["email"]) if user: # Only user and admin roles can access if user.role in ["user", "admin"]: diff --git a/backend/apps/web/main.py b/backend/apps/web/main.py index cb53e9e2f8..4519048b09 100644 --- a/backend/apps/web/main.py +++ b/backend/apps/web/main.py @@ -1,9 +1,10 @@ -from fastapi import FastAPI +from fastapi import FastAPI, Depends +from fastapi.routing import APIRoute from fastapi.middleware.cors import CORSMiddleware from starlette.middleware.authentication import AuthenticationMiddleware from apps.web.routers import auths, users, chats, modelfiles, utils from config import WEBUI_VERSION, WEBUI_AUTH -from apps.web.middlewares.auth import BearerTokenAuthBackend, on_auth_error +from utils.utils import verify_auth_token app = FastAPI() @@ -17,14 +18,26 @@ app.add_middleware( allow_headers=["*"], ) - app.include_router(auths.router, prefix="/auths", tags=["auths"]) -app.add_middleware(AuthenticationMiddleware, backend=BearerTokenAuthBackend(), on_error=on_auth_error) - -app.include_router(users.router, prefix="/users", tags=["users"]) -app.include_router(chats.router, prefix="/chats", tags=["chats"]) -app.include_router(modelfiles.router, prefix="/modelfiles", tags=["modelfiles"]) +app.include_router( + users.router, + prefix="/users", + tags=["users"], + dependencies=[Depends(verify_auth_token)], +) +app.include_router( + chats.router, + prefix="/chats", + tags=["chats"], + dependencies=[Depends(verify_auth_token)], +) 
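The registrations above attach `verify_auth_token` as a router-level dependency: FastAPI evaluates it before every route in the router and discards its return value, so the handlers themselves no longer repeat the check. A minimal, self-contained sketch of the pattern (the hard-coded token comparison is a stand-in for the patch's JWT verification, and the route and prefix names are illustrative):

```python
from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.routing import APIRouter
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

app = FastAPI()
router = APIRouter()


def verify_auth_token(
    auth_token: HTTPAuthorizationCredentials = Depends(HTTPBearer()),
):
    # Runs once per request for every route on the router; it either
    # raises or returns None, so it acts purely as a gate.
    if auth_token.credentials != "secret-token":  # stand-in check
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED)


@router.get("/")
async def list_items():
    # Only reached when verify_auth_token did not raise.
    return ["a", "b"]


app.include_router(
    router, prefix="/items", dependencies=[Depends(verify_auth_token)]
)
```

Serving this with `uvicorn` and calling `/items/` with and without an `Authorization: Bearer secret-token` header shows the 401 gate in action.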
+app.include_router( + modelfiles.router, + prefix="/modelfiles", + tags=["modelfiles"], + dependencies=[Depends(verify_auth_token)], +) app.include_router(utils.router, prefix="/utils", tags=["utils"]) diff --git a/backend/apps/web/middlewares/auth.py b/backend/apps/web/middlewares/auth.py deleted file mode 100644 index 433cdfef63..0000000000 --- a/backend/apps/web/middlewares/auth.py +++ /dev/null @@ -1,27 +0,0 @@ -from apps.web.models.users import Users -from fastapi import Request, status -from starlette.authentication import ( - AuthCredentials, AuthenticationBackend, AuthenticationError, -) -from starlette.requests import HTTPConnection -from utils.utils import verify_token -from starlette.responses import JSONResponse -from constants import ERROR_MESSAGES - -class BearerTokenAuthBackend(AuthenticationBackend): - - async def authenticate(self, conn: HTTPConnection): - if "Authorization" not in conn.headers: - return - data = verify_token(conn) - if data != None and 'email' in data: - user = Users.get_user_by_email(data['email']) - if user is None: - raise AuthenticationError('Invalid credentials') - return AuthCredentials([user.role]), user - else: - raise AuthenticationError('Invalid credentials') - -def on_auth_error(request: Request, exc: Exception): - print('Authentication failed: ', exc) - return JSONResponse({"detail": ERROR_MESSAGES.INVALID_TOKEN}, status_code=status.HTTP_401_UNAUTHORIZED) \ No newline at end of file diff --git a/backend/apps/web/models/users.py b/backend/apps/web/models/users.py index 782b7f47e7..669ab7dc98 100644 --- a/backend/apps/web/models/users.py +++ b/backend/apps/web/models/users.py @@ -3,8 +3,6 @@ from peewee import * from playhouse.shortcuts import model_to_dict from typing import List, Union, Optional import time - -from utils.utils import decode_token from utils.misc import get_gravatar_url from apps.web.internal.db import DB @@ -83,14 +81,6 @@ class UsersTable: except: return None - def get_user_by_token(self, token: str) -> Optional[UserModel]: - data = decode_token(token) - - if data != None and "email" in data: - return self.get_user_by_email(data["email"]) - else: - return None - def get_users(self, skip: int = 0, limit: int = 50) -> List[UserModel]: return [ UserModel(**model_to_dict(user)) diff --git a/backend/apps/web/routers/auths.py b/backend/apps/web/routers/auths.py index 27d6a3b6ad..5193afc6bc 100644 --- a/backend/apps/web/routers/auths.py +++ b/backend/apps/web/routers/auths.py @@ -20,7 +20,7 @@ from apps.web.models.users import Users from utils.utils import ( get_password_hash, - bearer_scheme, + get_current_user, create_token, ) from utils.misc import get_gravatar_url @@ -35,22 +35,14 @@ router = APIRouter() @router.get("/", response_model=UserResponse) -async def get_session_user(cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - if user: - return { - "id": user.id, - "email": user.email, - "name": user.name, - "role": user.role, - "profile_image_url": user.profile_image_url, - } - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) +async def get_session_user(user=Depends(get_current_user)): + return { + "id": user.id, + "email": user.email, + "name": user.name, + "role": user.role, + "profile_image_url": user.profile_image_url, + } ############################ diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py index e67776b3cd..54b529e655 100644 --- a/backend/apps/web/routers/chats.py 
+++ b/backend/apps/web/routers/chats.py @@ -1,8 +1,7 @@ - from fastapi import Depends, Request, HTTPException, status from datetime import datetime, timedelta from typing import List, Union, Optional - +from utils.utils import get_current_user from fastapi import APIRouter from pydantic import BaseModel import json @@ -30,8 +29,10 @@ router = APIRouter() @router.get("/", response_model=List[ChatTitleIdResponse]) -async def get_user_chats(request:Request, skip: int = 0, limit: int = 50): - return Chats.get_chat_lists_by_user_id(request.user.id, skip, limit) +async def get_user_chats( + user=Depends(get_current_user), skip: int = 0, limit: int = 50 +): + return Chats.get_chat_lists_by_user_id(user.id, skip, limit) ############################ @@ -40,11 +41,11 @@ async def get_user_chats(request:Request, skip: int = 0, limit: int = 50): @router.get("/all", response_model=List[ChatResponse]) -async def get_all_user_chats(request:Request,): +async def get_all_user_chats(user=Depends(get_current_user)): return [ - ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) - for chat in Chats.get_all_chats_by_user_id(request.user.id) - ] + ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) + for chat in Chats.get_all_chats_by_user_id(user.id) + ] ############################ @@ -53,8 +54,8 @@ async def get_all_user_chats(request:Request,): @router.post("/new", response_model=Optional[ChatResponse]) -async def create_new_chat(form_data: ChatForm,request:Request): - chat = Chats.insert_new_chat(request.user.id, form_data) +async def create_new_chat(form_data: ChatForm, user=Depends(get_current_user)): + chat = Chats.insert_new_chat(user.id, form_data) return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) @@ -64,14 +65,15 @@ async def create_new_chat(form_data: ChatForm,request:Request): @router.get("/{id}", response_model=Optional[ChatResponse]) -async def get_chat_by_id(id: str, request:Request): - chat = Chats.get_chat_by_id_and_user_id(id, request.user.id) +async def get_chat_by_id(id: str, user=Depends(get_current_user)): + chat = Chats.get_chat_by_id_and_user_id(id, user.id) if chat: return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) else: - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.NOT_FOUND) + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND + ) ############################ @@ -80,18 +82,20 @@ async def get_chat_by_id(id: str, request:Request): @router.post("/{id}", response_model=Optional[ChatResponse]) -async def update_chat_by_id(id: str, form_data: ChatForm, request:Request): - chat = Chats.get_chat_by_id_and_user_id(id, request.user.id) +async def update_chat_by_id( + id: str, form_data: ChatForm, user=Depends(get_current_user) +): + chat = Chats.get_chat_by_id_and_user_id(id, user.id) if chat: - updated_chat = {**json.loads(chat.chat), **form_data.chat} + updated_chat = {**json.loads(chat.chat), **form_data.chat} - chat = Chats.update_chat_by_id(id, updated_chat) - return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) + chat = Chats.update_chat_by_id(id, updated_chat) + return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) 
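With the refactor, each handler receives the caller through `user=Depends(get_current_user)` instead of unpacking the bearer token itself. Roughly, that dependency decodes the JWT and resolves the account; the sketch below compresses the idea into one self-contained function (it folds the token check into the lookup, verifies the signature for simplicity, and uses a plain dict where the real code queries the `Users` table):

```python
import jwt  # PyJWT, listed in backend/requirements.txt
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

JWT_SECRET_KEY = "change-me"  # stand-in for the app's configured secret


def get_current_user(
    auth_token: HTTPAuthorizationCredentials = Depends(HTTPBearer()),
):
    try:
        data = jwt.decode(
            auth_token.credentials, JWT_SECRET_KEY, algorithms=["HS256"]
        )
    except jwt.PyJWTError:
        data = None
    if data is None or "email" not in data:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED)
    # The real dependency returns Users.get_user_by_email(data["email"]);
    # a dict stands in for that model here.
    return {"email": data["email"], "role": "user"}
```

Admin-only routers, such as the modelfiles and users routes below, then only need to inspect the injected user's `role` field before proceeding.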
############################ @@ -100,6 +104,6 @@ async def update_chat_by_id(id: str, form_data: ChatForm, request:Request): @router.delete("/{id}", response_model=bool) -async def delete_chat_by_id(id: str, request: Request): - result = Chats.delete_chat_by_id_and_user_id(id, request.user.id) - return result \ No newline at end of file +async def delete_chat_by_id(id: str, user=Depends(get_current_user)): + result = Chats.delete_chat_by_id_and_user_id(id, user.id) + return result diff --git a/backend/apps/web/routers/modelfiles.py b/backend/apps/web/routers/modelfiles.py index dd1f6cc590..c54ef4a2cf 100644 --- a/backend/apps/web/routers/modelfiles.py +++ b/backend/apps/web/routers/modelfiles.py @@ -1,4 +1,3 @@ -from fastapi import Response from fastapi import Depends, FastAPI, HTTPException, status from datetime import datetime, timedelta from typing import List, Union, Optional @@ -16,9 +15,7 @@ from apps.web.models.modelfiles import ( ModelfileResponse, ) -from utils.utils import ( - bearer_scheme, -) +from utils.utils import bearer_scheme, get_current_user from constants import ERROR_MESSAGES router = APIRouter() @@ -30,16 +27,7 @@ router = APIRouter() @router.get("/", response_model=List[ModelfileResponse]) async def get_modelfiles(skip: int = 0, limit: int = 50, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - return Modelfiles.get_modelfiles(skip, limit) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, - ) + return Modelfiles.get_modelfiles(skip, limit) ############################ @@ -48,36 +36,28 @@ async def get_modelfiles(skip: int = 0, limit: int = 50, cred=Depends(bearer_sch @router.post("/create", response_model=Optional[ModelfileResponse]) -async def create_new_modelfile(form_data: ModelfileForm, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) +async def create_new_modelfile( + form_data: ModelfileForm, user=Depends(get_current_user) +): + if user.role != "admin": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) - if user: - # Admin Only - if user.role == "admin": - modelfile = Modelfiles.insert_new_modelfile(user.id, form_data) + modelfile = Modelfiles.insert_new_modelfile(user.id, form_data) - if modelfile: - return ModelfileResponse( - **{ - **modelfile.model_dump(), - "modelfile": json.loads(modelfile.modelfile), - } - ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.DEFAULT(), - ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) + if modelfile: + return ModelfileResponse( + **{ + **modelfile.model_dump(), + "modelfile": json.loads(modelfile.modelfile), + } + ) else: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + detail=ERROR_MESSAGES.DEFAULT(), ) @@ -87,31 +67,20 @@ async def create_new_modelfile(form_data: ModelfileForm, cred=Depends(bearer_sch @router.post("/", response_model=Optional[ModelfileResponse]) -async def get_modelfile_by_tag_name( - form_data: ModelfileTagNameForm, cred=Depends(bearer_scheme) -): - token = cred.credentials - user = Users.get_user_by_token(token) +async def get_modelfile_by_tag_name(form_data: ModelfileTagNameForm): + modelfile = Modelfiles.get_modelfile_by_tag_name(form_data.tag_name) - if user: - modelfile = 
Modelfiles.get_modelfile_by_tag_name(form_data.tag_name) - - if modelfile: - return ModelfileResponse( - **{ - **modelfile.model_dump(), - "modelfile": json.loads(modelfile.modelfile), - } - ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.NOT_FOUND, - ) + if modelfile: + return ModelfileResponse( + **{ + **modelfile.model_dump(), + "modelfile": json.loads(modelfile.modelfile), + } + ) else: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + detail=ERROR_MESSAGES.NOT_FOUND, ) @@ -122,44 +91,34 @@ async def get_modelfile_by_tag_name( @router.post("/update", response_model=Optional[ModelfileResponse]) async def update_modelfile_by_tag_name( - form_data: ModelfileUpdateForm, cred=Depends(bearer_scheme) + form_data: ModelfileUpdateForm, user=Depends(get_current_user) ): - token = cred.credentials - user = Users.get_user_by_token(token) + if user.role != "admin": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) + modelfile = Modelfiles.get_modelfile_by_tag_name(form_data.tag_name) + if modelfile: + updated_modelfile = { + **json.loads(modelfile.modelfile), + **form_data.modelfile, + } - if user: - if user.role == "admin": - modelfile = Modelfiles.get_modelfile_by_tag_name(form_data.tag_name) - if modelfile: - updated_modelfile = { - **json.loads(modelfile.modelfile), - **form_data.modelfile, - } + modelfile = Modelfiles.update_modelfile_by_tag_name( + form_data.tag_name, updated_modelfile + ) - modelfile = Modelfiles.update_modelfile_by_tag_name( - form_data.tag_name, updated_modelfile - ) - - return ModelfileResponse( - **{ - **modelfile.model_dump(), - "modelfile": json.loads(modelfile.modelfile), - } - ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) + return ModelfileResponse( + **{ + **modelfile.model_dump(), + "modelfile": json.loads(modelfile.modelfile), + } + ) else: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, ) @@ -170,22 +129,13 @@ async def update_modelfile_by_tag_name( @router.delete("/delete", response_model=bool) async def delete_modelfile_by_tag_name( - form_data: ModelfileTagNameForm, cred=Depends(bearer_scheme) + form_data: ModelfileTagNameForm, user=Depends(get_current_user) ): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - if user.role == "admin": - result = Modelfiles.delete_modelfile_by_tag_name(form_data.tag_name) - return result - else: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) - else: + if user.role != "admin": raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, ) + + result = Modelfiles.delete_modelfile_by_tag_name(form_data.tag_name) + return result diff --git a/backend/apps/web/routers/users.py b/backend/apps/web/routers/users.py index 08437bd34b..f587dcf261 100644 --- a/backend/apps/web/routers/users.py +++ b/backend/apps/web/routers/users.py @@ -10,11 +10,7 @@ import uuid from apps.web.models.users import UserModel, UserRoleUpdateForm, Users -from utils.utils import ( - get_password_hash, - bearer_scheme, - 
create_token, -) +from utils.utils import get_current_user from constants import ERROR_MESSAGES router = APIRouter() @@ -25,23 +21,13 @@ router = APIRouter() @router.get("/", response_model=List[UserModel]) -async def get_users(skip: int = 0, limit: int = 50, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) - - if user: - if user.role == "admin": - return Users.get_users(skip, limit) - else: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) - else: +async def get_users(skip: int = 0, limit: int = 50, user=Depends(get_current_user)): + if user.role != "admin": raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, ) + return Users.get_users(skip, limit) ############################ @@ -50,26 +36,19 @@ async def get_users(skip: int = 0, limit: int = 50, cred=Depends(bearer_scheme)) @router.post("/update/role", response_model=Optional[UserModel]) -async def update_user_role(form_data: UserRoleUpdateForm, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) +async def update_user_role( + form_data: UserRoleUpdateForm, user=Depends(get_current_user) +): + if user.role != "admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + ) - if user: - if user.role == "admin": - if user.id != form_data.id: - return Users.update_user_role_by_id(form_data.id, form_data.role) - else: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=ERROR_MESSAGES.ACTION_PROHIBITED, - ) - else: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, - ) + if user.id != form_data.id: + return Users.update_user_role_by_id(form_data.id, form_data.role) else: raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACTION_PROHIBITED, ) diff --git a/backend/requirements.txt b/backend/requirements.txt index 2644d55971..6da59fb6e9 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -18,3 +18,5 @@ bcrypt PyJWT pyjwt[crypto] + +black \ No newline at end of file diff --git a/backend/utils/utils.py b/backend/utils/utils.py index 97fd1f6f85..c899115278 100644 --- a/backend/utils/utils.py +++ b/backend/utils/utils.py @@ -1,7 +1,9 @@ -from fastapi.security import HTTPBasicCredentials, HTTPBearer +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi import HTTPException, status, Depends +from apps.web.models.users import Users from pydantic import BaseModel from typing import Union, Optional - +from constants import ERROR_MESSAGES from passlib.context import CryptContext from datetime import datetime, timedelta import requests @@ -53,16 +55,23 @@ def extract_token_from_auth_header(auth_header: str): return auth_header[len("Bearer ") :] -def verify_token(request): - try: - authorization = request.headers["authorization"] - if authorization: - _, token = authorization.split() - decoded_token = jwt.decode( - token, JWT_SECRET_KEY, options={"verify_signature": False} +def verify_auth_token(auth_token: HTTPAuthorizationCredentials = Depends(HTTPBearer())): + data = decode_token(auth_token.credentials) + if data != None and "email" in data: + user = 
Users.get_user_by_email(data["email"]) + if user is None: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.INVALID_TOKEN, ) - return decoded_token - else: - return None - except Exception as e: - return None + return + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail=ERROR_MESSAGES.UNAUTHORIZED, + ) + + +def get_current_user(auth_token: HTTPAuthorizationCredentials = Depends(HTTPBearer())): + data = decode_token(auth_token.credentials) + return Users.get_user_by_email(data["email"]) From 08c0d7a9ec99dceeee0174da6b34f42a13457c0a Mon Sep 17 00:00:00 2001 From: Anuraag Jain Date: Sat, 30 Dec 2023 13:00:21 +0200 Subject: [PATCH 020/129] fix: merge conflicts --- backend/apps/web/routers/auths.py | 14 ++++++++------ backend/apps/web/routers/users.py | 31 +++++++++++-------------------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/backend/apps/web/routers/auths.py b/backend/apps/web/routers/auths.py index 34dab9b254..7ff645dccb 100644 --- a/backend/apps/web/routers/auths.py +++ b/backend/apps/web/routers/auths.py @@ -23,6 +23,7 @@ from utils.utils import ( get_password_hash, get_current_user, create_token, + verify_auth_token, ) from utils.misc import get_gravatar_url from constants import ERROR_MESSAGES @@ -35,7 +36,7 @@ router = APIRouter() ############################ -@router.get("/", response_model=UserResponse) +@router.get("/", response_model=UserResponse, dependencies=[Depends(verify_auth_token)]) async def get_session_user(user=Depends(get_current_user)): return { "id": user.id, @@ -51,11 +52,12 @@ async def get_session_user(user=Depends(get_current_user)): ############################ -@router.post("/update/password", response_model=bool) -async def update_password(form_data: UpdatePasswordForm, cred=Depends(bearer_scheme)): - token = cred.credentials - session_user = Users.get_user_by_token(token) - +@router.post( + "/update/password", response_model=bool, dependencies=[Depends(verify_auth_token)] +) +async def update_password( + form_data: UpdatePasswordForm, session_user=Depends(get_current_user) +): if session_user: user = Auths.authenticate_user(session_user.email, form_data.password) diff --git a/backend/apps/web/routers/users.py b/backend/apps/web/routers/users.py index 2c33c158a3..950b23fa2f 100644 --- a/backend/apps/web/routers/users.py +++ b/backend/apps/web/routers/users.py @@ -62,34 +62,25 @@ async def update_user_role( @router.delete("/{user_id}", response_model=bool) -async def delete_user_by_id(user_id: str, cred=Depends(bearer_scheme)): - token = cred.credentials - user = Users.get_user_by_token(token) +async def delete_user_by_id(user_id: str, user=Depends(get_current_user)): + if user.role == "admin": + if user.id != user_id: + result = Auths.delete_auth_by_id(user_id) - if user: - if user.role == "admin": - if user.id != user_id: - result = Auths.delete_auth_by_id(user_id) - - if result: - return True - else: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=ERROR_MESSAGES.DELETE_USER_ERROR, - ) + if result: + return True else: raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=ERROR_MESSAGES.ACTION_PROHIBITED, + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=ERROR_MESSAGES.DELETE_USER_ERROR, ) else: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, - detail=ERROR_MESSAGES.ACCESS_PROHIBITED, + detail=ERROR_MESSAGES.ACTION_PROHIBITED, ) else: raise HTTPException( - 
status_code=status.HTTP_401_UNAUTHORIZED, - detail=ERROR_MESSAGES.INVALID_TOKEN, + status_code=status.HTTP_403_FORBIDDEN, + detail=ERROR_MESSAGES.ACCESS_PROHIBITED, ) From 60c801ee2238555245b2ad64cc902d8f56a4ad8a Mon Sep 17 00:00:00 2001 From: Anuraag Jain Date: Sat, 30 Dec 2023 13:08:03 +0200 Subject: [PATCH 021/129] remove unused import in main --- backend/apps/web/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/apps/web/main.py b/backend/apps/web/main.py index 4519048b09..b19a7c1f16 100644 --- a/backend/apps/web/main.py +++ b/backend/apps/web/main.py @@ -1,7 +1,6 @@ from fastapi import FastAPI, Depends from fastapi.routing import APIRoute from fastapi.middleware.cors import CORSMiddleware -from starlette.middleware.authentication import AuthenticationMiddleware from apps.web.routers import auths, users, chats, modelfiles, utils from config import WEBUI_VERSION, WEBUI_AUTH from utils.utils import verify_auth_token From 8f21de9c7c348370d13179bb221d63a257a6c8f8 Mon Sep 17 00:00:00 2001 From: Anuraag Jain Date: Sat, 30 Dec 2023 14:20:34 +0200 Subject: [PATCH 022/129] docs: add api workflow - helps in development to understand what's happening under the hood --- docs/README.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/README.md diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000000..269919e111 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,3 @@ +# Project workflow + +[![](https://mermaid.ink/img/pako:eNrVk0FrAjEQhf_KkGsV25vsQSj0UC9VlKWXgIzZUQPZJM1k1xbxvze7q61VKfbY04bse9-8PJKdUK4gkQmmt4qsoieN64CltB5D1Ep7tBFypgDI7ffnn4kxWOIiHy_mFOpO1e3BKy2h27xqudAftdI2Y_qj0d05PIOJJ8uwDG6bJD2oNevIYJxCs3Ecs-H98F7ac18_wfoNNIMZxSokxJaMciWBxzXBKrgSuCYTCZaVNoW0Ly6tXRMwbyddRmk70TZSQJUybHXcQNwQ5OPfDjAnWzA8TscQmsI5QnQwQK8H9cOV4N-MI-AZbWGI21EH3xH15T859WWCGbF3TYg2cYERbyqsi9xYmQ6uGzsKpEjXKXJKDj44Rcx0FfjX2lyra1q4qbppcO86TT5BdATgw9X75_2JnigplKiL9KB30gJIke5JSVJkaVnQCisTpZB2n6RYRTf_sEpkMVTUE5VPlOP7F9kKDdP-E6y8fHA?type=png)](https://mermaid.live/edit#pako:eNrVk0FrAjEQhf_KkGsV25vsQSj0UC9VlKWXgIzZUQPZJM1k1xbxvze7q61VKfbY04bse9-8PJKdUK4gkQmmt4qsoieN64CltB5D1Ep7tBFypgDI7ffnn4kxWOIiHy_mFOpO1e3BKy2h27xqudAftdI2Y_qj0d05PIOJJ8uwDG6bJD2oNevIYJxCs3Ecs-H98F7ac18_wfoNNIMZxSokxJaMciWBxzXBKrgSuCYTCZaVNoW0Ly6tXRMwbyddRmk70TZSQJUybHXcQNwQ5OPfDjAnWzA8TscQmsI5QnQwQK8H9cOV4N-MI-AZbWGI21EH3xH15T859WWCGbF3TYg2cYERbyqsi9xYmQ6uGzsKpEjXKXJKDj44Rcx0FfjX2lyra1q4qbppcO86TT5BdATgw9X75_2JnigplKiL9KB30gJIke5JSVJkaVnQCisTpZB2n6RYRTf_sEpkMVTUE5VPlOP7F9kKDdP-E6y8fHA) From 0d6440f5c0162684776e1d5b5975d08b513ed2fb Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 30 Dec 2023 20:16:05 -0800 Subject: [PATCH 023/129] fix: num_predict default value should match ollama default https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values --- src/lib/components/chat/Settings/Advanced.svelte | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/components/chat/Settings/Advanced.svelte b/src/lib/components/chat/Settings/Advanced.svelte index 41ad69f33f..5c70dc56aa 100644 --- a/src/lib/components/chat/Settings/Advanced.svelte +++ b/src/lib/components/chat/Settings/Advanced.svelte @@ -516,7 +516,7 @@ class="p-1 px-3 text-xs flex rounded transition" type="button" on:click={() => { - options.num_predict = options.num_predict === '' ? 256 : ''; + options.num_predict = options.num_predict === '' ? 
128 : ''; }} > {#if options.num_predict === ''} @@ -533,7 +533,7 @@ From 14440014e5dc5359a59315ba4c86021337cea2c2 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 30 Dec 2023 20:18:06 -0800 Subject: [PATCH 024/129] fix: new params do not require legacy fallback --- src/lib/components/chat/SettingsModal.svelte | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/components/chat/SettingsModal.svelte b/src/lib/components/chat/SettingsModal.svelte index 84a98acbbd..58719edc50 100644 --- a/src/lib/components/chat/SettingsModal.svelte +++ b/src/lib/components/chat/SettingsModal.svelte @@ -652,7 +652,6 @@ options.top_k = settings.top_k ?? ''; options.top_p = settings.top_p ?? ''; options.num_ctx = settings.num_ctx ?? ''; - options.num_predict = settings.num_predict ?? ''; options = { ...options, ...settings.options }; options.stop = (settings?.options?.stop ?? []).join(','); From 8cd6eaf1bcaeed1c799db37772ccd8a02d1ed9ad Mon Sep 17 00:00:00 2001 From: Anuraag Jain Date: Sun, 31 Dec 2023 09:28:39 +0200 Subject: [PATCH 025/129] docs: minor changes --- docs/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/README.md b/docs/README.md index 269919e111..4113c35a78 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,3 +1,3 @@ # Project workflow -[![](https://mermaid.ink/img/pako:eNrVk0FrAjEQhf_KkGsV25vsQSj0UC9VlKWXgIzZUQPZJM1k1xbxvze7q61VKfbY04bse9-8PJKdUK4gkQmmt4qsoieN64CltB5D1Ep7tBFypgDI7ffnn4kxWOIiHy_mFOpO1e3BKy2h27xqudAftdI2Y_qj0d05PIOJJ8uwDG6bJD2oNevIYJxCs3Ecs-H98F7ac18_wfoNNIMZxSokxJaMciWBxzXBKrgSuCYTCZaVNoW0Ly6tXRMwbyddRmk70TZSQJUybHXcQNwQ5OPfDjAnWzA8TscQmsI5QnQwQK8H9cOV4N-MI-AZbWGI21EH3xH15T859WWCGbF3TYg2cYERbyqsi9xYmQ6uGzsKpEjXKXJKDj44Rcx0FfjX2lyra1q4qbppcO86TT5BdATgw9X75_2JnigplKiL9KB30gJIke5JSVJkaVnQCisTpZB2n6RYRTf_sEpkMVTUE5VPlOP7F9kKDdP-E6y8fHA?type=png)](https://mermaid.live/edit#pako:eNrVk0FrAjEQhf_KkGsV25vsQSj0UC9VlKWXgIzZUQPZJM1k1xbxvze7q61VKfbY04bse9-8PJKdUK4gkQmmt4qsoieN64CltB5D1Ep7tBFypgDI7ffnn4kxWOIiHy_mFOpO1e3BKy2h27xqudAftdI2Y_qj0d05PIOJJ8uwDG6bJD2oNevIYJxCs3Ecs-H98F7ac18_wfoNNIMZxSokxJaMciWBxzXBKrgSuCYTCZaVNoW0Ly6tXRMwbyddRmk70TZSQJUybHXcQNwQ5OPfDjAnWzA8TscQmsI5QnQwQK8H9cOV4N-MI-AZbWGI21EH3xH15T859WWCGbF3TYg2cYERbyqsi9xYmQ6uGzsKpEjXKXJKDj44Rcx0FfjX2lyra1q4qbppcO86TT5BdATgw9X75_2JnigplKiL9KB30gJIke5JSVJkaVnQCisTpZB2n6RYRTf_sEpkMVTUE5VPlOP7F9kKDdP-E6y8fHA) +[![](https://mermaid.ink/img/pako:eNq1k01rAjEQhv_KkFNLFe1N9iAUevFSRVl6Cci4Gd1ANtlmsmtF_O_N7iqtHxR76ClhMu87zwyZvcicIpEIpo-KbEavGjceC2lL9EFnukQbIGXygNye5y9TY7DAZTpZLsjXXVYXg3dapRM4hh9mu5A7-3hTfSXtAtJK21Tsj8dPl3USmJZkGVbebWNKD2rNOjAYl6HJHYdkNBwNpb3U9aNZvzFNYE6h8tFiSyZzBUGJG4K1dwVwTSYQrCptlLRvLt5dA5i2la5Ruk51Ux0VKQjuxPVbAwuyiuFlNgHfzJ5DoxtgqQf1813gnZRLZ5lAYcD7WT1lpGtiQKug9C4jZrrp-Fd-1-Y1bdzo4dvnZDLz7lPHyj8sOgfg4x84E7RTuEaZt8yRZqtDfgT_rwG2u3Dv_ERPFOQL1Cqu2F5aAClCTgVJkcSrojVWJkgh7SGmYhXcYmczkQRfUU9UZfQ4baRI1miYDl_QqlPg?type=png)](https://mermaid.live/edit#pako:eNq1k01rAjEQhv_KkFNLFe1N9iAUevFSRVl6Cci4Gd1ANtlmsmtF_O_N7iqtHxR76ClhMu87zwyZvcicIpEIpo-KbEavGjceC2lL9EFnukQbIGXygNye5y9TY7DAZTpZLsjXXVYXg3dapRM4hh9mu5A7-3hTfSXtAtJK21Tsj8dPl3USmJZkGVbebWNKD2rNOjAYl6HJHYdkNBwNpb3U9aNZvzFNYE6h8tFiSyZzBUGJG4K1dwVwTSYQrCptlLRvLt5dA5i2la5Ruk51Ux0VKQjuxPVbAwuyiuFlNgHfzJ5DoxtgqQf1813gnZRLZ5lAYcD7WT1lpGtiQKug9C4jZrrp-Fd-1-Y1bdzo4dvnZDLz7lPHyj8sOgfg4x84E7RTuEaZt8yRZqtDfgT_rwG2u3Dv_ERPFOQL1Cqu2F5aAClCTgVJkcSrojVWJkgh7SGmYhXcYmczkQRfUU9UZfQ4baRI1miYDl_QqlPg) From 87177e5bcfa0633d37ca796fbed5b4f7ca3cb0ba Mon Sep 17 00:00:00 2001 From: Daniele Viti Date: Sun, 31 Dec 2023 11:27:47 +0100 Subject: [PATCH 026/129] 
manual merge --- README.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6c7d7e60cd..ea41517b86 100644 --- a/README.md +++ b/README.md @@ -79,11 +79,16 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/) - **Privacy and Data Security:** We prioritize your privacy and data security above all. Please be reassured that all data entered into the Ollama Web UI is stored locally on your device. Our system is designed to be privacy-first, ensuring that no external requests are made, and your data does not leave your local environment. We are committed to maintaining the highest standards of data privacy and security, ensuring that your information remains confidential and under your control. -### Installing Both Ollama and Ollama Web UI Using Docker Compose +### Installing Both Ollama and Ollama Web UI Using Provided run-compose.sh bash script +Also available on Windows under any docker-enabled WSL2 linux distro (you have to enable it from Docker Desktop) -If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. Simply run the following command: +Simply run the following command: +Grant execute permission to script +```bash +chmod +x run-compose.sh +``` -For cpu-only container +For CPU only container ```bash ./run-compose.sh ``` From 7313e4246aa088b72ddb934642252dfd4f104661 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sun, 31 Dec 2023 15:10:33 -0800 Subject: [PATCH 027/129] fix: readme.md formatting --- INSTALLATION.md | 35 +++++++++++ README.md | 126 ++++++++++++++++++++-------------------- docker-compose.api.yaml | 1 + docker-compose.api.yml | 7 --- docker-compose.gpu.yaml | 1 + docker-compose.gpu.yml | 13 ----- 6 files changed, 100 insertions(+), 83 deletions(-) create mode 100644 INSTALLATION.md delete mode 100644 docker-compose.api.yml delete mode 100644 docker-compose.gpu.yml diff --git a/INSTALLATION.md b/INSTALLATION.md new file mode 100644 index 0000000000..4b802c6992 --- /dev/null +++ b/INSTALLATION.md @@ -0,0 +1,35 @@ +### Installing Both Ollama and Ollama Web UI Using Kustomize + +For cpu-only pod + +```bash +kubectl apply -f ./kubernetes/manifest/base +``` + +For gpu-enabled pod + +```bash +kubectl apply -k ./kubernetes/manifest +``` + +### Installing Both Ollama and Ollama Web UI Using Helm + +Package Helm file first + +```bash +helm package ./kubernetes/helm/ +``` + +For cpu-only pod + +```bash +helm install ollama-webui ./ollama-webui-*.tgz +``` + +For gpu-enabled pod + +```bash +helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1" +``` + +Check the `kubernetes/helm/values.yaml` file to know which parameters are available for customization diff --git a/README.md b/README.md index ea41517b86..cd65583857 100644 --- a/README.md +++ b/README.md @@ -79,69 +79,6 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/) - **Privacy and Data Security:** We prioritize your privacy and data security above all. Please be reassured that all data entered into the Ollama Web UI is stored locally on your device. Our system is designed to be privacy-first, ensuring that no external requests are made, and your data does not leave your local environment. We are committed to maintaining the highest standards of data privacy and security, ensuring that your information remains confidential and under your control. 
-### Installing Both Ollama and Ollama Web UI Using Provided run-compose.sh bash script -Also available on Windows under any docker-enabled WSL2 linux distro (you have to enable it from Docker Desktop) - -Simply run the following command: -Grant execute permission to script -```bash -chmod +x run-compose.sh -``` - -For CPU only container -```bash -./run-compose.sh -``` - -For GPU enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)) -Warning! A GPU-enabled installation has only been tested using linux and nvidia GPU, full functionalities are not guaranteed under Windows or Macos or using a different GPU -```bash -./run-compose.sh --enable-gpu -``` - -Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example: -```bash -./run-compose.sh --enable-gpu --build -``` - -### Installing Both Ollama and Ollama Web UI Using Docker Compose -To install using docker compose script as CPU-only installation simply run this command -```bash -docker compose up -d -``` - -for a GPU-enabled installation (provided you installed the necessary gpu drivers and you are using nvidia) -```bash -docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d -``` - -### Installing Both Ollama and Ollama Web UI Using Kustomize -For cpu-only pod -```bash -kubectl apply -f ./kubernetes/manifest/base -``` -For gpu-enabled pod -```bash -kubectl apply -k ./kubernetes/manifest -``` - -### Installing Both Ollama and Ollama Web UI Using Helm -Package Helm file first -```bash -helm package ./kubernetes/helm/ -``` - -For cpu-only pod -```bash -helm install ollama-webui ./ollama-webui-*.tgz -``` -For gpu-enabled pod -```bash -helm install ollama-webui ./ollama-webui-*.tgz --set ollama.resources.limits.nvidia.com/gpu="1" -``` - -Check the `kubernetes/helm/values.yaml` file to know which parameters are available for customization - ### Installing Ollama Web UI Only #### Prerequisites @@ -186,6 +123,69 @@ docker build -t ollama-webui . docker run -d -p 3000:8080 -e OLLAMA_API_BASE_URL=https://example.com/api -v ollama-webui:/app/backend/data --name ollama-webui --restart always ollama-webui ``` +### Installing Both Ollama and Ollama Web UI + +#### Using Docker Compose + +If you don't have Ollama installed yet, you can use the provided Docker Compose file for a hassle-free installation. Simply run the following command: + +```bash +docker compose up -d --build +``` + +This command will install both Ollama and Ollama Web UI on your system. 
+ +##### Enable GPU + +Use the additional Docker Compose file designed to enable GPU support by running the following command: + +```bash +docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d --build +``` + +##### Expose Ollama API outside the container stack + +Deploy the service with an additional Docker Compose file designed for API exposure: + +```bash +docker compose -f docker-compose.yaml -f docker-compose.api.yaml up -d --build +``` + +#### Using Provided `run-compose.sh` Script (Linux) + +Also available on Windows under any docker-enabled WSL2 linux distro (you have to enable it from Docker Desktop) + +Simply run the following command to grant execute permission to script: + +```bash +chmod +x run-compose.sh +``` + +##### For CPU only container + +```bash +./run-compose.sh +``` + +##### Enable GPU + +For GPU enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)) +Warning! A GPU-enabled installation has only been tested using linux and nvidia GPU, full functionalities are not guaranteed under Windows or Macos or using a different GPU + +```bash +./run-compose.sh --enable-gpu +``` + +Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example: + +```bash +./run-compose.sh --enable-gpu --build +``` + +#### Using Alternative Methods (Kustomize or Helm) + +See [INSTALLATION.md](/INSTALLATION.md) for information on how to install and/or join our [Ollama Web UI Discord community](https://discord.gg/5rJgQTnV4s). + ## How to Install Without Docker While we strongly recommend using our convenient Docker container installation for optimal support, we understand that some situations may require a non-Docker setup, especially for development purposes. Please note that non-Docker installations are not officially supported, and you might need to troubleshoot on your own. diff --git a/docker-compose.api.yaml b/docker-compose.api.yaml index 5e4b222749..f19974e7d7 100644 --- a/docker-compose.api.yaml +++ b/docker-compose.api.yaml @@ -2,5 +2,6 @@ version: '3.8' services: ollama: + # Expose Ollama API outside the container stack ports: - ${OLLAMA_WEBAPI_PORT-11434}:11434 diff --git a/docker-compose.api.yml b/docker-compose.api.yml deleted file mode 100644 index c36cf11e00..0000000000 --- a/docker-compose.api.yml +++ /dev/null @@ -1,7 +0,0 @@ -version: '3.6' - -services: - ollama: - # Expose Ollama API outside the container stack - ports: - - 11434:11434 \ No newline at end of file diff --git a/docker-compose.gpu.yaml b/docker-compose.gpu.yaml index 7df6b91a7b..424f485a1c 100644 --- a/docker-compose.gpu.yaml +++ b/docker-compose.gpu.yaml @@ -2,6 +2,7 @@ version: '3.8' services: ollama: + # GPU support deploy: resources: reservations: diff --git a/docker-compose.gpu.yml b/docker-compose.gpu.yml deleted file mode 100644 index db47ae1360..0000000000 --- a/docker-compose.gpu.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3.6' - -services: - ollama: - # GPU support - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: - - gpu From 254c36bea31eca87bf001b7ea8eb806b31923d17 Mon Sep 17 00:00:00 2001 From: "Timothy J. 
Baek" Date: Sun, 31 Dec 2023 23:13:17 -0800 Subject: [PATCH 028/129] feat: download chat as txt file --- src/lib/components/layout/Navbar.svelte | 51 ++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/lib/components/layout/Navbar.svelte b/src/lib/components/layout/Navbar.svelte index fb350fb0fa..23f854efa7 100644 --- a/src/lib/components/layout/Navbar.svelte +++ b/src/lib/components/layout/Navbar.svelte @@ -1,7 +1,10 @@