Add USE_SLIM argument to Dockerfile

This commit is contained in:
Everett Wilber 2025-08-27 18:20:23 -04:00 committed by GitHub
parent 1db8dec4f5
commit d2fdf6999b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -3,6 +3,7 @@
# use build args in the docker build command with --build-arg="BUILDARG=true" # use build args in the docker build command with --build-arg="BUILDARG=true"
ARG USE_CUDA=false ARG USE_CUDA=false
ARG USE_OLLAMA=false ARG USE_OLLAMA=false
ARG USE_SLIM=false
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default) # Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
ARG USE_CUDA_VER=cu128 ARG USE_CUDA_VER=cu128
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
@ -43,6 +44,7 @@ FROM python:3.11-slim-bookworm AS base
ARG USE_CUDA ARG USE_CUDA
ARG USE_OLLAMA ARG USE_OLLAMA
ARG USE_CUDA_VER ARG USE_CUDA_VER
ARG USE_SLIM
ARG USE_EMBEDDING_MODEL ARG USE_EMBEDDING_MODEL
ARG USE_RERANKING_MODEL ARG USE_RERANKING_MODEL
ARG UID ARG UID
@ -54,6 +56,7 @@ ENV ENV=prod \
# pass build args to the build # pass build args to the build
USE_OLLAMA_DOCKER=${USE_OLLAMA} \ USE_OLLAMA_DOCKER=${USE_OLLAMA} \
USE_CUDA_DOCKER=${USE_CUDA} \ USE_CUDA_DOCKER=${USE_CUDA} \
USE_SLIM_DOCKER=${USE_SLIM} \
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \ USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \ USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL} USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
@ -120,6 +123,7 @@ RUN apt-get update && \
COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
RUN pip3 install --no-cache-dir uv && \ RUN pip3 install --no-cache-dir uv && \
if [ "$USE_SLIM" != "true" ]; then \
if [ "$USE_CUDA" = "true" ]; then \ if [ "$USE_CUDA" = "true" ]; then \
# If you use CUDA the whisper and embedding model will be downloaded on first use # If you use CUDA the whisper and embedding model will be downloaded on first use
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
@ -134,10 +138,13 @@ RUN pip3 install --no-cache-dir uv && \
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \ python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \ python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
fi; \ fi; \
else \
uv pip install --system -r requirements.txt --no-cache-dir && \
fi; \
chown -R $UID:$GID /app/backend/data/ chown -R $UID:$GID /app/backend/data/
# Install Ollama if requested # Install Ollama if requested
RUN if [ "$USE_OLLAMA" = "true" ]; then \ RUN if [ [ "$USE_OLLAMA" = "true" ] && [ "$USE_SLIM" != "true" ] ]; then \
date +%s > /tmp/ollama_build_hash && \ date +%s > /tmp/ollama_build_hash && \
echo "Cache broken at timestamp: `cat /tmp/ollama_build_hash`" && \ echo "Cache broken at timestamp: `cat /tmp/ollama_build_hash`" && \
curl -fsSL https://ollama.com/install.sh | sh && \ curl -fsSL https://ollama.com/install.sh | sh && \