diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
index 821ffb7206..e597ff8055 100644
--- a/.github/workflows/docker-build.yaml
+++ b/.github/workflows/docker-build.yaml
@@ -419,6 +419,108 @@ jobs:
           if-no-files-found: error
           retention-days: 1
 
+  build-slim-image:
+    runs-on: ${{ matrix.runner }}
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - platform: linux/amd64
+            runner: ubuntu-latest
+          - platform: linux/arm64
+            runner: ubuntu-24.04-arm
+
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+
+      - name: Prepare
+        run: |
+          platform=${{ matrix.platform }}
+          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata for Docker images (slim tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-slim,onlatest=true
+
+      - name: Extract metadata for Docker cache
+        id: cache-meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            ${{ github.ref_type == 'tag' && 'type=raw,value=main' || '' }}
+          flavor: |
+            prefix=cache-slim-${{ matrix.platform }}-
+            latest=false
+
+      - name: Build Docker image (slim)
+        uses: docker/build-push-action@v5
+        id: build
+        with:
+          context: .
+          push: true
+          platforms: ${{ matrix.platform }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=${{ steps.cache-meta.outputs.tags }}
+          cache-to: type=registry,ref=${{ steps.cache-meta.outputs.tags }},mode=max
+          build-args: |
+            BUILD_HASH=${{ github.sha }}
+            USE_SLIM=true
+
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        uses: actions/upload-artifact@v4
+        with:
+          name: digests-slim-${{ env.PLATFORM_PAIR }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
   merge-main-images:
     runs-on: ubuntu-latest
     needs: [build-main-image]
@@ -640,3 +742,59 @@ jobs:
       - name: Inspect image
         run: |
           docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+
+  merge-slim-images:
+    runs-on: ubuntu-latest
+    needs: [build-slim-image]
+    steps:
+      # GitHub Packages requires the entire repository name to be in lowercase
+      # although the repository owner has a lowercase username, this prevents some people from running actions after forking
+      - name: Set repository and image name to lowercase
+        run: |
+          echo "IMAGE_NAME=${IMAGE_NAME,,}" >>${GITHUB_ENV}
+          echo "FULL_IMAGE_NAME=ghcr.io/${IMAGE_NAME,,}" >>${GITHUB_ENV}
+        env:
+          IMAGE_NAME: '${{ github.repository }}'
+
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digests-slim-*
+          path: /tmp/digests
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata for Docker images (default slim tag)
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.FULL_IMAGE_NAME }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=git-
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=slim
+          flavor: |
+            latest=${{ github.ref == 'refs/heads/main' }}
+            suffix=-slim,onlatest=true
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: |
+          docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
+            $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: |
+          docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }}
diff --git a/Dockerfile b/Dockerfile
index 83a74365f0..0faef51330 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,6 +3,7 @@
 # use build args in the docker build command with --build-arg="BUILDARG=true"
 ARG USE_CUDA=false
 ARG USE_OLLAMA=false
+ARG USE_SLIM=false
 # Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
 ARG USE_CUDA_VER=cu128
 # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
@@ -43,6 +44,7 @@ FROM python:3.11-slim-bookworm AS base
 ARG USE_CUDA
 ARG USE_OLLAMA
 ARG USE_CUDA_VER
+ARG USE_SLIM
 ARG USE_EMBEDDING_MODEL
 ARG USE_RERANKING_MODEL
 ARG UID
@@ -54,6 +56,7 @@ ENV ENV=prod \
     # pass build args to the build
     USE_OLLAMA_DOCKER=${USE_OLLAMA} \
     USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_SLIM_DOCKER=${USE_SLIM} \
     USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
     USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} \
     USE_RERANKING_MODEL_DOCKER=${USE_RERANKING_MODEL}
@@ -120,6 +123,7 @@ RUN apt-get update && \
 COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
 
 RUN pip3 install --no-cache-dir uv && \
+    if [ "$USE_SLIM" != "true" ]; then \
     if [ "$USE_CUDA" = "true" ]; then \
     # If you use CUDA the whisper and embedding model will be downloaded on first use
     pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
@@ -134,10 +138,13 @@ RUN pip3 install --no-cache-dir uv && \
     python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
     python -c "import os; import tiktoken; tiktoken.get_encoding(os.environ['TIKTOKEN_ENCODING_NAME'])"; \
     fi; \
+    else \
+    uv pip install --system -r requirements.txt --no-cache-dir; \
+    fi; \
     chown -R $UID:$GID /app/backend/data/
 
 # Install Ollama if requested
-RUN if [ "$USE_OLLAMA" = "true" ]; then \
+RUN if [ "$USE_OLLAMA" = "true" ] && [ "$USE_SLIM" != "true" ]; then \
     date +%s > /tmp/ollama_build_hash && \
     echo "Cache broken at timestamp: `cat /tmp/ollama_build_hash`" && \
     curl -fsSL https://ollama.com/install.sh | sh && \