LCORE-438: Fix image building for GPU variant (#32)

matysek · web-flow · commit fa72d05e8f9a · 2025-08-12T17:21:28.000+02:00
* LCORE-438: Fix image building for GPU variant

* LCORE-438: Add GH actions for gpu image

* LCORE-438: Cleanup - do not use llama-stack git version

* LCORE-438: Cleanup - update uv.lock

* LCORE-438: Fix installing in image pytorch with gpu support

* LCORE-438: Try building in Gitlab-CI

* LCORE-438: Add detection of GPU Python libs in image building

* LCORE-438: Remove detection of GPU Python libs in image building

* LCORE-438: Rebase from main

* LCORE-438: Enable downloading models again.

* LCORE-438: Fix linters.

* LCORE-438: Fix linters 2.

* LCORE-438: Fix linters 3.

* LCORE-438: Do not build gpu image in a PR. It takes ~20min.
diff --git a/.github/workflows/build_and_push_dev_gpu.yaml b/.github/workflows/build_and_push_dev_gpu.yaml
@@ -0,0 +1,70 @@
+name: Build image, main branch push quay.io
+
+on:
+  push:
+    branches: [ main ]  # runs only on pushes to main, not on pull requests
+
+env:
+  IMAGE_NAME: rag-content-gpu
+  IMAGE_NAMESPACE: ${{ github.repository_owner }}
+  IMAGE_REGISTRY: ghcr.io  # push target; NOTE(review): the workflow name still mentions quay.io
+  LATEST_TAG: latest
+  CONTAINER_FILE: Containerfile-gpu  # definition of the GPU (CUDA) image
+
+jobs:
+  build-and-push-dev-gpu:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read  # read-only access to the repository contents
+      # Required to push the built image to the registry with github.token
+      packages: write
+    steps:
+      - name: Install buildah
+        run: |
+          sudo apt update
+          # qemu is required for arm64 builds
+          sudo apt install -y buildah qemu-user-static
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          # Removing the tool cache frees about 6 GB, but it might also
+          # remove tools that a later step actually needs.
+          tool-cache: true
+
+      - name: Create dev image tag  # env.DEV_TAG only becomes visible to later steps
+        run: |
+          DEV_TAG="dev-$(date +%Y%m%d)-$(git rev-parse --short HEAD)"
+          echo "DEV_TAG=${DEV_TAG}" | tee -a "$GITHUB_ENV"
+      - name: Build image with Buildah
+        id: build_image  # later steps read steps.build_image.outputs.*
+        uses: redhat-actions/buildah-build@v2
+        with:
+          image: ${{ env.IMAGE_NAME }}
+          tags: |
+            ${{ env.DEV_TAG }}
+            ${{ env.LATEST_TAG }}
+          containerfiles: |
+            ${{ env.CONTAINER_FILE }}
+          archs: amd64, arm64  # arm64 relies on the qemu package installed earlier
+          oci: true
+      - name: Check images  # lists the built images and echoes the build step outputs
+        run: |
+          buildah images | grep '${{ env.IMAGE_NAME }}'
+          echo '${{ steps.build_image.outputs.image }}'
+          echo '${{ steps.build_image.outputs.tags }}'
+      - name: Check manifest  # inspects the manifest published under the "latest" tag
+        run: |
+          set -x
+          buildah manifest inspect ${{ steps.build_image.outputs.image }}:${{ env.LATEST_TAG }}
+
+      - name: Push image to registry
+        uses: redhat-actions/push-to-registry@v2
+        if: ${{ github.event_name != 'pull_request' }}  # never push from pull requests
+        with:
+          image: ${{ steps.build_image.outputs.image }}
+          tags: ${{ steps.build_image.outputs.tags }}
+          registry: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_NAMESPACE }}  # ghcr.io/<repository owner>
+          username: ${{ github.actor }}
+          password: ${{ github.token }}  # relies on the job's "packages: write" permission
diff --git a/Containerfile-gpu b/Containerfile-gpu
@@ -1,36 +1,53 @@
-ARG FLAVOR=gpu
+# Image with GPU CUDA only backend.
+FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubi9
 
-FROM registry.access.redhat.com/ubi9/python-312 as cpu-base
-ARG FLAVOR
-
-FROM nvcr.io/nvidia/cuda:12.6.2-devel-ubi9 as gpu-base
-ARG FLAVOR
-RUN dnf install -y python3.12 python3.12-pip libcudnn8 libnccl git
+# Install Python
+RUN dnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
+    python3.12 python3.12-devel python3.12-pip libcudnn9 libnccl libcusparselt0 && \
+    dnf clean all
 RUN ln -sf /usr/bin/python3.12 /usr/bin/python
-ENV LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH
-
-FROM ${FLAVOR}-base as lightspeed-core-rag-builder
-ARG FLAVOR
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12/compat:$LD_LIBRARY_PATH
 
-USER 0
+# Install asciidoctor
 RUN dnf install -y rubygems && \
     dnf clean all && \
     gem install asciidoctor
+# Install uv package manager
+RUN pip3.12 install uv==0.7.20
 
 WORKDIR /rag-content
-ENV EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
 
-COPY . /rag-content
-RUN make install-global
+COPY Makefile pyproject.toml uv.lock README.md ./
+COPY src ./src
+COPY tests ./tests
+COPY scripts ./scripts
+
+# Configure UV environment variables for optimal performance
+# PyTorch backend: not pinned here; the pytorch-cpu index is removed from pyproject.toml below.
+ENV UV_COMPILE_BYTECODE=0 \
+    UV_PYTHON_DOWNLOADS=0
+
+# Remove pytorch-cpu dependency from pyproject.toml
+RUN uv venv && uv pip install tomlkit
+RUN uv run python ./scripts/remove_pytorch_cpu_pyproject.py
 
-# Test torch
-RUN if [[ $(echo $LD_LIBRARY_PATH) == *"/usr/local/cuda-12.6/compat"* ]]; then \
-        python -c "import torch; print(torch.version.cuda); print(torch.cuda.is_available());"; \
-    fi
+# Update uv.lock file and install dependencies
+RUN uv lock && uv sync --locked --no-install-project
 
-# # Download embeddings model
+# Then install the project itself (its sources were already copied above).
+# Installing separately from its dependencies allows optimal layer caching
+RUN uv sync --locked
+
+# Add executables from .venv to system PATH
+ENV PATH="/rag-content/.venv/bin:$PATH"
+
+# Download embeddings model
+ENV EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
 RUN python ./scripts/download_embeddings_model.py \
-        -l ./embeddings_model \
-        -r ${EMBEDDING_MODEL}
+       -l ./embeddings_model \
+       -r ${EMBEDDING_MODEL}
+
+# Reset the entrypoint.
+ENTRYPOINT []
 
 LABEL description="Contains embedding model and dependencies needed to generate a vector database"
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
     "llama-stack-client==0.2.16",
     "aiosqlite==0.21.0",
     "sqlite-vec==0.1.6",
+    "tomlkit",
 ]
 requires-python = "==3.12.*"
 dynamic = ["license", "readme"]
@@ -57,9 +58,7 @@ name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true
 [tool.uv.sources]
-torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
-llama-stack = { git = "https://github.com/meta-llama/llama-stack.git", rev = "20c319795259f097101917d7304e63aeeeb21c5c" }
 
 [dependency-groups]
 dev = [
diff --git a/scripts/remove_pytorch_cpu_pyproject.py b/scripts/remove_pytorch_cpu_pyproject.py
@@ -0,0 +1,51 @@
+"""
+Removes the pytorch-cpu dependency from the pyproject.toml file.
+
+This script removes the pytorch-cpu dependency from the pyproject.toml file.
+It is used to create a container image with GPU CUDA backend.
+
+Usage:
+    python remove_pytorch_cpu_pyproject.py
+
+The script will remove the 'tool.uv.index' and 'tool.uv.sources' sections
+from the pyproject.toml file in the current directory.
+"""
+
+from tomlkit import parse, dumps
+from pathlib import Path
+
+
+def remove_sections(file_path: str, sections_to_remove: list[str]) -> None:
+    """
+    Remove specified sections from a TOML file.
+
+    Args:
+        file_path (str): Path to the TOML file to modify
+        sections_to_remove (list[str]): List of section paths to remove,
+                                      using dot notation (e.g., "tool.uv.index")
+
+    The function parses the TOML file, removes the specified sections,
+    and writes the modified content back to the file.
+    """
+    path = Path(file_path)
+    content = path.read_text(encoding="utf-8")
+    doc = parse(content)
+
+    for section in sections_to_remove:
+        keys = section.split(".")
+        current = doc
+        for key in keys[:-1]:
+            if key not in current:
+                break
+            current = current[key]  # type: ignore
+        else:
+            current.pop(keys[-1], None)
+
+    path.write_text(dumps(doc), encoding="utf-8")
+
+
+if __name__ == "__main__":
+    file_path = "pyproject.toml"
+    print(f"pyproject file path: {file_path}")
+    sections = ["tool.uv.index", "tool.uv.sources"]
+    remove_sections(file_path, sections)
diff --git a/uv.lock b/uv.lock