Add Docker build CI

liang2kl · liang2kl · commit cc275d2ac4a0 · 2026-02-28T13:01:07.000Z
diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
@@ -0,0 +1,90 @@
+name: Build Docker Images
+
+permissions:
+  contents: read  # lets the jobs check out the repository
+  packages: write  # lets the build job push images to ghcr.io
+
+on:
+  push:
+    branches:
+      - main
+    paths-ignore:  # skip the workflow when a push only touches these paths
+      - 'assets/**'  # patterns match whole file paths, so a directory needs a glob
+      - '**/*.md'
+      - 'LICENSE'
+      - '.gitignore'
+
+concurrency:
+  group: docker-build-${{ github.ref }}  # one group per pushed ref
+  cancel-in-progress: true  # a newer push cancels the run still in progress
+
+jobs:
+  discover:  # builds the job matrix: one entry per Dockerfile under docker/
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}  # JSON: {"include":[{name,file,tag},...]}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Discover Dockerfiles in docker/
+        id: set-matrix
+        shell: bash
+        run: |
+          set -euo pipefail
+          files=(docker/Dockerfile docker/Dockerfile.*)  # ignore non-Dockerfile files kept in docker/
+          include=()
+          for file in "${files[@]}"; do
+            if [[ -f "$file" ]]; then  # also drops a pattern that matched nothing (left literal)
+              name="$(basename "$file" | tr '.' '-')"  # e.g. Dockerfile-chat-cu130
+              filename="$(basename "$file")"
+              if [[ "$filename" == "Dockerfile" ]]; then
+                tag="latest"  # the plain Dockerfile is published as :latest
+              else
+                tag="${filename#Dockerfile.}"  # Dockerfile.<tag> is published as :<tag>
+              fi
+              include+=("{\"name\":\"$name\",\"file\":\"$file\",\"tag\":\"$tag\"}")
+            fi
+          done
+
+          if [[ ${#include[@]} -eq 0 ]]; then
+            echo "No Dockerfiles found in docker/" >&2
+            exit 1
+          fi
+
+          matrix="{\"include\":[$(IFS=,; echo "${include[*]}")]}"  # comma-join the entries
+          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+
+  build:  # one run per matrix entry: builds the image and pushes it to GHCR
+    runs-on: ubuntu-latest
+    needs: discover
+    strategy:
+      fail-fast: false  # a failing image does not cancel the other builds
+      matrix: ${{ fromJson(needs.discover.outputs.matrix) }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU  # emulation for the platform the runner is not native to
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and publish ${{ matrix.file }} (amd64 + arm64)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ${{ matrix.file }}
+          push: true
+          tags: ghcr.io/${{ github.repository }}:${{ matrix.tag }}
+          platforms: linux/amd64,linux/arm64
+          cache-from: type=gha,scope=${{ matrix.name }}  # per-image scope: matrix jobs must not share one cache
+          cache-to: type=gha,mode=max,scope=${{ matrix.name }}
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -38,7 +38,7 @@ RUN python3 -m venv /opt/venv
 
 ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
-ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0;10.0;12.0"
+ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0"
 
 COPY requirements.txt .
 
diff --git a/docker/Dockerfile.chat-cu130 b/docker/Dockerfile.chat-cu130
@@ -0,0 +1,50 @@
+ARG CUDA_VERSION=13.0.2
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+# OCI image annotations: source repository, description and license.
+LABEL org.opencontainers.image.source=https://github.com/z-lab/paroquant
+LABEL org.opencontainers.image.description="Container for vLLM inference"
+LABEL org.opencontainers.image.licenses=MIT
+# Python version installed below from the deadsnakes PPA; override with --build-arg.
+ARG PYTHON_VERSION=3.12
+# Keep apt from prompting during the build; /app is the working directory from here on.
+ENV DEBIAN_FRONTEND=noninteractive
+WORKDIR /app
+# Install build tools and libraries, then Python ${PYTHON_VERSION}, and make it the default python3.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    build-essential \
+    git \
+    curl \
+    ca-certificates \
+    libibverbs-dev \
+    libnuma-dev \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    libgl1 \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+    python${PYTHON_VERSION} \
+    python${PYTHON_VERSION}-dev \
+    python${PYTHON_VERSION}-venv \
+    python3-pip \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && rm -rf /var/lib/apt/lists/*
+# Create the virtual environment that the pip installs below go into.
+RUN python3 -m venv /opt/venv
+# Put the venv first on PATH and list the GPU architectures to compile CUDA code for.
+ENV VIRTUAL_ENV=/opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0 12.1"
+# Install vLLM and accelerate, with the cu130 wheel indexes as extra package sources.
+RUN pip install vllm==0.15.1 accelerate --no-cache-dir --extra-index-url https://wheels.vllm.ai/0.15.1/cu130 --extra-index-url https://download.pytorch.org/whl/cu130
+# Copy and build the kernels before the rest of the tree so this layer is reused when only other files change.
+COPY ./kernels ./kernels
+RUN pip install ./kernels --no-cache-dir --no-build-isolation
+# Copy the remaining build context into /app.
+COPY . .
+# Point Triton at the CUDA toolkit's ptxas; the container runs the interactive generation script.
+ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+ENTRYPOINT ["python", "scripts/interactive_gen.py"]
diff --git a/docker/Dockerfile.eval-reasoning b/docker/Dockerfile.eval-reasoning
@@ -37,7 +37,7 @@ RUN python3 -m venv /opt/venv
 
 ENV VIRTUAL_ENV=/opt/venv
 ENV PATH="/opt/venv/bin:$PATH"
-ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0;10.0;12.0"
+ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0"
 
 COPY experiments/tasks/reasoning/requirements.txt .
 
diff --git a/kernels/setup.py b/kernels/setup.py
@@ -6,7 +6,7 @@
 import torch
 
 # Supported NVIDIA GPU architectures.
-SUPPORTED_ARCHS = {"8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "12.0"}
+SUPPORTED_ARCHS = {"8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "12.0", "12.1"}
 
 
 def get_torch_arch_list() -> Set[str]: