Skip to content

Commit f4d8e38

Browse files
committed
Added Intel XPU support
1 parent 4ce5dfe commit f4d8e38

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: Publish vLLM XPU images
2+
3+
on:
4+
# NOTE(sd109): Since this is checking out an external repository,
5+
# it's probably safer to leave this as workflow dispatch
6+
# only so that we can manually build images from specific
7+
# refs rather than automatically pulling in the latest
8+
# content from the remote repo.
9+
workflow_dispatch:
10+
inputs:
11+
vllm_ref:
12+
type: string
13+
description: The vLLM GitHub ref (tag, branch or commit) to build.
14+
required: true
15+
16+
jobs:
17+
build_push_xpu_image:
18+
name: Build and push image
19+
runs-on: ubuntu-latest
20+
permissions:
21+
contents: read
22+
id-token: write # needed for signing the images with GitHub OIDC Token
23+
packages: write # required for pushing container images
24+
security-events: write # required for pushing SARIF files
25+
steps:
26+
- name: Check out the vLLM repository
27+
uses: actions/checkout@v4
28+
with:
29+
repository: vllm-project/vllm
30+
ref: ${{ inputs.vllm_ref }}
31+
32+
- name: Login to GitHub Container Registry
33+
uses: docker/login-action@v3
34+
with:
35+
registry: ghcr.io
36+
username: ${{ github.actor }}
37+
password: ${{ secrets.GITHUB_TOKEN }}
38+
39+
- name: Build and push image
40+
run: |
41+
IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
42+
docker build -f Dockerfile.xpu -t $IMAGE --shm-size=4g .
43+
docker push $IMAGE

charts/azimuth-llm/templates/api/deployment.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ spec:
1919
spec:
2020
containers:
2121
- name: {{ .Release.Name }}-api
22-
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
22+
{{ $gpuChart := ternary "ghcr.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}}
23+
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" $gpuChart (eq (.Values.api.gpus | int) 0)) -}}
2324
image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
2425
ports:
2526
- name: api
@@ -61,7 +62,11 @@ spec:
6162
periodSeconds: 10
6263
resources:
6364
limits:
65+
{{- if .Values.api.intelXPUsEnabled }}
66+
gpu.intel.com/i915: {{ .Values.api.gpus | int }}
67+
{{- else }}
6468
nvidia.com/gpu: {{ .Values.api.gpus | int }}
69+
{{- end }}
6570
volumes:
6671
- name: data
6772
{{- .Values.api.cacheVolume | toYaml | nindent 10 }}

charts/azimuth-llm/values.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ api:
3333
enabled: true
3434
# Container image config
3535
image:
36-
# Defaults to vllm/vllm-openai when api.gpus > 0
36+
# Defaults to vllm/vllm-openai when api.gpus > 0,
37+
# ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
3738
# or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
3839
repository:
3940
version: v0.8.5.post1
@@ -80,6 +81,8 @@ api:
8081
# distributed / multi-GPU support should be available, though it
8182
# has not been tested against this app.
8283
gpus: 1
84+
# Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
85+
intelXPUsEnabled: false
8386
# The update strategy to use for the deployment
8487
# See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
8588
# NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.

0 commit comments

Comments
 (0)