Skip to content

Commit f4d8e38

Browse files
committed
Added Intel XPU support
1 parent 4ce5dfe commit f4d8e38

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
name: Publish vLLM XPU images
2+
3+
on:
4+
# NOTE(sd109): Since this is checking out an external repository,
5+
# it's probably safer to leave this as workflow dispatch
6+
# only so that we can manually build images from specific
7+
# refs rather than automatically pulling in the latest
8+
# content from the remote repo.
9+
workflow_dispatch:
10+
inputs:
11+
vllm_ref:
12+
type: string
13+
description: The vLLM GitHub ref (tag, branch or commit) to build.
14+
required: true
15+
16+
jobs:
17+
build_push_xpu_image:
18+
name: Build and push image
19+
runs-on: ubuntu-latest
20+
permissions:
21+
contents: read
22+
id-token: write # needed for signing the images with GitHub OIDC Token
23+
packages: write # required for pushing container images
24+
security-events: write # required for pushing SARIF files
25+
steps:
26+
- name: Check out the vLLM repository
27+
uses: actions/checkout@v4
28+
with:
29+
repository: vllm-project/vllm
30+
ref: ${{ inputs.vllm_ref }}
31+
32+
- name: Login to GitHub Container Registry
33+
uses: docker/login-action@v3
34+
with:
35+
registry: ghcr.io
36+
username: ${{ github.actor }}
37+
password: ${{ secrets.GITHUB_TOKEN }}
38+
39+
- name: Build and push image
40+
run: |
41+
IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
42+
docker build -f Dockerfile.xpu -t $IMAGE --shm-size=4g .
43+
docker push $IMAGE

charts/azimuth-llm/templates/api/deployment.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ spec:
1919
spec:
2020
containers:
2121
- name: {{ .Release.Name }}-api
22-
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
22+
{{ $gpuChart := ternary "ghcr.io/stackhpc/vllm-xpu" "vllm/vllm-openai" .Values.api.intelXPUsEnabled -}}
23+
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" $gpuChart (eq (.Values.api.gpus | int) 0)) -}}
2324
image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
2425
ports:
2526
- name: api
@@ -61,7 +62,11 @@ spec:
6162
periodSeconds: 10
6263
resources:
6364
limits:
65+
{{- if .Values.api.intelXPUsEnabled }}
66+
gpu.intel.com/i915: {{ .Values.api.gpus | int }}
67+
{{- else }}
6468
nvidia.com/gpu: {{ .Values.api.gpus | int }}
69+
{{- end }}
6570
volumes:
6671
- name: data
6772
{{- .Values.api.cacheVolume | toYaml | nindent 10 }}

charts/azimuth-llm/values.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ api:
3333
enabled: true
3434
# Container image config
3535
image:
36-
# Defaults to vllm/vllm-openai when api.gpus > 0
36+
# Defaults to vllm/vllm-openai when api.gpus > 0,
37+
# ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
3738
# or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
3839
repository:
3940
version: v0.8.5.post1
@@ -80,6 +81,8 @@ api:
8081
# distributed / multi-GPU support should be available, though it
8182
# has not been tested against this app.
8283
gpus: 1
84+
# Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
85+
intelXPUsEnabled: false
8386
# The update strategy to use for the deployment
8487
# See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
8588
# NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.

0 commit comments

Comments
 (0)