43 changes: 43 additions & 0 deletions .github/workflows/build-push-vllm-xpu.yml
@@ -0,0 +1,43 @@
name: Publish vLLM XPU images

on:
  # NOTE(sd109): Since this is checking out an external repository,
# it's probably safer to leave this as workflow dispatch
# only so that we can manually build images from specific
# refs rather than automatically pulling in the latest
# content from the remote repo.
workflow_dispatch:
inputs:
vllm_ref:
type: string
description: The vLLM GitHub ref (tag, branch or commit) to build.
required: true

jobs:
build_push_xpu_image:
name: Build and push image
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write # needed for signing the images with GitHub OIDC Token
packages: write # required for pushing container images
security-events: write # required for pushing SARIF files
steps:
- name: Check out the vLLM repository
uses: actions/checkout@v4
with:
repository: vllm-project/vllm
ref: ${{ inputs.vllm_ref }}

- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push image
run: |
IMAGE=ghcr.io/stackhpc/vllm-xpu:${{ inputs.vllm_ref }}
docker build -f docker/Dockerfile.xpu -t $IMAGE --shm-size=4g .
docker push $IMAGE
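Because the workflow above is dispatch-only, a build has to be kicked off by hand. Below is a minimal sketch of doing that with the GitHub CLI from a checkout of this repository; the ref is only an example tag, and the workflow is assumed to live at the path added in this change.

# Manually dispatch the XPU image build (sketch; requires the gh CLI and
# permission to run workflows in this repository).
gh workflow run build-push-vllm-xpu.yml -f vllm_ref=v0.8.5.post1
# Optionally follow the run until it completes.
gh run watch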
13 changes: 11 additions & 2 deletions charts/azimuth-llm/templates/api/deployment.yml
@@ -19,8 +19,13 @@ spec:
spec:
containers:
- name: {{ .Release.Name }}-api
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm/vllm-openai" (eq (.Values.api.gpus | int) 0)) -}}
image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
{{- if eq (.Values.api.gpus | int) 0 }}
image: "ghcr.io/stackhpc/vllm-cpu:{{ .Values.api.image.version }}"
{{- else if .Values.api.intelXPUsEnabled }}
image: "ghcr.io/stackhpc/vllm-xpu:{{ .Values.api.image.version }}"
{{- else }}
image: "vllm/vllm-openai:{{ .Values.api.image.version }}"
{{- end }}
ports:
- name: api
containerPort: 8000
@@ -61,7 +66,11 @@ spec:
periodSeconds: 10
resources:
limits:
{{- if .Values.api.intelXPUsEnabled }}
gpu.intel.com/i915: {{ .Values.api.gpus | int }}
{{- else }}
nvidia.com/gpu: {{ .Values.api.gpus | int }}
{{- end }}
volumes:
- name: data
{{- .Values.api.cacheVolume | toYaml | nindent 10 }}
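To sanity-check the new conditionals, the chart can be rendered locally before deploying. The sketch below assumes the chart renders with its default values plus the two overrides shown; the release name my-llm is a placeholder.

# Render the deployment and confirm the Intel image and i915 resource are
# selected when Intel XPUs are enabled (sketch; run from the repo root).
helm template my-llm ./charts/azimuth-llm \
  --set api.gpus=1 \
  --set api.intelXPUsEnabled=true \
  | grep -E 'image:|gpu\.intel\.com'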
7 changes: 5 additions & 2 deletions charts/azimuth-llm/values.yaml
@@ -33,8 +33,9 @@ api:
enabled: true
# Container image config
image:
# Defaults to vllm/vllm-openai when api.gpus > 0
# or ghrc.io/stackhpc/vllm-cpu when api.gpus == 0
# Defaults to vllm/vllm-openai when api.gpus > 0,
# ghcr.io/stackhpc/vllm-xpu when api.gpus > 0 and intelXPUsEnabled is true,
# or ghcr.io/stackhpc/vllm-cpu when api.gpus == 0
repository:
version: v0.8.5.post1
monitoring:
@@ -80,6 +81,8 @@ api:
# distributed / multi-GPU support should be available, though it
# has not been tested against this app.
gpus: 1
# Whether pods should request Intel GPUs as opposed to the default Nvidia GPUs
intelXPUsEnabled: false
# The update strategy to use for the deployment
# See https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#updating-a-deployment
# NOTE: The following RollingUpdate strategy offers a zero-downtime update but requires additional GPU worker nodes.
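Putting the two halves together, a deployment that uses the workflow-built image might look like the following. This is a sketch only: the release and namespace names are placeholders, and api.image.version should match whichever vllm_ref the XPU image was built from.

# Install the chart requesting Intel XPUs instead of the default Nvidia GPUs
# (sketch; release and namespace names are placeholders).
helm upgrade --install my-llm ./charts/azimuth-llm \
  --namespace my-llm --create-namespace \
  --set api.gpus=1 \
  --set api.intelXPUsEnabled=true \
  --set api.image.version=v0.8.5.post1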