Skip to content

Commit d2fcb96

Browse files
authored
Merge pull request #76 from stackhpc/cpu-vllm-ci
Add support for vLLM CPU backend and improve CI test coverage
2 parents 0235060 + a508241 commit d2fcb96

File tree

23 files changed

+130
-83
lines changed

23 files changed

+130
-83
lines changed

.github/workflows/test-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
with:
6767
cluster_name: ${{ env.CLUSTER_NAME }}
6868

69-
# NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
69+
# NOTE(scott): Since the local Chart.yaml uses "appVersion: latest" and this
7070
# only gets overwritten to the correct commit SHA during Helm chart build,
7171
# we need to pull these published images and load them into the kind cluster
7272
# with the correct tag.
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
azimuth-llm:
2+
huggingface:
3+
# Use the smallest LLM we can find
4+
model: &model HuggingFaceTB/SmolLM2-135M-Instruct
25
api:
3-
enabled: false
6+
# CI Kind cluster doesn't have kube-prometheus-stack
7+
monitoring:
8+
enabled: false
9+
# No GPUs in CI runners
10+
gpus: 0
411
ui:
512
service:
613
zenith:
714
enabled: false
815
appSettings:
16+
model_name: *model
917
# Verify that we can set non-standard LLM params
1018
llm_params:
1119
max_tokens: 101
1220
temperature: 0.1
21+
top_k: 2
1322
top_p: 0.15
14-
top_k: 1
1523
presence_penalty: 0.9
1624
frequency_penalty: 1
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
azimuth-llm:
2+
huggingface:
3+
# Use the smallest vision model we can find
4+
model: &model HuggingFaceTB/SmolVLM-256M-Instruct
5+
api:
6+
# CI Kind cluster doesn't have kube-prometheus-stack
7+
monitoring:
8+
enabled: false
9+
# No GPUs in CI runners
10+
gpus: 0
11+
ui:
12+
service:
13+
zenith:
14+
enabled: false
15+
appSettings:
16+
model_name: *model
17+
# Verify that we can set non-standard LLM params
18+
llm_params:
19+
max_tokens: 10 # Constrain response tokens to speed up CI test
20+
temperature: 0.1
21+
top_p: 0.15
22+
presence_penalty: 0.9
23+
frequency_penalty: 1

charts/azimuth-image-analysis/ci/ui-only-values.yaml

Lines changed: 0 additions & 15 deletions
This file was deleted.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# This is intended to test the default chart values
2+
# as close as possible given the constraints of running
3+
# inside a Kind cluster within a CI runner
4+
huggingface:
5+
# Use the smallest LLM we can find
6+
model: &model HuggingFaceTB/SmolLM2-135M-Instruct
7+
api:
8+
# CI Kind cluster doesn't have kube-prometheus-stack
9+
monitoring:
10+
enabled: false
11+
# No GPUs in CI runners
12+
gpus: 0
13+
ui:
14+
service:
15+
zenith:
16+
enabled: false
17+
appSettings:
18+
model_name: *model

charts/azimuth-llm/ci/no-api-values.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

charts/azimuth-llm/templates/api/deployment.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ spec:
1919
spec:
2020
containers:
2121
- name: {{ .Release.Name }}-api
22-
image: {{ printf "%s:%s" .Values.api.image.repository .Values.api.image.version }}
22+
{{ $imageRepo := .Values.api.image.repository | default (ternary "ghcr.io/stackhpc/vllm-cpu" "vllm-project/vllm" (eq (.Values.api.gpus | int) 0)) -}}
23+
image: {{ printf "%s:%s" $imageRepo .Values.api.image.version }}
2324
ports:
2425
- name: api
2526
containerPort: 8000
@@ -29,7 +30,7 @@ spec:
2930
args:
3031
- --model
3132
- {{ .Values.huggingface.model }}
32-
{{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
33+
{{- include "azimuth-llm.chatTemplate" . | nindent 10 -}}
3334
{{- if .Values.api.modelMaxContextLength -}}
3435
- --max-model-len
3536
- {{ .Values.api.modelMaxContextLength | quote }}
@@ -41,7 +42,7 @@ spec:
4142
{{- if .Values.api.extraArgs -}}
4243
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
4344
{{- end -}}
44-
{{- if .Values.huggingface.secretName }}
45+
{{- if .Values.huggingface.secretName -}}
4546
envFrom:
4647
- secretRef:
4748
name: {{ .Values.huggingface.secretName }}

charts/azimuth-llm/templates/api/ingress.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ spec:
1616
pathType: Prefix
1717
backend:
1818
service:
19-
name: {{ .Values.api.service.name }}
19+
name: {{ .Release.Name }}-api
2020
port:
2121
# Must match Service resource
2222
number: 80

charts/azimuth-llm/templates/api/service.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
apiVersion: v1
33
kind: Service
44
metadata:
5-
name: {{ .Values.api.service.name }}
5+
name: {{ .Release.Name }}-api
66
labels:
77
{{- include "azimuth-llm.api-selectorLabels" . | nindent 4 }}
88
spec:

charts/azimuth-llm/templates/api/zenith-client.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ metadata:
88
spec:
99
reservationName: {{ .Release.Name }}-api
1010
upstream:
11-
serviceName: {{ .Values.api.service.name }}
11+
serviceName: {{ .Release.Name }}-api
1212
auth:
1313
skip: {{ .Values.api.service.zenith.skipAuth }}
1414
{{- end -}}

0 commit comments

Comments
 (0)