Skip to content

Commit 62cf67b

Browse files
HanFa and ruizhang0101 authored
[Feat] Add per-model runtimeClassName configuration support (#755)
Signed-off-by: Fang Han <fhan0520@gmail.com>
Co-authored-by: Rui Zhang <51696593+zerofishnoodles@users.noreply.github.com>
1 parent 595c713 commit 62cf67b

File tree

6 files changed

+151
-6
lines changed

6 files changed

+151
-6
lines changed

helm/templates/deployment-vllm-multi.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -465,8 +465,8 @@ spec:
465465
{{- end }}
466466
{{- end }}
467467

468-
{{- if .Values.servingEngineSpec.runtimeClassName }}
469-
runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
468+
{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
469+
runtimeClassName: {{ . }}
470470
{{- end }}
471471
{{- if .Values.servingEngineSpec.schedulerName }}
472472
schedulerName: {{ .Values.servingEngineSpec.schedulerName }}

helm/templates/ray-cluster.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ spec:
231231
{{- toYaml . | nindent 10 }}
232232
{{- end }}
233233
{{- end }}
234-
{{- if .Values.servingEngineSpec.runtimeClassName }}
235-
runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
234+
{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
235+
runtimeClassName: {{ . }}
236236
{{- end }}
237237
{{- if .Values.servingEngineSpec.schedulerName }}
238238
schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
@@ -441,8 +441,8 @@ spec:
441441
{{- end }}
442442
{{- end }}
443443

444-
{{- if .Values.servingEngineSpec.runtimeClassName }}
445-
runtimeClassName: {{ .Values.servingEngineSpec.runtimeClassName }}
444+
{{- with (ternary $modelSpec.runtimeClassName .Values.servingEngineSpec.runtimeClassName (hasKey $modelSpec "runtimeClassName")) }}
445+
runtimeClassName: {{ . }}
446446
{{- end }}
447447
{{- if .Values.servingEngineSpec.schedulerName }}
448448
schedulerName: {{ .Values.servingEngineSpec.schedulerName }}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
suite: test runtimeClassName configuration
2+
templates:
3+
- deployment-vllm-multi.yaml
4+
- ray-cluster.yaml
5+
tests:
6+
- it: should use global runtimeClassName when no model override is set
7+
set:
8+
servingEngineSpec:
9+
enableEngine: true
10+
runtimeClassName: "nvidia"
11+
modelSpec:
12+
- name: "test-model"
13+
repository: "vllm/vllm-openai"
14+
tag: "latest"
15+
modelURL: "facebook/opt-125m"
16+
replicaCount: 1
17+
requestCPU: 1
18+
requestMemory: "1Gi"
19+
requestGPU: 1
20+
asserts:
21+
- template: deployment-vllm-multi.yaml
22+
equal:
23+
path: spec.template.spec.runtimeClassName
24+
value: "nvidia"
25+
26+
- it: should use model-specific runtimeClassName when set
27+
set:
28+
servingEngineSpec:
29+
enableEngine: true
30+
runtimeClassName: "nvidia"
31+
modelSpec:
32+
- name: "test-model-custom"
33+
repository: "vllm/vllm-openai"
34+
tag: "latest"
35+
modelURL: "facebook/opt-125m"
36+
runtimeClassName: "custom-runtime"
37+
replicaCount: 1
38+
requestCPU: 1
39+
requestMemory: "1Gi"
40+
requestGPU: 1
41+
asserts:
42+
- template: deployment-vllm-multi.yaml
43+
equal:
44+
path: spec.template.spec.runtimeClassName
45+
value: "custom-runtime"
46+
47+
- it: should use model-specific runtimeClassName in Ray cluster head and worker nodes
48+
set:
49+
servingEngineSpec:
50+
enableEngine: true
51+
runtimeClassName: "nvidia"
52+
modelSpec:
53+
- name: "ray-model-custom"
54+
repository: "vllm/vllm-openai"
55+
tag: "latest"
56+
modelURL: "facebook/opt-125m"
57+
runtimeClassName: "custom-ray"
58+
replicaCount: 2
59+
requestCPU: 1
60+
requestMemory: "1Gi"
61+
requestGPU: 1
62+
raySpec:
63+
enabled: true
64+
headNode:
65+
requestCPU: 1
66+
requestMemory: "1Gi"
67+
requestGPU: 1
68+
asserts:
69+
- template: ray-cluster.yaml
70+
documentIndex: 0
71+
equal:
72+
path: spec.headGroupSpec.template.spec.runtimeClassName
73+
value: "custom-ray"
74+
- template: ray-cluster.yaml
75+
documentIndex: 0
76+
equal:
77+
path: spec.workerGroupSpecs[0].template.spec.runtimeClassName
78+
value: "custom-ray"
79+
80+
- it: should default to nvidia if runtimeClassName is not provided
81+
set:
82+
servingEngineSpec:
83+
enableEngine: true
84+
modelSpec:
85+
- name: "test-model-no-runtime"
86+
repository: "vllm/vllm-openai"
87+
tag: "latest"
88+
modelURL: "facebook/opt-125m"
89+
replicaCount: 1
90+
requestCPU: 1
91+
requestMemory: "1Gi"
92+
requestGPU: 1
93+
asserts:
94+
- template: deployment-vllm-multi.yaml
95+
equal:
96+
path: spec.template.spec.runtimeClassName
97+
value: "nvidia"
98+
99+
- it: should use model-specific runtimeClassName when no global is set
100+
set:
101+
servingEngineSpec:
102+
enableEngine: true
103+
modelSpec:
104+
- name: "test-model-only-model-runtime"
105+
repository: "vllm/vllm-openai"
106+
tag: "latest"
107+
modelURL: "facebook/opt-125m"
108+
runtimeClassName: "model-only-runtime"
109+
replicaCount: 1
110+
requestCPU: 1
111+
requestMemory: "1Gi"
112+
requestGPU: 1
113+
asserts:
114+
- template: deployment-vllm-multi.yaml
115+
equal:
116+
path: spec.template.spec.runtimeClassName
117+
value: "model-only-runtime"
118+
119+
- it: should not set runtimeClassName when global is explicitly set to empty string
120+
set:
121+
servingEngineSpec:
122+
enableEngine: true
123+
runtimeClassName: ""
124+
modelSpec:
125+
- name: "test-model-empty-runtime"
126+
repository: "vllm/vllm-openai"
127+
tag: "latest"
128+
modelURL: "facebook/opt-125m"
129+
replicaCount: 1
130+
requestCPU: 1
131+
requestMemory: "1Gi"
132+
requestGPU: 1
133+
asserts:
134+
- template: deployment-vllm-multi.yaml
135+
notExists:
136+
path: spec.template.spec.runtimeClassName

helm/values-example.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
servingEngineSpec:
2+
# Global runtime class for all models (can be overridden per model)
3+
runtimeClassName: "nvidia"
4+
25
modelSpec:
36
- name: "opt125m"
47
repository: "lmcache/vllm-openai"
58
tag: "latest"
69
modelURL: "facebook/opt-125m"
10+
# Override global runtimeClassName for this specific model
11+
runtimeClassName: "custom-runtime"
712

813
replicaCount: 1
914

helm/values.schema.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@
109109
"priorityClassName": {
110110
"type": "string"
111111
},
112+
"runtimeClassName": {
113+
"type": "string"
114+
},
112115
"pvcStorage": {
113116
"type": "string"
114117
},

helm/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ servingEngineSpec:
2121
# - annotations: (Optional, map) The annotations to add to the deployment, e.g., {model: "opt125m"}
2222
# - serviceAccountName: (Optional, string) The name of the service account to use for the deployment, e.g., "vllm-service-account"
2323
# - priorityClassName: (Optional, string) The name of the priority class name for the deployment, e.g., "high-priority"
24+
# - runtimeClassName: (Optional, string) Runtime class for the pod, e.g., "nvidia". If not specified, falls back to servingEngineSpec.runtimeClassName
2425
# - podAnnotations: (Optional, map) The annotations to add to the pod, e.g., {model: "opt125m"}
2526
# - name: (string) The name of the model, e.g., "example-model"
2627
# - repository: (string) The repository of the model, e.g., "vllm/vllm-openai"

0 commit comments

Comments
 (0)