Skip to content

Commit 6e3c06f

Browse files
Bugfix/482 helm rayspec fix (#483)
* [bugfix] Bugfixed raySpec preventing multiple deployments specified with modelSpecs. Signed-off-by: ahinsutime <ahinsutime@gmail.com> * [Doc] Updated tutorial document and values for raySpec. Signed-off-by: ahinsutime <ahinsutime@gmail.com> * [Bugfix] Updated helm chart version to sync with changes due to bugfix. Signed-off-by: ahinsutime <ahinsutime@gmail.com> * [Doc] Added guideline to deploy both ray cluster and deployments. Fixed typos. Added more example values. Signed-off-by: ahinsutime <ahinsutime@gmail.com> * [Bugfix] Fixed configmap conflicts by distinguishing configmap names. Signed-off-by: ahinsutime <ahinsutime@gmail.com> --------- Signed-off-by: ahinsutime <ahinsutime@gmail.com> Co-authored-by: Yuhan Liu <32589867+YuhanLiu11@users.noreply.github.com>
1 parent d05a4ce commit 6e3c06f

7 files changed

+78
-33
lines changed

helm/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.1.3
18+
version: 0.1.4
1919

2020
maintainers:
2121
- name: apostac

helm/templates/deployment-vllm-multi.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
{{- if and .Values.servingEngineSpec.enableEngine (not (hasKey .Values.servingEngineSpec "raySpec")) -}}
1+
{{- if .Values.servingEngineSpec.enableEngine -}}
22
{{- range $modelSpec := .Values.servingEngineSpec.modelSpec }}
3+
{{- if not (hasKey $modelSpec "raySpec") }}
34
{{- $kv_role := "kv_both" }}
45
{{- $kv_rank := 0 }}
56
{{- $kv_parallel_size := 1 }}
@@ -411,3 +412,4 @@ data:
411412
---
412413
{{- end }}
413414
{{- end }}
415+
{{- end }}

helm/templates/ray-cluster.yaml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
{{- if and .Values.servingEngineSpec.enableEngine (hasKey .Values.servingEngineSpec "raySpec")}}
1+
{{- if .Values.servingEngineSpec.enableEngine }}
22
{{- range $modelSpec := .Values.servingEngineSpec.modelSpec }}
3+
{{- if (hasKey $modelSpec "raySpec") }}
34
{{- with $ -}}
45
apiVersion: ray.io/v1
56
kind: RayCluster
@@ -154,10 +155,10 @@ spec:
154155
command: ["/bin/bash", "-c", "echo TBD"]
155156
resources:
156157
limits:
157-
cpu: {{ default "2" .Values.servingEngineSpec.raySpec.headNode.requestCPU }}
158-
memory: {{ default "8Gi" .Values.servingEngineSpec.raySpec.headNode.requestMemory }}
159-
{{- if hasKey .Values.servingEngineSpec.raySpec.headNode "requestGPU" }}
160-
nvidia.com/gpu: {{ .Values.servingEngineSpec.raySpec.headNode.requestGPU }}
158+
cpu: {{ default "2" $modelSpec.raySpec.headNode.requestCPU }}
159+
memory: {{ default "8Gi" $modelSpec.raySpec.headNode.requestMemory }}
160+
{{- if hasKey $modelSpec.raySpec.headNode "requestGPU" }}
161+
nvidia.com/gpu: {{ $modelSpec.raySpec.headNode.requestGPU }}
161162
{{- end }}
162163
startupProbe:
163164
exec:
@@ -192,10 +193,10 @@ spec:
192193
volumes:
193194
- name: wait-script
194195
configMap:
195-
name: wait-for-ray-script
196+
name: "{{$modelSpec.name}}-wait-for-ray-script"
196197
- name: vllm-script
197198
configMap:
198-
name: vllm-start-script
199+
name: "{{$modelSpec.name}}-vllm-start-script"
199200
{{- if or (hasKey $modelSpec "pvcStorage") (and $modelSpec.vllmConfig (hasKey $modelSpec.vllmConfig "tensorParallelSize")) (hasKey $modelSpec "chatTemplate") (hasKey $modelSpec "extraVolumes") }}
200201
{{- if hasKey $modelSpec "pvcStorage" }}
201202
- name: {{ .Release.Name }}-storage
@@ -400,10 +401,10 @@ spec:
400401
volumes:
401402
- name: wait-script
402403
configMap:
403-
name: wait-for-ray-script
404+
name: "{{$modelSpec.name}}-wait-for-ray-script"
404405
- name: vllm-script
405406
configMap:
406-
name: vllm-start-script
407+
name: "{{$modelSpec.name}}-vllm-start-script"
407408
{{- if or (hasKey $modelSpec "pvcStorage") (and $modelSpec.vllmConfig (hasKey $modelSpec.vllmConfig "tensorParallelSize")) (hasKey $modelSpec "chatTemplate") (hasKey $modelSpec "extraVolumes") }}
408409
{{- if hasKey $modelSpec "pvcStorage" }}
409410
- name: {{ .Release.Name }}-storage
@@ -466,7 +467,7 @@ spec:
466467
apiVersion: v1
467468
kind: ConfigMap
468469
metadata:
469-
name: wait-for-ray-script
470+
name: "{{$modelSpec.name}}-wait-for-ray-script"
470471
data:
471472
wait_for_ray.py: |
472473
import ray
@@ -499,7 +500,7 @@ data:
499500
apiVersion: v1
500501
kind: ConfigMap
501502
metadata:
502-
name: vllm-start-script
503+
name: "{{$modelSpec.name}}-vllm-start-script"
503504
data:
504505
vllm-entrypoint.sh: |
505506
#!/bin/bash
@@ -618,3 +619,4 @@ data:
618619
---
619620
{{- end }}
620621
{{- end }}
622+
{{- end }}

tutorials/00-a-install-multinode-kubernetes-env.md

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,12 +107,9 @@ Before you begin, ensure the following:
107107
```
108108

109109
4. **Explanation:**
110-
- Downloads, installs and configures v1.32 version of cri-o container runtime for your Kubernetes cluster.
111-
112-
5. **Explanation:**
113110
This script downloads v1.32 of [`cri-o`](https://github.com/cri-o/packaging/blob/main/README.md#distributions-using-deb-packages), one of the container runtimes for Kubernetes, which is used to manage pods on your cluster.
114111

115-
6. Repeat steps 1 to 2 on your other bare-metal server, which will serve as a worker node.
112+
5. Repeat steps 1 to 2 on your other bare-metal server, which will serve as a worker node.
116113

117114
### Step 3: Setting up a control plane node
118115

tutorials/15-basic-pipeline-parallel.md

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ This tutorial provides a step-by-step guide for configuring and deploying the vL
3232

3333
## Step 2: Preparing the Configuration File
3434

35-
1. Locate the example configuration file [`tutorials/assets/values-15-minimal-pipeline-parallel-example.yaml`](assets/values-15-minimal-pipeline-parallel-example.yaml).
35+
1. Locate the example configuration file [`tutorials/assets/values-15-a-minimal-pipeline-parallel-example-raycluster.yaml`](assets/values-15-a-minimal-pipeline-parallel-example-raycluster.yaml).
3636

3737
2. Open the file and update the following fields:
3838

3939
- Write your actual Hugging Face token in `hf_token: <YOUR HF TOKEN>` in the YAML file.
4040

41-
### Explanation of Key Items in `values-15-minimal-pipeline-parallel-example.yaml`
41+
### Explanation of Key Items in `values-15-a-minimal-pipeline-parallel-example-raycluster.yaml`
4242

4343
- **`raySpec`**: Required when using KubeRay to enable pipeline parallelism.
4444
- **`headNode`**: Specifies the resource requirements for the KubeRay head node and must be defined accordingly:
@@ -74,11 +74,6 @@ In the following example, we configure a total of two Ray nodes each equipped wi
7474
```yaml
7575
servingEngineSpec:
7676
runtimeClassName: ""
77-
raySpec:
78-
headNode:
79-
requestCPU: 2
80-
requestMemory: "20Gi"
81-
requestGPU: 2
8277
modelSpec:
8378
- name: "distilgpt2"
8479
repository: "vllm/vllm-openai"
@@ -97,6 +92,12 @@ servingEngineSpec:
9792

9893
shmSize: "20Gi"
9994

95+
raySpec:
96+
headNode:
97+
requestCPU: 2
98+
requestMemory: "20Gi"
99+
requestGPU: 2
100+
100101
hf_token: <YOUR HF TOKEN>
101102
```
102103
@@ -307,3 +308,5 @@ TEST SUITE: None
307308
## Conclusion
308309

309310
In this tutorial, you configured and deployed the vLLM serving engine with support for pipeline parallelism across multiple GPUs within a multi-node Kubernetes environment using KubeRay. Additionally, you learned how to verify the deployment and monitor the associated pods to ensure proper operation. For further customization and configuration options, please consult the `values.yaml` file and the Helm chart documentation.
311+
312+
To deploy both a Ray cluster and standard Kubernetes deployments using a single Helm release, please refer to the example configuration file available at [`tutorials/assets/values-15-b-minimal-pipeline-parallel-example-multiple-modelspec.yaml`](assets/values-15-b-minimal-pipeline-parallel-example-multiple-modelspec.yaml).

tutorials/assets/values-15-minimal-pipeline-parallel-example.yaml renamed to tutorials/assets/values-15-a-minimal-pipeline-parallel-example-raycluster.yaml

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
servingEngineSpec:
22
runtimeClassName: ""
3-
raySpec:
4-
headNode:
5-
requestCPU: 2
6-
requestMemory: "20Gi"
7-
requestGPU: 2
83
modelSpec:
9-
- name: "distilgpt2"
4+
- name: "distilgpt2-raycluster"
105
repository: "vllm/vllm-openai"
116
tag: "latest"
127
modelURL: "distilbert/distilgpt2"
138

149
replicaCount: 1
1510

16-
requestCPU: 2
11+
requestCPU: 1
1712
requestMemory: "20Gi"
18-
requestGPU: 2
13+
requestGPU: 1
1914

2015
vllmConfig:
21-
tensorParallelSize: 2
16+
tensorParallelSize: 1
2217
pipelineParallelSize: 2
2318

2419
shmSize: "20Gi"
20+
21+
raySpec:
22+
headNode:
23+
requestCPU: 1
24+
requestMemory: "20Gi"
25+
requestGPU: 1
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
servingEngineSpec:
2+
runtimeClassName: ""
3+
modelSpec:
4+
- name: "distilgpt2-raycluster"
5+
repository: "vllm/vllm-openai"
6+
tag: "latest"
7+
modelURL: "distilbert/distilgpt2"
8+
9+
replicaCount: 1
10+
11+
requestCPU: 1
12+
requestMemory: "20Gi"
13+
requestGPU: 1
14+
15+
vllmConfig:
16+
tensorParallelSize: 1
17+
pipelineParallelSize: 2
18+
19+
shmSize: "20Gi"
20+
21+
raySpec:
22+
headNode:
23+
requestCPU: 1
24+
requestMemory: "20Gi"
25+
requestGPU: 1
26+
- name: "opt125m-deployment"
27+
repository: "vllm/vllm-openai"
28+
tag: "latest"
29+
modelURL: "facebook/opt-125m"
30+
31+
replicaCount: 1
32+
33+
requestCPU: 1
34+
requestMemory: "20Gi"
35+
requestGPU: 1
36+
37+
vllmConfig:
38+
tensorParallelSize: 1
39+
40+
shmSize: "20Gi"

0 commit comments

Comments
 (0)