Skip to content

Commit 3396b0b

Browse files
feat(helm) add PDB per deployment (#851)
* feat(helm) add pdb and expose various options in the values. Add tests Signed-off-by: enneitex <etienne.divet@gmail.com> * feat(helm) update README and json schema with new fields Signed-off-by: enneitex <etienne.divet@gmail.com> --------- Signed-off-by: enneitex <etienne.divet@gmail.com> Co-authored-by: Rui Zhang <51696593+ruizhang0101@users.noreply.github.com>
1 parent 62e8137 commit 3396b0b

14 files changed

+821
-32
lines changed

.github/values-01-2pods-minimal-example.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@ servingEngineSpec:
99
modelURL: "facebook/opt-125m"
1010

1111
replicaCount: 2
12-
12+
pdb:
13+
enabled: true
14+
minAvailable: 1
15+
labels:
16+
model: "opt125m"
17+
annotations:
18+
model: "opt125m"
1319
requestCPU: 6
1420
requestMemory: "16Gi"
1521
requestGPU: 0.5

.github/values-06-session-routing.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ servingEngineSpec:
88
tag: "v0.3.9post2"
99
modelURL: "facebook/opt-125m"
1010
replicaCount: 2
11+
pdb:
12+
enabled: true
13+
minAvailable: 1
1114
requestCPU: 6
1215
requestMemory: "30Gi"
1316
requestGPU: 1

helm/README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ This table documents all available configuration values for the Production Stack
8787
| `servingEngineSpec.modelSpec[].modelURL` | string | `""` | The URL of the model, e.g., "facebook/opt-125m" |
8888
| `servingEngineSpec.modelSpec[].chatTemplate` | string | `null` | (Optional) Chat template (Jinja2) specifying tokenizer configuration |
8989
| `servingEngineSpec.modelSpec[].replicaCount` | integer | `1` | The number of replicas for the model |
90+
| `servingEngineSpec.modelSpec[].pdb.enabled`| boolean | `false` | Whether to create a PodDisruptionBudget for the model |
91+
| `servingEngineSpec.modelSpec[].pdb.labels`| map | `{}` | Labels to add to the PodDisruptionBudget |
92+
| `servingEngineSpec.modelSpec[].pdb.annotations`| map | `{}` | Annotations to add to the PodDisruptionBudget |
93+
| `servingEngineSpec.modelSpec[].pdb.minAvailable` | string | `""` | Minimum number of pods that must remain available after an eviction, as an integer or a percentage (e.g., 50%). Used only when `maxUnavailable` is not set |
94+
| `servingEngineSpec.modelSpec[].pdb.maxUnavailable` | string | `""` | Maximum number of pods that may be unavailable after an eviction, as an integer or a percentage (e.g., 50%). Takes precedence over `minAvailable` when set |
9095
| `servingEngineSpec.modelSpec[].resources` | object | `{}` | Standard Kubernetes resources block (requests/limits). If specified, this takes priority over and ignores simplified resource fields (requestCPU, requestMemory, requestGPU, etc.) |
9196
| `servingEngineSpec.modelSpec[].requestCPU` | integer | `0` | The number of CPUs requested for the model |
9297
| `servingEngineSpec.modelSpec[].requestMemory` | string | `""` | The amount of memory requested for the model, e.g., "16Gi" |
@@ -196,12 +201,17 @@ This table documents all available configuration values for the Production Stack
196201

197202
| Field | Type | Default | Description |
198203
|-------|------|---------|-------------|
204+
| `routerSpec.enableRouter` | boolean | `true` | Whether to enable the router service |
199205
| `routerSpec.repository` | string | `"lmcache/lmstack-router"` | Docker image repository for the router |
200206
| `routerSpec.tag` | string | `"latest"` | Docker image tag for the router |
201207
| `routerSpec.imagePullPolicy` | string | `"Always"` | Image pull policy for the router |
202208
| `routerSpec.imagePullSecrets` | list | `[]` | Image pull secrets for private container registries |
203-
| `routerSpec.enableRouter` | boolean | `true` | Whether to enable the router service |
204209
| `routerSpec.replicaCount` | integer | `1` | Number of replicas for the router pod |
210+
| `routerSpec.pdb.enabled` | boolean | `false` | Whether to create a PodDisruptionBudget for the router |
211+
| `routerSpec.pdb.labels`| map | `{}` | Labels to add to the PodDisruptionBudget |
212+
| `routerSpec.pdb.annotations`| map | `{}` | Annotations to add to the PodDisruptionBudget |
213+
| `routerSpec.pdb.minAvailable` | string | `""` | Minimum number of pods that must remain available after an eviction, as an integer or a percentage (e.g., 50%). Used only when `maxUnavailable` is not set |
214+
| `routerSpec.pdb.maxUnavailable` | string | `""` | Maximum number of pods that may be unavailable after an eviction, as an integer or a percentage (e.g., 50%). Takes precedence over `minAvailable` when set |
205215
| `routerSpec.priorityClassName` | string | `""` | Priority class for router |
206216
| `routerSpec.containerPort` | integer | `8000` | Port the router container is listening on |
207217
| `routerSpec.serviceType` | string | `"ClusterIP"` | Kubernetes service type for the router |
@@ -325,7 +335,10 @@ This table documents all available configuration values for the Production Stack
325335
| `loraController.image.tag` | string | `"latest"` | Docker image tag |
326336
| `loraController.image.pullPolicy` | string | `"IfNotPresent"` | Image pull policy |
327337
| `loraController.imagePullSecrets` | list | `[]` | Image pull secrets |
338+
| `loraController.annotations` | map | `{}` | Deployment annotations |
339+
| `loraController.labels` | map | `{}` | Deployment labels |
328340
| `loraController.podAnnotations` | map | `{}` | Pod annotations |
341+
| `loraController.podLabels` | map | `{}` | Pod labels |
329342
| `loraController.podSecurityContext.runAsNonRoot` | boolean | `true` | Run as non-root user |
330343
| `loraController.podSecurityContext.seccompProfile.type` | string | `RuntimeDefault` | Seccomp profile type |
331344
| `loraController.containerSecurityContext.allowPrivilegeEscalation` | boolean | `false` | Allow privilege escalation |
@@ -336,6 +349,12 @@ This table documents all available configuration values for the Production Stack
336349
| `loraController.tolerations` | list | `[]` | Tolerations configuration |
337350
| `loraController.env` | list | `[]` | Environment variables |
338351
| `loraController.extraArgs` | list | `[]` | Extra arguments for the controller |
352+
| `loraController.metrics.enabled` | boolean | `true` | Whether to expose lora controller metrics |
353+
| `loraController.pdb.enabled`| boolean | `false` | Whether to create a PodDisruptionBudget for the loraController |
354+
| `loraController.pdb.labels`| map | `{}` | Labels to add to the PodDisruptionBudget |
355+
| `loraController.pdb.annotations`| map | `{}` | Annotations to add to the PodDisruptionBudget |
356+
| `loraController.pdb.minAvailable` | string | `""` | Minimum number of pods that must remain available after an eviction, as an integer or a percentage (e.g., 50%). Used only when `maxUnavailable` is not set |
357+
| `loraController.pdb.maxUnavailable` | string | `""` | Maximum number of pods that may be unavailable after an eviction, as an integer or a percentage (e.g., 50%). Takes precedence over `minAvailable` when set |
339358

340359
### Shared Storage Configuration
341360

helm/templates/deployment-lora-controller.yaml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,15 @@ metadata:
88
labels:
99
app.kubernetes.io/component: lora-controller
1010
helm-release-name: {{ .Release.Name }}
11+
{{- with .Values.loraController.labels }}
12+
{{- toYaml . | nindent 4 }}
13+
{{- end }}
14+
{{- with .Values.loraController.annotations }}
15+
annotations:
16+
{{- toYaml . | nindent 4 }}
17+
{{- end }}
1118
spec:
19+
replicas: {{ .Values.loraController.replicaCount }}
1220
selector:
1321
matchLabels:
1422
app.kubernetes.io/component: lora-controller
@@ -18,6 +26,9 @@ spec:
1826
labels:
1927
app.kubernetes.io/component: lora-controller
2028
helm-release-name: {{ .Release.Name }}
29+
{{- with .Values.loraController.podLabels }}
30+
{{- toYaml . | nindent 8 }}
31+
{{- end }}
2132
{{- with .Values.loraController.podAnnotations }}
2233
annotations:
2334
{{- toYaml . | nindent 8 }}
@@ -91,8 +102,10 @@ spec:
91102
port: 8081
92103
initialDelaySeconds: 5
93104
periodSeconds: 10
105+
{{- with .Values.loraController.resources }}
94106
resources:
95-
{{- toYaml .Values.loraController.resources | nindent 12 }}
107+
{{- toYaml . | nindent 12 }}
108+
{{- end }}
96109
securityContext:
97110
{{- toYaml .Values.loraController.containerSecurityContext | nindent 12 }}
98111
volumeMounts:
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
{{- /*
PodDisruptionBudget for the LoRA controller.

Rendered only when both loraController.enableLoraController and
loraController.pdb.enabled are truthy. Optional labels/annotations come from
loraController.pdb.labels and loraController.pdb.annotations.

Budget selection: maxUnavailable is used when it is provided (including an
explicit 0); otherwise minAvailable is used, falling back to 0.
*/ -}}
{{- if and .Values.loraController.enableLoraController .Values.loraController.pdb.enabled }}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: "{{ .Release.Name }}-pdb-lora-controller"
  namespace: {{ .Release.Namespace }}
  {{- with .Values.loraController.pdb.labels }}
  labels:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  {{- with .Values.loraController.pdb.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- /* A plain `with` treats 0 as empty and would fall through to minAvailable,
         turning "allow no disruptions" into "allow all"; check for zero explicitly. */}}
  {{- $maxUnavailable := .Values.loraController.pdb.maxUnavailable }}
  {{- if or $maxUnavailable (eq (toString $maxUnavailable) "0") }}
  maxUnavailable: {{ $maxUnavailable }}
  {{- else }}
  minAvailable: {{ .Values.loraController.pdb.minAvailable | default 0 }}
  {{- end }}
  selector:
    matchLabels:
      app.kubernetes.io/component: lora-controller
      helm-release-name: {{ .Release.Name }}
{{- end }}

helm/templates/pdb-router.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
{{- /*
PodDisruptionBudget for the router.

Rendered only when both routerSpec.enableRouter and routerSpec.pdb.enabled are
truthy. Optional labels/annotations come from routerSpec.pdb.labels and
routerSpec.pdb.annotations. The selector is built from the
chart.routerStandardLabels and chart.routerLabels helpers.

Budget selection: maxUnavailable is used when it is provided (including an
explicit 0); otherwise minAvailable is used, falling back to 0.
*/ -}}
{{- if and .Values.routerSpec.enableRouter .Values.routerSpec.pdb.enabled }}
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: "{{ .Release.Name }}-pdb-router"
  namespace: {{ .Release.Namespace }}
  {{- with .Values.routerSpec.pdb.labels }}
  labels:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  {{- with .Values.routerSpec.pdb.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- /* A plain `with` treats 0 as empty and would fall through to minAvailable,
         turning "allow no disruptions" into "allow all"; check for zero explicitly. */}}
  {{- $maxUnavailable := .Values.routerSpec.pdb.maxUnavailable }}
  {{- if or $maxUnavailable (eq (toString $maxUnavailable) "0") }}
  maxUnavailable: {{ $maxUnavailable }}
  {{- else }}
  minAvailable: {{ .Values.routerSpec.pdb.minAvailable | default 0 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "chart.routerStandardLabels" (dict "releaseName" .Release.Name "chartName" .Chart.Name) | nindent 6 }}
      {{- include "chart.routerLabels" . | nindent 6 }}
{{- end }}

helm/templates/pdb-vllm-multi.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
{{- /*
One PodDisruptionBudget per serving-engine model.

Iterates servingEngineSpec.modelSpec when servingEngineSpec.enableEngine is
truthy and emits a PDB document for every entry whose pdb.enabled is truthy.
`with $` restores the root context inside the range so .Release / .Chart and
the chart helpers resolve; the current entry stays reachable as $modelSpec.

Budget selection: maxUnavailable is used when it is provided (including an
explicit 0); otherwise minAvailable is used, falling back to 0.
*/ -}}
{{- if .Values.servingEngineSpec.enableEngine -}}
{{- range $modelSpec := .Values.servingEngineSpec.modelSpec }}
{{- if and $modelSpec.pdb $modelSpec.pdb.enabled }}
{{- with $ }}
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: "{{ .Release.Name }}-{{ $modelSpec.name }}-pdb-vllm"
  namespace: {{ .Release.Namespace }}
  {{- with $modelSpec.pdb.labels }}
  labels:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  {{- with $modelSpec.pdb.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- /* A plain `with` treats 0 as empty and would fall through to minAvailable,
         turning "allow no disruptions" into "allow all"; check for zero explicitly. */}}
  {{- $maxUnavailable := $modelSpec.pdb.maxUnavailable }}
  {{- if or $maxUnavailable (eq (toString $maxUnavailable) "0") }}
  maxUnavailable: {{ $maxUnavailable }}
  {{- else }}
  minAvailable: {{ $modelSpec.pdb.minAvailable | default 0 }}
  {{- end }}
  selector:
    matchLabels:
      model: {{ $modelSpec.name }}
      helm-release-name: {{ .Release.Name }}
      {{- include "chart.engineStandardLabels" (dict "releaseName" .Release.Name "modelName" $modelSpec.name "chartName" .Chart.Name) | nindent 6 }}
      {{- include "chart.engineLabels" . | nindent 6 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}

helm/templates/poddisruptionbudget.yaml

Lines changed: 0 additions & 9 deletions
This file was deleted.

0 commit comments

Comments
 (0)