Skip to content

Commit 8e416e0

Browse files
committed
Add Helm configuration for leader election
1 parent 68c6ae9 commit 8e416e0

File tree

3 files changed

+46
-0
lines changed

3 files changed

+46
-0
lines changed

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ spec:
4444
- "--model-server-metrics-path={{ .Values.inferenceExtension.modelServerMetricsPath }}"
4545
- "--model-server-metrics-scheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
4646
- "--model-server-metrics-https-insecure-skip-verify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
47+
{{- if .Values.inferenceExtension.enableLeaderElection }}
48+
- "--ha-enable-leader-election"
49+
{{- end }}
4750
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
4851
- --total-queued-requests-metric
4952
- "nv_trt_llm_request_metrics{request_type=waiting}"
@@ -63,15 +66,27 @@ spec:
6366
{{- toYaml . | nindent 8 }}
6467
{{- end }}
6568
# Health probes use the gRPC health service on port 9003. Only the probed
# service name depends on leader election, so the conditional is limited to
# that single key.
livenessProbe:
  grpc:
    port: 9003
    {{- if .Values.inferenceExtension.enableLeaderElection }}
    # Leader election on: probe the dedicated liveness service.
    service: liveness
    {{- else }}
    service: inference-extension
    {{- end }}
  initialDelaySeconds: 5
  periodSeconds: 10
readinessProbe:
  grpc:
    port: 9003
    {{- if .Values.inferenceExtension.enableLeaderElection }}
    # Leader election on: probe the dedicated readiness service.
    service: readiness
    {{- else }}
    service: inference-extension
    {{- end }}
  initialDelaySeconds: 5
  periodSeconds: 10
7792
{{- with .Values.inferenceExtension.env }}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{{- if .Values.inferenceExtension.enableLeaderElection }}
---
# Namespaced RBAC for leader election. Rendered only when
# inferenceExtension.enableLeaderElection is truthy.
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}-leader-election
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
rules:
  # Lease objects (coordination.k8s.io) are the resource Kubernetes leader
  # election uses as its lock.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  # Core-group events; presumably so election transitions can be recorded.
  # NOTE(review): confirm the extension actually emits these.
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "patch"]
---
# Grants the Role above to the chart's ServiceAccount.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}-leader-election-binding
  namespace: {{ .Release.Namespace }}
  # Same standard labels as the Role, so both objects are selectable together.
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: {{ include "gateway-api-inference-extension.name" . }}
    # Explicit rather than relying on defaulting to the binding's namespace.
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ include "gateway-api-inference-extension.name" . }}-leader-election
{{- end }}

config/charts/inferencepool/values.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ inferenceExtension:
3434
extraContainerPorts: []
3535
# Define additional service ports
3636
extraServicePorts: []
37+
# Enable leader election for the inference extension. When true, the chart
# passes --ha-enable-leader-election to the container, points the gRPC
# liveness/readiness probes at the "liveness"/"readiness" services instead of
# "inference-extension", and renders a leader-election Role and RoleBinding.
enableLeaderElection: false
3738

3839
inferencePool:
3940
targetPortNumber: 8000

0 commit comments

Comments
 (0)