Skip to content

Commit 8b154ba

Browse files
Make apiVersion configurable for inferencePool in the helm charts (#1542)
* Make apiVersion configurable in helm chart for inferencePool * Update helm chart to accommodate targetPortNumber in v1alpha2 * Update helm charts to work on clusters with v1 and v1alpha2 --------- Co-authored-by: Kellen Swain <[email protected]>
1 parent b9891d2 commit 8b154ba

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

config/charts/inferencepool/templates/inferencepool.yaml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
1+
{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}}
2+
apiVersion: {{ .Values.inferencePool.apiVersion }}
3+
kind: InferencePool
4+
metadata:
5+
name: {{ .Release.Name }}
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
9+
spec:
10+
targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }}
11+
selector:
12+
{{- if .Values.inferencePool.modelServers.matchLabels }}
13+
{{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
14+
{{ $key }}: {{ quote $value }}
15+
{{- end }}
16+
{{- end }}
17+
extensionRef:
18+
name: {{ include "gateway-api-inference-extension.name" . }}
19+
portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
20+
failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }}
21+
{{ else }}
122
{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
2-
apiVersion: inference.networking.k8s.io/v1
23+
apiVersion: "inference.networking.k8s.io/v1"
324
kind: InferencePool
425
metadata:
526
name: {{ .Release.Name }}
@@ -22,5 +43,6 @@ spec:
2243
name: {{ include "gateway-api-inference-extension.name" . }}
2344
port:
2445
number: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
46+
{{- end }}
2547

2648

config/charts/inferencepool/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,15 @@ inferencePool:
4444
targetPorts:
4545
- number: 8000
4646
modelServerType: vllm # vllm, triton-tensorrt-llm
47+
apiVersion: inference.networking.k8s.io/v1
4748
# modelServers: # REQUIRED
4849
# matchLabels:
4950
# app: vllm-llama3-8b-instruct
5051

52+
# Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2.
53+
# This will be deprecated once upstream gateway providers support v1; it is a simple stopgap for now.
54+
targetPortNumber: 8000
55+
5156
provider:
5257
name: none
5358

0 commit comments

Comments
 (0)