From a655bda17cddc32c3609cf48591741ecbffe0d0b Mon Sep 17 00:00:00 2001 From: Rahul Gurnani Date: Fri, 5 Sep 2025 19:42:38 +0000 Subject: [PATCH 1/3] Make apiVersion configurable in helm chart for inferencePool --- config/charts/inferencepool/templates/inferencepool.yaml | 2 +- config/charts/inferencepool/values.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index 352af7a05..bcfe23ee4 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -1,5 +1,5 @@ {{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }} -apiVersion: inference.networking.k8s.io/v1 +apiVersion: {{ .Values.inferencePool.apiVersion }} kind: InferencePool metadata: name: {{ .Release.Name }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index ba50cef97..f44b536e9 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -44,6 +44,7 @@ inferencePool: targetPorts: - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm + apiVersion: inference.networking.k8s.io/v1 # modelServers: # REQUIRED # matchLabels: # app: vllm-llama3-8b-instruct From b74259dfedaa7be3d3da728e10d5c5fcb45a9bb0 Mon Sep 17 00:00:00 2001 From: Rahul Gurnani Date: Sat, 6 Sep 2025 00:44:06 +0000 Subject: [PATCH 2/3] Update helm chart to accommodate for targetPortNumber in v1alpha2 --- config/charts/inferencepool/templates/inferencepool.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index bcfe23ee4..e43b397e3 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -7,10 +7,17 @@ metadata: 
labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: + {{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}} + targetPortNumber: + {{- range .Values.inferencePool.targetPorts }} + - number: {{ .number }} + {{- end }} + {{ else }} targetPorts: {{- range .Values.inferencePool.targetPorts }} - number: {{ .number }} {{- end }} + {{- end}} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} From 4ebab20d0506444413580171dd75d03b15ca9909 Mon Sep 17 00:00:00 2001 From: Kellen Swain Date: Sat, 6 Sep 2025 20:48:48 +0000 Subject: [PATCH 3/3] updating helm charts to work on the cluster with v1 and v1a2 --- .../templates/inferencepool.yaml | 29 ++++++++++++++----- config/charts/inferencepool/values.yaml | 6 +++- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index e43b397e3..5c973b998 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -1,4 +1,4 @@ -{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }} +{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}} apiVersion: {{ .Values.inferencePool.apiVersion }} kind: InferencePool metadata: @@ -7,17 +7,31 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} spec: - {{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2"}} - targetPortNumber: - {{- range .Values.inferencePool.targetPorts }} - - number: {{ .number }} + targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }} + selector: + {{- if .Values.inferencePool.modelServers.matchLabels }} + {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }} + {{ $key }}: {{ quote $value }} {{- end }} - {{ else }} + {{- end }} + extensionRef: + name: {{ include "gateway-api-inference-extension.name" . }} + portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }} + failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }} +{{ else }} +{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }} +apiVersion: "inference.networking.k8s.io/v1" +kind: InferencePool +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: targetPorts: {{- range .Values.inferencePool.targetPorts }} - number: {{ .number }} {{- end }} - {{- end}} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} @@ -29,5 +43,6 @@ spec: name: {{ include "gateway-api-inference-extension.name" . 
}} port: number: {{ .Values.inferenceExtension.extProcPort | default 9002 }} +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index f44b536e9..d45e6ed39 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -44,11 +44,15 @@ inferencePool: targetPorts: - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm - apiVersion: inference.networking.k8s.io/v1 + apiVersion: inference.networking.k8s.io/v1 # modelServers: # REQUIRED # matchLabels: # app: vllm-llama3-8b-instruct + # Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2, + # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now. + targetPortNumber: 8000 + provider: name: none