diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index 352af7a05..5c973b998 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -1,5 +1,26 @@ +{{ if eq .Values.inferencePool.apiVersion "inference.networking.x-k8s.io/v1alpha2" }} +apiVersion: {{ .Values.inferencePool.apiVersion }} +kind: InferencePool +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: + targetPortNumber: {{ .Values.inferencePool.targetPortNumber | default 8000 }} + selector: + {{- if .Values.inferencePool.modelServers.matchLabels }} + {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }} + {{ $key }}: {{ quote $value }} + {{- end }} + {{- end }} + extensionRef: + name: {{ include "gateway-api-inference-extension.name" . }} + portNumber: {{ .Values.inferenceExtension.extProcPort | default 9002 }} + failureMode: {{ .Values.inferenceExtension.failureMode | default "FailClose" }} +{{ else }} {{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }} -apiVersion: inference.networking.k8s.io/v1 +apiVersion: "inference.networking.k8s.io/v1" kind: InferencePool metadata: name: {{ .Release.Name }} @@ -22,5 +43,6 @@ spec: name: {{ include "gateway-api-inference-extension.name" . 
}} port: number: {{ .Values.inferenceExtension.extProcPort | default 9002 }} +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index ba50cef97..d45e6ed39 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -44,10 +44,15 @@ inferencePool: targetPorts: - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm + apiVersion: inference.networking.k8s.io/v1 # modelServers: # REQUIRED # matchLabels: # app: vllm-llama3-8b-instruct + # Should only be used if apiVersion is inference.networking.x-k8s.io/v1alpha2. + # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now. + targetPortNumber: 8000 + provider: name: none