From 4e84c84e2d8bf3d034b038827a841e6f5e467e8d Mon Sep 17 00:00:00 2001 From: greg pereira Date: Thu, 14 Aug 2025 11:16:15 -0700 Subject: [PATCH 1/3] enable istio as a provider + configuring destinationRule Signed-off-by: greg pereira --- config/charts/inferencepool/README.md | 4 ++-- .../charts/inferencepool/templates/istio.yaml | 19 +++++++++++++++++++ config/charts/inferencepool/values.yaml | 12 ++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 config/charts/inferencepool/templates/istio.yaml diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 9a8be09f9..648bdc086 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -16,7 +16,7 @@ To install via the latest published chart in staging (--version v0 indicates la ```txt $ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ - --set provider.name=[none|gke] \ + --set provider.name=[none|gke|istio] \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0 ``` @@ -95,7 +95,7 @@ Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install for Tri $ helm install triton-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=triton-llama3-8b-instruct \ --set inferencePool.modelServerType=triton-tensorrt-llm \ - --set provider.name=[none|gke] \ + --set provider.name=[none|gke|istio] \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0 ``` diff --git a/config/charts/inferencepool/templates/istio.yaml b/config/charts/inferencepool/templates/istio.yaml new file mode 100644 index 000000000..664b579d5 --- /dev/null +++ b/config/charts/inferencepool/templates/istio.yaml @@ -0,0 +1,19 @@ +{{- if eq .Values.provider.name "istio" }} +--- +{{- if .Values.istio.destinationRule.enabled }} +apiVersion: 
networking.istio.io/v1beta1 +kind: DestinationRule +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} +spec: + host: {{ .Values.istio.destinationRule.host | default (printf "%s.%s.svc.cluster.local" (include "gateway-api-inference-extension.name" .) .Release.Namespace) }} + trafficPolicy: + tls: + mode: SIMPLE + insecureSkipVerify: true + {{- if .Values.istio.destinationRule.trafficPolicy.connectionPool }} + connectionPool: + {{- .Values.istio.destinationRule.trafficPolicy.connectionPool | toYaml | nindent 6 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 6476bd800..11e4683b2 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -67,6 +67,7 @@ inferencePool: # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now. targetPortNumber: 8000 +# Options: ["gke", "istio", "none"] provider: name: none @@ -75,3 +76,14 @@ provider: gke: # Set to true if the cluster is an Autopilot cluster. 
autopilot: false + +istio: + destinationRule: + enabled: true + # Provide a way to override the default calculated host + host: "" + # Optional: Enables customization of the traffic policy + trafficPolicy: {} + # connectionPool: + # http: + # maxRequestsPerConnection: 256000 From f9eef084a85eb3550a854e87524a140e9ad0178b Mon Sep 17 00:00:00 2001 From: greg pereira Date: Thu, 14 Aug 2025 12:27:18 -0700 Subject: [PATCH 2/3] document provider specific configurations Signed-off-by: greg pereira --- config/charts/inferencepool/README.md | 26 ++++++++++++++++++- .../charts/inferencepool/templates/istio.yaml | 2 +- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 648bdc086..63fc1191c 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -188,9 +188,33 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | | `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | -| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | +| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | | `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | +### Provider Specific Configuration + +This section should document any Gateway provider specific values configurations. 
+ +#### GKE + +These are the options available to you with `provider.name` set to `gke`: + +| **Parameter Name** | **Description** | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `gke.monitoringSecret.name` | The name of the monitoring secret to be used. Defaults to `inference-gateway-sa-metrics-reader-secret`. | +| `gke.monitoringSecret.namespace` | The namespace that the monitoring secret lives in. Defaults to `default`. | + + +#### Istio + +These are the options available to you with `provider.name` set to `istio`: + +| **Parameter Name** | **Description** | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `istio.destinationRule.enabled` | Enable creation of an Istio DestinationRule to configure traffic routing. | +| `istio.destinationRule.host` | Custom host value for the destination rule. If not set this will use the default value which is derived from the epp service name and release namespace to generate a valid service address. | +| `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy | + ## Notes This chart will only deploy an InferencePool and its corresponding EndpointPicker extension. Before install the chart, please make sure that the inference extension CRDs are installed in the cluster. For more details, please refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/). 
diff --git a/config/charts/inferencepool/templates/istio.yaml b/config/charts/inferencepool/templates/istio.yaml index 664b579d5..588072507 100644 --- a/config/charts/inferencepool/templates/istio.yaml +++ b/config/charts/inferencepool/templates/istio.yaml @@ -1,6 +1,6 @@ {{- if eq .Values.provider.name "istio" }} ---- {{- if .Values.istio.destinationRule.enabled }} +--- apiVersion: networking.istio.io/v1beta1 kind: DestinationRule metadata: From 050fd495b6e431fc79f2d3890a869d7ca4f2b108 Mon Sep 17 00:00:00 2001 From: greg pereira Date: Thu, 28 Aug 2025 13:07:56 -0700 Subject: [PATCH 3/3] remove default option, always create DestinationRule with istio provider Signed-off-by: greg pereira --- config/charts/inferencepool/README.md | 1 - config/charts/inferencepool/templates/istio.yaml | 3 --- config/charts/inferencepool/values.yaml | 1 - 3 files changed, 5 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 63fc1191c..414301da3 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -211,7 +211,6 @@ These are the options available to you with `provider.name` set to `istio`: | **Parameter Name** | **Description** | |---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| -| `istio.destinationRule.enabled` | Enable creation of an Istio DestinationRule to configure traffic routing. | | `istio.destinationRule.host` | Custom host value for the destination rule. If not set this will use the default value which is derived from the epp service name and release namespace to generate a valid service address. 
| | `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy | diff --git a/config/charts/inferencepool/templates/istio.yaml b/config/charts/inferencepool/templates/istio.yaml index 588072507..b50c0b021 100644 --- a/config/charts/inferencepool/templates/istio.yaml +++ b/config/charts/inferencepool/templates/istio.yaml @@ -1,6 +1,4 @@ {{- if eq .Values.provider.name "istio" }} -{{- if .Values.istio.destinationRule.enabled }} ---- apiVersion: networking.istio.io/v1beta1 kind: DestinationRule metadata: @@ -16,4 +14,3 @@ spec: {{- .Values.istio.destinationRule.trafficPolicy.connectionPool | toYaml | nindent 6 }} {{- end }} {{- end }} -{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 11e4683b2..91d6a48e6 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -79,7 +79,6 @@ provider: istio: destinationRule: - enabled: true # Provide a way to override the default calculated host host: "" # Optional: Enables customization of the traffic policy