diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 9a8be09f9..414301da3 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -16,7 +16,7 @@ To install via the latest published chart in staging (--version v0 indicates la ```txt $ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ - --set provider.name=[none|gke] \ + --set provider.name=[none|gke|istio] \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0 ``` @@ -95,7 +95,7 @@ Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install for Tri $ helm install triton-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=triton-llama3-8b-instruct \ --set inferencePool.modelServerType=triton-tensorrt-llm \ - --set provider.name=[none|gke] \ + --set provider.name=[none|gke|istio] \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0 ``` @@ -188,9 +188,32 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | | `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | -| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | +| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | | `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. 
| +### Provider Specific Configuration + +This section documents configuration values that are specific to a Gateway provider. + +#### GKE + +These are the options available to you with `provider.name` set to `gke`: + +| **Parameter Name** | **Description** | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `gke.monitoringSecret.name` | The name of the monitoring secret to be used. Defaults to `inference-gateway-sa-metrics-reader-secret`. | +| `gke.monitoringSecret.namespace` | The namespace that the monitoring secret lives in. Defaults to `default`. | + + +#### Istio + +These are the options available to you with `provider.name` set to `istio`: + +| **Parameter Name** | **Description** | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `istio.destinationRule.host` | Custom host value for the destination rule. If not set, this will use the default value, which is derived from the epp service name and release namespace to generate a valid service address. | +| `istio.destinationRule.trafficPolicy.connectionPool` | Configure the connectionPool level settings of the traffic policy | + ## Notes This chart will only deploy an InferencePool and its corresponding EndpointPicker extension. Before install the chart, please make sure that the inference extension CRDs are installed in the cluster. For more details, please refer to the [getting started guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/). 
diff --git a/config/charts/inferencepool/templates/istio.yaml b/config/charts/inferencepool/templates/istio.yaml new file mode 100644 index 000000000..b50c0b021 --- /dev/null +++ b/config/charts/inferencepool/templates/istio.yaml @@ -0,0 +1,16 @@ +{{- if eq .Values.provider.name "istio" }} +apiVersion: networking.istio.io/v1beta1 +kind: DestinationRule +metadata: + name: {{ include "gateway-api-inference-extension.name" . }} +spec: + host: {{ .Values.istio.destinationRule.host | default (printf "%s.%s.svc.cluster.local" (include "gateway-api-inference-extension.name" .) .Release.Namespace) }} + trafficPolicy: + tls: + mode: SIMPLE + insecureSkipVerify: true + {{- if .Values.istio.destinationRule.trafficPolicy.connectionPool }} + connectionPool: + {{- .Values.istio.destinationRule.trafficPolicy.connectionPool | toYaml | nindent 6 }} + {{- end }} +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 6476bd800..91d6a48e6 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -67,6 +67,7 @@ inferencePool: # This will soon be deprecated when upstream GW providers support v1, just doing something simple for now. targetPortNumber: 8000 +# Options: ["gke", "istio", "none"] provider: name: none @@ -75,3 +76,13 @@ provider: gke: # Set to true if the cluster is an Autopilot cluster. autopilot: false + +istio: + destinationRule: + # Provide a way to override the default calculated host + host: "" + # Optional: Enables customization of the traffic policy + trafficPolicy: {} + # connectionPool: + # http: + # maxRequestsPerConnection: 256000