From f413e5cd5c3ef471279c5e04a77e0b31dba8cc05 Mon Sep 17 00:00:00 2001 From: bobzetian Date: Tue, 16 Sep 2025 00:58:43 +0000 Subject: [PATCH 1/5] fix gke monitoring. --- config/charts/inferencepool/README.md | 3 +- .../templates/epp-sa-token-secret.yaml | 2 +- .../charts/inferencepool/templates/gke.yaml | 74 ++++++++++++++++++- config/charts/inferencepool/values.yaml | 3 + 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 5a5663d1a..c7ea2d239 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -133,7 +133,7 @@ inferenceExtension: **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster. -For GKE environments, monitoring is automatically configured when `provider.name` is set to `gke`. +For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `ClusterPodMonitoring` and RBAC resources for metrics collection. Then apply it with: @@ -174,6 +174,7 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`ClusterPodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index 9abee0fcd..089bb9167 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,4 +1,4 @@ -{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.gke.enabled }} +{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled }} apiVersion: v1 kind: Secret metadata: diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index f2296aafb..1b9506968 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -35,6 +35,30 @@ spec: timeoutSec: 300 # 5-minute timeout (adjust as needed) logging: enabled: true # log all requests by default +{{- if .Values.inferenceExtension.monitoring.gke.enabled }} +{{- $saName := printf "%s-metrics-reader-sa" .Release.Name -}} +{{- $secretName := printf "%s-metrics-reader-secret" .Release.Name -}} +{{- $clusterRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} +{{- $clusterRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} +{{- $secretReadClusterRoleName := printf "%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}} +{{- $gmpCollectorRoleBindingName := printf "gmp-system:collector:%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ $saName }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ $secretName }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ $saName }} +type: kubernetes.io/service-account-token --- apiVersion: monitoring.googleapis.com/v1 kind: ClusterPodMonitoring @@ -52,10 +76,58 @@ spec: type: Bearer credentials: secret: - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ $secretName }} key: token namespace: {{ .Release.Namespace }} selector: matchLabels: {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ $clusterRoleName }} +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ $clusterRoleBindingName }} +subjects: +- kind: ServiceAccount + name: {{ $saName }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ $clusterRoleName }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ $secretReadClusterRoleName }} +rules: +- resources: + - secrets + apiGroups: [""] + verbs: ["get", "list", "watch"] + resourceNames: [{{ $secretName | quote }}] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ $gmpCollectorRoleBindingName }} +roleRef: + name: {{ $secretReadClusterRoleName }} + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: +- name: collector + namespace: gmp-system + kind: ServiceAccount +{{- end }} {{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index f61b64e37..2e56a2c58 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -50,6 +50,9 @@ inferenceExtension: # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection prometheus: enabled: false + + gke: + enabled: false inferencePool: targetPorts: From 05900ee1adc1d0a9e851d0530fea7bb063449b9c Mon Sep 17 00:00:00 2001 From: bobzetian Date: Tue, 16 Sep 2025 19:58:59 +0000 Subject: [PATCH 2/5] change to namespaced resources as much as possible. --- .../charts/inferencepool/templates/gke.yaml | 53 ++++++++++--------- config/charts/inferencepool/values.yaml | 2 + 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 1b9506968..c966109a6 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -36,34 +36,39 @@ spec: logging: enabled: true # log all requests by default {{- if .Values.inferenceExtension.monitoring.gke.enabled }} -{{- $saName := printf "%s-metrics-reader-sa" .Release.Name -}} -{{- $secretName := printf "%s-metrics-reader-secret" .Release.Name -}} -{{- $clusterRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} -{{- $clusterRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} -{{- $secretReadClusterRoleName := printf "%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}} -{{- $gmpCollectorRoleBindingName := printf "gmp-system:collector:%s-%s-metrics-reader-secret-read" .Release.Namespace .Release.Name -}} +{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} +{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} +{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} +{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} +{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}} +{{- $gmpNamespace := "gmp-system" -}} +{{- if .Values.inferenceExtension.monitoring.gke.autopilot -}} +{{- $gmpNamespace = "gke-gmp-system" -}} +{{- end -}} +{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}} --- apiVersion: v1 kind: ServiceAccount metadata: - name: {{ $saName }} + name: {{ $metricsReadSA }} namespace: {{ .Release.Namespace }} --- apiVersion: v1 kind: Secret metadata: - name: {{ $secretName }} + name: {{ $metricsReadSecretName }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} annotations: - kubernetes.io/service-account.name: {{ $saName }} + kubernetes.io/service-account.name: {{ $metricsReadSA }} type: kubernetes.io/service-account-token --- apiVersion: monitoring.googleapis.com/v1 -kind: ClusterPodMonitoring +kind: PodMonitoring metadata: - name: {{ .Release.Namespace }}-{{ .Release.Name }} + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: @@ -76,9 +81,8 @@ spec: type: Bearer credentials: secret: - name: {{ $secretName }} + name: {{ $metricsReadSecretName }} key: token - namespace: {{ .Release.Namespace }} selector: matchLabels: {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} @@ -86,7 +90,7 @@ spec: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: {{ $clusterRoleName }} + name: {{ $metricsReadRoleName }} rules: - nonResourceURLs: - /metrics @@ -96,38 +100,39 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: {{ $clusterRoleBindingName }} + name: {{ $metricsReadRoleBindingName }} subjects: - kind: ServiceAccount - name: {{ $saName }} + name: {{ $metricsReadSA }} namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole - name: {{ $clusterRoleName }} + name: {{ $metricsReadRoleName }} apiGroup: rbac.authorization.k8s.io --- apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole +kind: Role metadata: - name: {{ $secretReadClusterRoleName }} + name: {{ $secretReadRoleName }} rules: - resources: - secrets apiGroups: [""] verbs: ["get", "list", "watch"] - resourceNames: [{{ $secretName | quote }}] + resourceNames: [{{ $metricsReadSecretName | quote }}] --- apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding +kind: RoleBinding metadata: name: {{ $gmpCollectorRoleBindingName }} + namespace: {{ .Release.Namespace }} roleRef: - name: {{ $secretReadClusterRoleName }} - kind: ClusterRole + name: {{ $secretReadRoleName }} + kind: Role apiGroup: rbac.authorization.k8s.io subjects: - name: collector - namespace: gmp-system + namespace: {{ $gmpNamespace }} kind: ServiceAccount {{- end }} {{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 2e56a2c58..48b8b61ed 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -53,6 +53,8 @@ inferenceExtension: gke: enabled: false + # Set to true if the cluster is an Autopilot cluster. + autopilot: false inferencePool: targetPorts: From 2dc8516b91d4e48d58789407f716d9eaf67440f8 Mon Sep 17 00:00:00 2001 From: bobzetian Date: Tue, 16 Sep 2025 23:24:39 +0000 Subject: [PATCH 3/5] update helm chart readme. --- config/charts/inferencepool/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index c7ea2d239..6a487a380 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -174,7 +174,8 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`ClusterPodMonitoring` and RBAC). Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | +| `inferenceExtension.monitoring.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This ensures the correct `gke-gmp-system` namespace is used for the GMP collector. Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | From f43ae489f7cdd204a5fbadc2fccfec6b1eef4266 Mon Sep 17 00:00:00 2001 From: bobzetian Date: Thu, 18 Sep 2025 23:03:12 +0000 Subject: [PATCH 4/5] resolve nits. --- config/charts/inferencepool/README.md | 2 +- config/charts/inferencepool/templates/epp-sa-token-secret.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 6a487a380..e5ca67c99 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -133,7 +133,7 @@ inferenceExtension: **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster. -For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `ClusterPodMonitoring` and RBAC resources for metrics collection. +For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. Then apply it with: diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index 089bb9167..df54b3475 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,4 +1,4 @@ -{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} apiVersion: v1 kind: Secret metadata: From 39a943be837a090d11f549d32978577b004e32ef Mon Sep 17 00:00:00 2001 From: bobzetian Date: Fri, 19 Sep 2025 01:21:49 +0000 Subject: [PATCH 5/5] move autopilot to provider.gke. --- config/charts/inferencepool/README.md | 6 ++++-- config/charts/inferencepool/templates/gke.yaml | 6 +++++- config/charts/inferencepool/values.yaml | 7 +++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index e5ca67c99..b6629d2b8 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -135,6 +135,8 @@ inferenceExtension: For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. +If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`. + Then apply it with: ```txt @@ -174,10 +176,10 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | -| `inferenceExtension.monitoring.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This ensures the correct `gke-gmp-system` namespace is used for the GMP collector. Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | +| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | ## Notes diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index c966109a6..59e186a94 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -42,7 +42,11 @@ spec: {{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} {{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}} {{- $gmpNamespace := "gmp-system" -}} -{{- if .Values.inferenceExtension.monitoring.gke.autopilot -}} +{{- $isAutopilot := false -}} +{{- with .Values.provider.gke }} + {{- $isAutopilot = .autopilot | default false -}} +{{- end }} +{{- if $isAutopilot -}} {{- $gmpNamespace = "gke-gmp-system" -}} {{- end -}} {{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 48b8b61ed..6476bd800 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -53,8 +53,6 @@ inferenceExtension: gke: enabled: false - # Set to true if the cluster is an Autopilot cluster. - autopilot: false inferencePool: targetPorts: @@ -72,3 +70,8 @@ inferencePool: provider: name: none + # GKE-specific configuration. + # This block is only used if name is "gke". + gke: + # Set to true if the cluster is an Autopilot cluster. + autopilot: false